![](/img/trans.png)
[英]How can I use a 3d numpy array of indices to retrieve the corresponding values in a 4d array?
[英]How can I efficiently convert a 4D numpy array into a pandas DataFrame with indices as columns?
我有一个形状为 (4, 155, 240, 240) 的 4D numpy 数组。我想创建一个 pandas DataFrame,数组中的每个元素占一行,共五列:四个索引各占一列,另一列存放数组中的值。我现在使用的代码如下所示:
import pandas as pd
import numpy as np
# Placeholder array with the same shape as the real data.
im = np.zeros((4, 155, 240, 240))

# Accumulate one Python list per output column, then build the frame once.
df = {col: [] for col in ['mode', 'x', 'y', 'z', 'val']}
# Python-level loop over every element (4 * 155 * 240 * 240 iterations).
for idx, val in np.ndenumerate(im):
    # idx is the 4-tuple index of the element; note that axis 1 is stored
    # under 'y' and axis 2 under 'x'.
    df['mode'].append(idx[0])
    df['y'].append(idx[1])
    df['x'].append(idx[2])
    df['z'].append(idx[3])
    df['val'].append(val)
# Rebind df from the dict of lists to the resulting DataFrame.
df = pd.DataFrame(df)
有没有办法更有效地执行此操作,可能使用矢量化操作?
senderle 的 cartesian_product_transpose(或 cartesian_product)是我所知的将笛卡尔积枚举为 NumPy 数组的最快方法。
import numpy as np
import pandas as pd
import functools
# Sample array of shape (4, 155, 240, 240) whose values are their own flat
# (C-order) positions, so every element is distinguishable.
im = np.arange(4 * 155 * 240 * 240).reshape(4, 155, 240, 240)
def cartesian_product_transpose(*arrays):
    """Return the Cartesian product of 1-D arrays as a 2-D array.

    Adapted from http://stackoverflow.com/a/11146645/190597 (senderle).

    Parameters
    ----------
    *arrays
        One or more 1-D arrays (``np.ix_`` rejects other ranks).

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_combinations, len(arrays))``. Each row holds one
        element from every input, with the last input varying fastest. The
        dtype is the common promoted dtype of the inputs.
    """
    # Open-mesh views, then broadcast each one to the full product shape.
    broadcastable = np.ix_(*arrays)
    broadcasted = np.broadcast_arrays(*broadcastable)
    # np.find_common_type was deprecated in NumPy 1.25 and removed in 2.0;
    # np.result_type performs the dtype promotion instead.
    dtype = np.result_type(*[arr.dtype for arr in broadcasted])
    rows, cols = broadcasted[0].size, len(broadcasted)
    # Fill one contiguous row per input, then transpose so that each input
    # becomes a column of the result.
    out = np.empty((cols, rows), dtype=dtype)
    for target, grid in zip(out, broadcasted):
        target[...] = grid.reshape(-1)
    return out.T
# One row per element of im: the four index columns come from the Cartesian
# product of the per-axis index ranges. The helper defined in this file is
# cartesian_product_transpose (the previously referenced
# cartesian_product_broadcasted does not exist here).
df = pd.DataFrame(
    cartesian_product_transpose(*[np.arange(i) for i in im.shape]),
    columns=['mode', 'x', 'y', 'z'])
# Attach the array values, flattened to line up with the index rows.
df['val'] = im.ravel()
看起来您需要的是元素的索引,可以尝试使用 numpy.meshgrid:
# Build one index grid per axis ("ij" keeps NumPy's axis order), flatten
# each grid into a column, and append the flattened values as the last column.
arr = np.column_stack(
    [grid.ravel() for grid in np.meshgrid(*[np.arange(n) for n in im.shape], indexing="ij")]
    + [im.ravel()]
)
arr
#array([[ 0., 0., 0., 0., 0.],
# [ 0., 0., 0., 1., 0.],
# [ 0., 0., 0., 2., 0.],
# ...,
# [ 3., 154., 239., 237., 0.],
# [ 3., 154., 239., 238., 0.],
# [ 3., 154., 239., 239., 0.]])
然后从中构造一个数据帧:
# Wrap the stacked (index..., value) rows in a DataFrame with named columns.
pd.DataFrame(data=arr, columns=['mode', 'x', 'y', 'z', 'val'])
与基于 np.ndenumerate 的普通 for 循环的耗时比较:
# Vectorised construction: flattened index grids plus the flattened values.
mesh = pd.DataFrame(
    np.column_stack(
        [grid.ravel() for grid in np.meshgrid(*[np.arange(n) for n in im.shape], indexing="ij")]
        + [im.ravel()]
    ),
    columns=["mode", "x", "y", "z", "val"],
)
# Reference construction: one (index..., value) tuple per element.
loop = pd.DataFrame(
    [(*index, x) for index, x in np.ndenumerate(im)],
    columns=["mode", "x", "y", "z", "val"],
)
# Check that both approaches produce the same table.
(loop.values == mesh.values).all()
# True
# IPython timing of the meshgrid-based construction (DataFrame creation included).
%timeit mesh = pd.DataFrame(np.column_stack(list(map(np.ravel, np.meshgrid(*map(np.arange, im.shape), indexing="ij"))) + [im.ravel()]), columns=["mode", "x", "y", "z", "val"])
# 1 loop, best of 3: 2.07 s per loop
# IPython timing of the np.ndenumerate list-comprehension construction.
%timeit loop = pd.DataFrame([index + (x,) for index, x in np.ndenumerate(im)], columns=["mode", "x", "y", "z", "val"])
# 1 loop, best of 3: 1min 2s per loop
我会组合使用 // 和 %:
# Flat (C-order) position of every element, as a column vector.
r = np.arange(im.size)[:, None]
# Axis lengths as an array so they broadcast against r.
s = np.array(im.shape)
# im.size // cumprod(shape) is the number of elements spanned by one step
# along each axis; floor-dividing by it and wrapping by the axis length
# recovers the per-axis index. The flattened values form the last column.
np.column_stack([(r // (im.size // np.cumprod(s))) % s, im.ravel()])
演示版
# Small integer array so the full result can be inspected by eye.
im = np.zeros(shape=(2, 3, 2, 3), dtype=int)
# Flat (C-order) position of every element, as a column vector.
r = np.arange(im.size)[:, None]
# Axis lengths as an array so they broadcast against r.
s = np.array(im.shape)
# Floor-divide by the per-axis step size, wrap by the axis length, and
# append the flattened values as the final column.
np.column_stack([(r // (im.size // np.cumprod(s))) % s, im.ravel()])
array([[0, 0, 0, 0, 0],
[0, 0, 0, 1, 0],
[0, 0, 0, 2, 0],
[0, 0, 1, 0, 0],
[0, 0, 1, 1, 0],
[0, 0, 1, 2, 0],
[0, 1, 0, 0, 0],
[0, 1, 0, 1, 0],
[0, 1, 0, 2, 0],
[0, 1, 1, 0, 0],
[0, 1, 1, 1, 0],
[0, 1, 1, 2, 0],
[0, 2, 0, 0, 0],
[0, 2, 0, 1, 0],
[0, 2, 0, 2, 0],
[0, 2, 1, 0, 0],
[0, 2, 1, 1, 0],
[0, 2, 1, 2, 0],
[1, 0, 0, 0, 0],
[1, 0, 0, 1, 0],
[1, 0, 0, 2, 0],
[1, 0, 1, 0, 0],
[1, 0, 1, 1, 0],
[1, 0, 1, 2, 0],
[1, 1, 0, 0, 0],
[1, 1, 0, 1, 0],
[1, 1, 0, 2, 0],
[1, 1, 1, 0, 0],
[1, 1, 1, 1, 0],
[1, 1, 1, 2, 0],
[1, 2, 0, 0, 0],
[1, 2, 0, 1, 0],
[1, 2, 0, 2, 0],
[1, 2, 1, 0, 0],
[1, 2, 1, 1, 0],
[1, 2, 1, 2, 0]])
这是一种基于数组初始化的方法-
def meshed_df(im):
    """Flatten a 4-D array into a DataFrame with one row per element.

    Parameters
    ----------
    im : numpy.ndarray
        Array with exactly four dimensions (the shape is unpacked into four
        sizes).

    Returns
    -------
    pandas.DataFrame
        Columns ``['mode', 'val', 'x', 'y', 'z']`` in that order, with rows
        in C order of ``im``. ``mode`` is the axis-0 index, ``val`` the
        element value, ``x`` the axis-2 index, ``y`` the axis-1 index and
        ``z`` the axis-3 index.
    """
    s0, s1, s2, s3 = im.shape
    # Open grids: each r_k has length s_k along axis k and 1 elsewhere, so
    # the assignments below broadcast it across the whole array.
    r0, r1, r2, r3 = np.ogrid[:s0, :s1, :s2, :s3]
    # NOTE: the buffer uses im.dtype, so the index columns are stored in the
    # value dtype as well.
    out = np.empty((s0, s1, s2, s3, 5), dtype=im.dtype)
    out[..., 0] = r0  # np.arange(s0)[:,None,None,None]
    out[..., 1] = im
    out[..., 2] = r2  # np.arange(s2)[None,None,:,None]
    out[..., 3] = r1  # np.arange(s1)[None,:,None,None]
    out[..., 4] = r3  # np.arange(s3)[None,None,None,:]
    # A flat list of labels: a nested list would make pandas build a
    # MultiIndex for the columns.
    return pd.DataFrame(out.reshape(-1, 5),
                        columns=['mode', 'val', 'x', 'y', 'z'])
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.