![](/img/trans.png)
[英]Identify first non-zero/nan element within a group and ffill nan in numpy
[英]Identify the first & last non-zero elements/indices within a group in numpy
import numpy as np
# Sample data from the question.
# Group label of every element (labels are sorted, one run per group).
group = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2])
# Values to process.
array = np.array([1, 2, 3, 0, 0, 2, 0, 3, 4, 0, 0, 0, 0, 1])
# Expected result: inside each group, the span from the first to the last
# non-zero value is filled with the first non-zero value.
targt = np.array([1, 1, 1, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 1])
def func(group: np.ndarray, array: np.ndarray):
    """Placeholder from the question: return ``array`` unchanged.

    Args:
        group: Group label of every element (unused by this stub).
        array: Values to process.

    Returns:
        The very same ``array`` object that was passed in.
    """
    return array
我在這里找到了類似的答案: 如何在 numpy 數組的每一列中找到第一個非零值?
如果不拆分 arrays 或迭代,我怎么能做到這一點?
[我的解決方案]
def first_nonzero_index(arr: np.ndarray, axis: int, mode: str = None, invalid_value: float = -1):
    """Return the index of the first or last non-zero element along ``axis``.

    Args:
        arr: Input values; anything ``np.asarray`` accepts.
        axis: Axis along which to search.
        mode: ``None`` or ``"head"`` searches for the first non-zero element;
            any other value searches for the last one.
        invalid_value: Value reported where no non-zero element exists.

    Returns:
        An array with ``axis`` reduced away (0-d for 1-D input) holding the
        found index, or ``invalid_value`` where every element is zero.
    """
    mask = np.asarray(arr) != 0
    has_nonzero = mask.any(axis=axis)
    if mask.shape[axis] == 0:
        # argmax refuses empty sequences; nothing can be found here anyway.
        return np.full(has_nonzero.shape, invalid_value)
    if mode is None or mode == "head":
        # argmax on booleans yields the position of the first True.
        position = mask.argmax(axis=axis)
    else:
        # First True of the reversed mask, mapped back to a forward index.
        position = mask.shape[axis] - np.flip(mask, axis=axis).argmax(axis=axis) - 1
    return np.where(has_nonzero, position, invalid_value)
def func(group: np.ndarray, array: np.ndarray):
    """Fill each group's first-to-last non-zero span with its first non-zero value.

    ``group`` is expected to hold sorted, non-negative integer labels, one per
    element of ``array``; segment boundaries are derived from the label counts.

    Args:
        group: Group label of every element.
        array: Values to process.

    Returns:
        A new 1-D array of the same length. Segments containing a non-zero
        value are rebuilt as float; segments without one are passed through
        unchanged (so the stacked result is float whenever any segment was
        filled).
    """
    group = np.asarray(group)
    array = np.asarray(array)
    # Cumulative sizes of all groups but the last are the split points.
    boundaries = np.cumsum(np.bincount(group)[:-1])
    pieces = []
    for segment in np.split(array, boundaries):
        nonzero_pos = np.flatnonzero(segment)
        if nonzero_pos.size == 0:
            # Covers all-zero segments and the empty segments produced by
            # unused label ids or empty input.
            pieces.append(segment)
            continue
        first, last = nonzero_pos[0], nonzero_pos[-1]
        filled = np.zeros_like(segment, dtype=float)
        filled[first:last + 1] = segment[first]
        pieces.append(filled)
    return np.hstack(pieces)
Output: [1. 1. 1. 0. 0. 2. 2. 2. 2. 0. 0. 0. 0. 1.]
import numpy as np
import pandas as pd
def foo(s):
    """Mark the positions between the first and last positive entries of ``s``.

    Args:
        s: 1-D sequence of numbers (NumPy array, pandas Series, list, ...).

    Returns:
        A list of ``bool`` with one entry per element of ``s``: ``True`` from
        the first positive entry through the last positive entry (inclusive),
        ``False`` elsewhere. All ``False`` when ``s`` has no positive entry.
    """
    # Positions are positional (0-based), independent of any Series index.
    positive_pos = np.flatnonzero(np.asarray(s) > 0)
    if positive_pos.size == 0:
        return [False] * len(s)
    start, end = positive_pos[0], positive_pos[-1]
    idx = np.arange(len(s))
    return ((idx >= start) & (idx <= end)).tolist()
# For every group, replace the entries flagged True by foo() with the
# group's first positive value, then convert the result back to a NumPy array.
# NOTE(review): a group without any positive entry would presumably make
# `x[x > 0].iloc[0]` fail on an empty selection -- confirm inputs.
pd.Series(array).groupby(group).transform(
lambda x: x.mask(foo(x), x[x > 0].iloc[0])).to_numpy()
# Result shown in the original answer:
# array([1, 1, 1, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 1])
def split(val, grp):
    """Split ``val`` into consecutive pieces wherever the label in ``grp`` changes.

    Args:
        val: 1-D values to split.
        grp: Label of every element of ``val``; each run of equal consecutive
            labels becomes one piece.

    Returns:
        A list of arrays as produced by ``np.array_split``, one per run.
    """
    grp = np.asarray(grp)
    # A piece starts at every position whose label differs from its predecessor.
    boundaries = np.flatnonzero(grp[1:] != grp[:-1]) + 1
    return np.array_split(val, boundaries)
def fill(val):
    """Fill the span from the first to the last positive entry with the first one.

    Args:
        val: 1-D sequence of numbers.

    Returns:
        A new array equal to ``val`` except that every element between the
        first and last positive entries (inclusive) is set to the first
        positive entry. Input without a positive entry is returned as an
        unchanged copy. The argument itself is never modified.
    """
    # Work on a copy so pieces that are views of a caller's array stay intact.
    val = np.array(val)
    positive_pos = np.flatnonzero(val > 0)
    if positive_pos.size:
        start, end = positive_pos[0], positive_pos[-1]
        val[start:end + 1] = val[start]
    return val
# Cut `array` into one piece per group, fill each piece, and join the
# pieces back into a single array.
np.concatenate([fill(x) for x in split(array, group)])
# Result shown in the original answer:
# array([1, 1, 1, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 1])
# Sample data used by the vectorised answer.
# Group label of every element (labels are sorted, one run per group).
group = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2])
# Values to process.
array = np.array([1, 2, 3, 0, 0, 2, 0, 3, 4, 0, 0, 0, 0, 1])
# Expected result: inside each group, the span from the first to the last
# non-zero value is filled with the first non-zero value.
targt = np.array([1, 1, 1, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 1])
您可以執行以下步驟:
步驟 1. 查找 array 中非零項的索引並標記新組的開始
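nonzero_idx -> [*0,1,2,/,*/,5,/,7,8,/,*/,/,/,13] (cross out the slashes; * marks where a group begins) -> [0,1,2,5,7,8,13]; marker_idx -> [0, 3, 6] (positions inside nonzero_idx where a new group starts)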
步驟 2. 查找每個組的開始和結束索引,使用np.ufunc.reduceat
starts -> [ 0, 5, 13] ends -> [ 2, 8, 13]
步驟 3. 考慮一個 out 數組,使得 np.cumsum(out) 折疊到 target 數組中。像這樣:
[1,0,0,-1,0,2,0,0,0,-2,0,0,0,1] -> [1,1,1,0,0,2,2,2,2,0,0,0,0,1]
現在,代碼:
# Vectorised fill; assumes `group` holds sorted labels, one per element of `array`.

# STEP 1: mask of non-zero entries, their positions, and -- inside that list of
# positions -- the offset at which each group's non-zero entries begin.
nonzero = (array != 0)
_, marker_idx = np.unique(group[nonzero], return_index=True)
nonzero_idx = np.arange(len(array))[nonzero]

# STEP 2: first and last non-zero position of every group that has one.
starts = np.minimum.reduceat(nonzero_idx, marker_idx)
ends = np.maximum.reduceat(nonzero_idx, marker_idx)

# STEP 3: build `out` so that its running sum switches each group's fill value
# on at `starts` and off again right after `ends`.
values = array[starts]
out = np.zeros_like(array)
out[starts] = values
stops = ends + 1
# A span reaching the very end of the array needs no switch-off entry.
inside = stops < len(array)
# Subtract instead of assign: when one group's span is immediately followed by
# the next group's start, both markers share a slot and must be combined.
out[stops[inside]] -= values[inside]

np.cumsum(out)
# Result shown in the original answer:
# array([1, 1, 1, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 1], dtype=int32)
無需循環!
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.