[英]Applying a mask on another mask (or AND condition) for image analysis
[英]applying a binned mask to an image
我想屏蔽(设置为 0)一幅非常大的图像中的某些像素。 我有一个值为 0 或 1 的掩码数组,用于指定要屏蔽的图像像素。 该掩码通常由我的应用程序中图像查看器小部件的橡皮擦工具生成。 之后在进行图像分析时,通过将掩码像素与图像像素相乘,把掩码应用到数据上。
出于性能原因,掩码的尺寸小于全分辨率图像。 掩码像素通常会覆盖全分辨率图像的 4*4 或 8*8 像素。
我想使用 numba 优化屏蔽函数的性能。 我的问题是,每当我尝试并行化算法时,性能反而会下降。
这是我的测试。 图像大小是我的真实数据的典型大小。
import numpy
import numba
def apply_binned_mask_numpy(image, mask, binning_factor):
    """Return a copy of *image* with the binned *mask* applied.

    Each mask element covers a ``binning_factor x binning_factor`` tile of
    the image; every image pixel is multiplied by the mask value of the tile
    it belongs to.

    Args:
        image: 2-D array. It is left untouched; the work is done on a copy.
        mask: 2-D array of multipliers (0 or 1 in the intended use). It is
            expected to cover the whole image, i.e. to have at least
            ``ceil(rows / binning_factor)`` rows and
            ``ceil(cols / binning_factor)`` columns.
        binning_factor: Number of image pixels covered by one mask element
            along each axis.

    Returns:
        A new array with the same shape and dtype as *image*.
    """
    new_image = image.copy()
    for i in range(binning_factor):
        for j in range(binning_factor):
            # Strided view selecting the pixel at offset (i, j) of every tile.
            image_slice = new_image[i::binning_factor, j::binning_factor]
            # The last tile row/column may be incomplete, so the view can be
            # smaller than the mask: crop the mask to the view's shape.
            actual_mask = mask[:image_slice.shape[0], :image_slice.shape[1]]
            # In-place multiply writes through the view into new_image.
            image_slice *= actual_mask
    return new_image
# njit (nopython mode) instead of bare jit: compilation fails loudly instead
# of silently falling back to slow object mode.
@numba.njit
def apply_binned_mask_numba(image, mask, binning_factor):
    """Return a copy of *image* with the binned *mask* applied (numba, serial).

    Each mask element covers a ``binning_factor x binning_factor`` tile of
    the image; every image pixel is multiplied by the mask value of the tile
    it belongs to.

    Args:
        image: 2-D array. It is left untouched; the work is done on a copy.
        mask: 2-D array of multipliers (0 or 1 in the intended use),
            expected to cover the whole image once expanded by
            ``binning_factor``.
        binning_factor: Number of image pixels covered by one mask element
            along each axis.

    Returns:
        A new array with the same shape and dtype as *image*.
    """
    new_image = image.copy()
    for i in range(binning_factor):
        for j in range(binning_factor):
            # Strided view selecting the pixel at offset (i, j) of every tile.
            image_slice = new_image[i::binning_factor, j::binning_factor]
            # Crop the mask to the view: trailing tiles may be incomplete.
            actual_mask = mask[:image_slice.shape[0], :image_slice.shape[1]]
            image_slice *= actual_mask
    return new_image
@numba.njit(parallel=True)
def apply_binned_mask_numba_parallel(image, mask, binning_factor):
    """Return a copy of *image* with the binned *mask* applied (numba, parallel).

    Same strided algorithm as the serial version, with the outer offset loop
    distributed over threads via ``numba.prange``.

    Args:
        image: 2-D array. It is left untouched; the work is done on a copy.
        mask: 2-D array of multipliers (0 or 1 in the intended use),
            expected to cover the whole image once expanded by
            ``binning_factor``.
        binning_factor: Number of image pixels covered by one mask element
            along each axis. It is also the number of parallel iterations.

    Returns:
        A new array with the same shape and dtype as *image*.
    """
    new_image = image.copy()
    # Iteration i only touches rows i, i + binning_factor, ... so different
    # prange iterations never write to the same pixel.
    for i in numba.prange(binning_factor):
        for j in range(binning_factor):
            # Strided view selecting the pixel at offset (i, j) of every tile.
            image_slice = new_image[i::binning_factor, j::binning_factor]
            # Crop the mask to the view: trailing tiles may be incomplete.
            actual_mask = mask[:image_slice.shape[0], :image_slice.shape[1]]
            image_slice *= actual_mask
    return new_image
if __name__ == '__main__':
    import time

    # Image dimensions are deliberately not multiples of the binning factor,
    # so the last tile row/column is incomplete.
    a = numpy.arange(7997*7994).reshape((7997, 7994))
    # mask with values 0 or 1
    mask = numpy.random.randint(0, 2, (1000, 1000), dtype=numpy.uint8)

    t0 = time.time()
    b = apply_binned_mask_numpy(a, mask, 8)
    print("numpy", time.time() - t0)

    # NOTE: each numba function is called only once here, so the timings
    # below include its JIT compilation on first call.
    t0 = time.time()
    c = apply_binned_mask_numba(a, mask, 8)
    print("numba", time.time() - t0)

    t0 = time.time()
    d = apply_binned_mask_numba_parallel(a, mask, 8)
    print("numba p", time.time() - t0)

    # Serial and parallel numba results must agree.
    assert numpy.array_equal(c, d)
此代码产生以下结果:
numpy 0.3541719913482666
numba 0.55484938621521
numba p 1.4546563625335693
我已经尝试了这种更朴素的实现的变体,但没有明显的加速:
@numba.njit(parallel=True)
def apply_binned_mask_numba_parallel(image, mask, binning_factor):
    """Return a copy of *image* with the binned *mask* applied (numba, parallel).

    Loops over mask elements (rows in parallel) and multiplies every image
    pixel of the corresponding ``binning_factor x binning_factor`` tile by
    that mask value. Tiles that stick out of the image are clipped to the
    image bounds.

    Args:
        image: 2-D array. It is left untouched; the work is done on a copy.
        mask: 2-D array of multipliers (0 or 1 in the intended use). Image
            pixels not covered by any mask element are left unchanged.
        binning_factor: Number of image pixels covered by one mask element
            along each axis.

    Returns:
        A new array with the same shape and dtype as *image*.
    """
    new_image = image.copy()
    n_rows, n_cols = image.shape
    # Each prange iteration handles one mask row, i.e. a disjoint band of
    # image rows, so iterations never write to the same pixel.
    for k in numba.prange(mask.shape[0]):
        row_start = k * binning_factor
        # Clamp to the image: numba does not bounds-check indexing by
        # default, so an index equal to the dimension size must never be used.
        row_stop = min(row_start + binning_factor, n_rows)
        for mask_col in range(mask.shape[1]):
            col_start = mask_col * binning_factor
            col_stop = min(col_start + binning_factor, n_cols)
            mask_value = mask[k, mask_col]
            # Ranges are empty when the tile lies entirely outside the image.
            for row_idx in range(row_start, row_stop):
                for col_idx in range(col_start, col_stop):
                    new_image[row_idx, col_idx] *= mask_value
    return new_image
看来我没有从 numba 中获得任何性能提升。 知道我在这里做错了什么吗?
我忘了考虑编译时间。 如果我至少重复使用我的函数两次,第二次运行会更快。
if __name__ == '__main__':
    import time

    a = numpy.random.randint(0, 256, (15997, 7994), dtype=numpy.uint8)
    # mask with values 0 or 1
    mask = numpy.random.randint(0, 2, (2000, 1000), dtype=numpy.uint8)
    binning = 8

    t0 = time.time()
    b = apply_binned_mask_numpy(a, mask, binning)
    print("numpy", time.time() - t0)

    # First call of a numba function includes JIT compilation; the second
    # call ("run2") measures the compiled code only.
    t0 = time.time()
    c = apply_binned_mask_numba(a, mask, binning)
    print("numba", time.time() - t0)

    t0 = time.time()
    c = apply_binned_mask_numba(a, mask, binning)
    print("numba run2", time.time() - t0)

    t0 = time.time()
    d = apply_binned_mask_numba_parallel(a, mask, binning)
    print("numba p", time.time() - t0)

    t0 = time.time()
    d = apply_binned_mask_numba_parallel(a, mask, binning)
    print("numba p run2", time.time() - t0)

    # Serial and parallel numba results must agree.
    assert numpy.array_equal(c, d)
现在的结果是:
numpy 0.3582308292388916
numba 0.5374748706817627
numba run2 0.21624493598937988
numba p 1.5098681449890137
numba p run2 0.16219329833984375
这更有意义,但我仍然对并行版本感到失望。
用“原始”版本替换并行代码仍然会降低性能:
@numba.njit(parallel=True)
def apply_binned_mask_numba_parallel(image, mask, binning_factor):
    """Return a copy of *image* with the binned *mask* applied (numba, parallel).

    Iterates in parallel over the flat pixel index, converts it to a
    (row, col) pair and multiplies that pixel by the mask element of the
    tile it belongs to.

    Args:
        image: 2-D array. It is left untouched; the work is done on a copy.
        mask: 2-D array of multipliers (0 or 1 in the intended use). It is
            expected to cover the whole image once expanded by
            ``binning_factor``; the mask lookup is not range-checked here.
        binning_factor: Number of image pixels covered by one mask element
            along each axis.

    Returns:
        A new array with the same shape and dtype as *image*.
    """
    new_image = image.copy()
    n_cols = image.shape[1]  # loop-invariant, hoisted out of the prange body
    # Every iteration writes exactly one distinct pixel, so there is no
    # write conflict between parallel iterations.
    for i in numba.prange(image.size):
        # Flat index -> 2-D coordinates.
        row = i // n_cols
        col = i - row * n_cols
        # Image coordinates -> mask coordinates.
        mask_row = row // binning_factor
        mask_col = col // binning_factor
        new_image[row, col] *= mask[mask_row, mask_col]
    return new_image
结果是
numba p run2 0.6018044948577881
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.