#### Vectorize a 6 for loop cumulative sum in python

``````import numpy as np

def func1(a,b,c,d):
    '''
    Minimal working example of multiple summation.

    Evaluates, with six nested loops, the sum over
    ai in [0, a), bi in [0, b), ci in [0, c), di in [0, d),
    ei in [0, ai + bi) and fi in [0, ci + di) of

        2**(ei - fi - ai - ci - di + 1) * (ei**2 - 2*ei*fi - 7*di) * ei!

    Returns the tuple (a, b, c, d, B), where B is the accumulated sum.
    '''
    # Use the standard-library factorial directly: ``np.math`` was only an
    # alias of the stdlib ``math`` module and no longer exists in current
    # NumPy releases, so ``np.math.factorial`` raises AttributeError there.
    from math import factorial

    B = 0
    for ai in range(0,a):
        for bi in range(0,b):
            for ci in range(0,c):
                for di in range(0,d):
                    for ei in range(0,ai+bi):
                        for fi in range(0,ci+di):
                            B += (2)**(ei-fi-ai-ci-di+1)*(ei**2-2*(ei*fi)-7*di)*factorial(ei)

    return a, b, c, d, B
``````

4 个回复

``````import numpy as np
import numba as nb

@nb.njit()
def func1_jit(a, b, c, d):
    """Six-fold sum with precomputed power-of-two and factorial tables.

    Sums 2**(ei - fi - ai - ci - di + 1) * (ei**2 - 2*ei*fi - 7*di) * ei!
    over ai in [0, a), bi in [0, b), ci in [0, c), di in [0, d),
    ei in [0, ai + bi) and fi in [0, ci + di), and returns the total.
    """
    # A term exists only when some ai + bi >= 1 and some ci + di >= 1 are
    # reachable; bailing out here also keeps the factorial table non-empty.
    if a + b < 3 or c + d < 3:
        return 0.0
    # Precompute
    # The exponent ei - fi - ai - ci - di + 1 is smallest for ei = 0,
    # fi = ci + di - 1 with ai, ci, di at their maxima, and largest for
    # ei = ai + bi - 1, fi = 0, ci + di = 1.  The table must span exactly
    # that interval, otherwise the lookup index below can become negative
    # and silently wrap around to the wrong power.
    exp_min = 7 - a - 2 * (c + d)
    exp_max = b - 2
    exp = 2. ** np.arange(exp_min, exp_max + 1)
    fact_e = np.empty(a + b - 2)
    fact_e[0] = 1
    for ei in range(1, len(fact_e)):
        fact_e[ei] = ei * fact_e[ei - 1]
    # Loops
    B = 0.0
    for ai in range(0, a):
        for bi in range(0, b):
            for ci in range(0, c):
                for di in range(0, d):
                    for ei in range(0, ai + bi):
                        for fi in range(0, ci + di):
                            B += exp[ei - fi - ai - ci - di + 1 - exp_min] * (ei * ei - 2 * (ei * fi) - 7 * di) * fact_e[ei]
    return B
``````

``````import numpy as np
import numba as nb

@nb.njit(parallel=True)
def func1_par(a, b, c, d):
    """Six-fold sum with lookup tables, outermost loop run with prange.

    Computes the same quantity as the serial loop version: the sum of
    2**(ei - fi - ai - ci - di + 1) * (ei**2 - 2*ei*fi - 7*di) * ei!
    over ai in [0, a), bi in [0, b), ci in [0, c), di in [0, d),
    ei in [0, ai + bi) and fi in [0, ci + di).
    """
    # A term exists only when some ai + bi >= 1 and some ci + di >= 1 are
    # reachable; bailing out here also keeps the factorial table non-empty.
    if a + b < 3 or c + d < 3:
        return 0.0
    # Precompute
    # The exponent ei - fi - ai - ci - di + 1 lies in
    # [7 - a - 2*(c + d), b - 2]; the table has to cover that whole interval
    # so the lookup index below never goes negative and wraps around.
    exp_min = 7 - a - 2 * (c + d)
    exp_max = b - 2
    exp = 2. ** np.arange(exp_min, exp_max + 1)
    fact_e = np.empty(a + b - 2)
    fact_e[0] = 1
    for ei in range(1, len(fact_e)):
        fact_e[ei] = ei * fact_e[ei - 1]
    # Loops
    # One partial sum per ai, each written to its own slot, then reduced.
    B = np.empty((a,))
    for ai in nb.prange(0, a):
        Bi = 0.0
        for bi in range(0, b):
            for ci in range(0, c):
                for di in range(0, d):
                    for ei in range(0, ai + bi):
                        for fi in range(0, ci + di):
                            Bi += exp[ei - fi - ai - ci - di + 1 - exp_min] * (ei * ei - 2 * (ei * fi) - 7 * di) * fact_e[ei]
        B[ai] = Bi
    return np.sum(B)
``````

import numba as nb

@nb.njit(parallel=True)
def func1_arr(a_arr, b_arr, c_arr, d_arr):
    """Apply func1_jit to each (a, b, c, d) taken position-wise from the inputs.

    The output has one entry per element of ``a_arr``; the other three
    arrays are indexed with the same positions.
    """
    n_items = len(a_arr)
    out = np.empty(n_items)
    for k in nb.prange(n_items):
        out[k] = func1_jit(a_arr[k], b_arr[k], c_arr[k], d_arr[k])
    return out
``````

``````import numpy as np
from numba import njit

def func1(a, b, c, d):
    """Build the lookup tables in plain NumPy/SciPy and run the compiled loops.

    Prepares a table of powers of two and a table of factorials, then hands
    them, together with the table offset ``exp_min``, to ``func1_inner``,
    whose result is returned unchanged.
    """
    # This snippet uses scipy.special without importing it anywhere.
    import scipy.special

    # The exponent ei - fi - ai - ci - di + 1 evaluated by func1_inner lies in
    # [7 - a - 2*(c + d), b - 2].  The table has to cover that whole interval;
    # a higher lower bound makes the index ``... - exp_min`` negative for
    # c + d > b + 2, which wraps around and picks the wrong power.
    exp_min = 7 - a - 2 * (c + d)
    exp_max = b - 2
    exp = 2. ** np.arange(exp_min, exp_max + 1)
    # ei never exceeds a + b - 3, so a + b - 2 factorials are enough.
    ee = np.arange(a + b - 2)
    fact_e = scipy.special.factorial(ee)
    return func1_inner(a, b, c, d, exp_min, exp, fact_e)

@njit()
def func1_inner(a, b, c, d, exp_min, exp, fact_e):
    """Run the six nested loops, reading powers and factorials from tables.

    ``exp`` is indexed with (exponent - exp_min) and ``fact_e`` with ei;
    both tables are supplied by the caller.  Returns the accumulated sum.
    """
    total = 0
    for ai in range(a):
        for bi in range(b):
            n_e = ai + bi
            for ci in range(c):
                for di in range(d):
                    n_f = ci + di
                    # Part of the table index that is fixed for the two
                    # innermost loops.
                    shift = 1 - exp_min - ai - ci - di
                    for ei in range(n_e):
                        for fi in range(n_f):
                            weight = exp[ei - fi + shift]
                            poly = ei * ei - 2 * (ei * fi) - 7 * di
                            total += weight * poly * fact_e[ei]
    return total
``````

``````a, b, c, d = 4, 6, 3, 4
# The original function
%timeit func1_orig(a, b, c, d)
# 2.07 ms ± 33.7 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
# The grid-evaluated function
%timeit func1_grid(a, b, c, d)
# 256 µs ± 25 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
# The precomputation + JIT-compiled function
%timeit func1_jit(a, b, c, d)
# 19.6 µs ± 3.25 µs per loop (mean ± std. dev. of 7 runs, 100000 loops each)
``````

``````import numpy as np
import scipy.special

def func1(a, b, c, d):
    """Evaluate the six-fold sum on one broadcast grid instead of loops.

    Open grids are built for all six indices, with the two innermost axes
    sized a + b - 2 and c + d - 2.  Every grid cell is evaluated and the
    cells that the loop version would not visit (ei >= ai + bi or
    fi >= ci + di) are zeroed out before summing.
    """
    grid = np.ogrid[:a, :b, :c, :d, :a + b - 2, :c + d - 2]
    i_a, i_b, i_c, i_d, i_e, i_f = grid
    # The three factors of each term, broadcast over the full 6-D grid
    power = (2.) ** (i_e - i_f - i_a - i_c - i_d + 1)
    poly = i_e ** 2 - 2 * (i_e * i_f) - 7 * i_d
    terms = power * poly * scipy.special.factorial(i_e)
    # True exactly where the two innermost loops would have run
    in_range = (i_e < i_a + i_b) & (i_f < i_c + i_d)
    return np.sum(terms * in_range)

print(func1(4, 6, 3, 4))
# 21769947.844726562
``````

显然，随着参数增大，这种方法的内存开销会增长得非常快。代码实际执行的计算量比必要的多：两个内层循环的迭代次数随外层变量变化，因此（在这种方法中）必须按最大的迭代次数建立网格，然后再剔除不需要的元素。我们希望矢量化带来的加速能弥补这一点。下面是一个小的 IPython 基准测试：

``````a, b, c, d = 4, 6, 3, 4
# func1_orig is the original loop-based version
%timeit func1_orig(a, b, c, d)
# 2.9 ms ± 110 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
# func1 here is the vectorized version
%timeit func1(a, b, c, d)
# 210 µs ± 6.34 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
``````

``````def func1(a, b, c, d):
B = 0
e = np.arange(a + b - 2).reshape((-1, 1))
f = np.arange(c + d - 2)
for ai in range(0, a):
for bi in range(0, b):
ei = e[:ai + bi]
for ci in range(0, c):
for di in range(0, d):
fi = f[:ci + di]
B += np.sum((2.) ** (ei - fi - ai - ci - di + 1) * (ei ** 2 - 2 * (ei * fi) - 7 * di) * scipy.special.factorial(ei))
return B
``````

``````import numpy as np
import numba as nb

@nb.njit()
def factorial(a):
    """Return a! as a float, built up as the product 1 * 2 * ... * a."""
    product = 1.
    k = 1
    while k <= a:
        product *= k
        k += 1
    return product

@nb.njit()
def func1(a, b, c, d):
    """Six-fold sum with precomputed power-of-two and factorial tables.

    Sums 2**(ei - fi - ai - ci - di + 1) * (ei**2 - 2*ei*fi - 7*di) * ei!
    over ai in [0, a), bi in [0, b), ci in [0, c), di in [0, d),
    ei in [0, ai + bi) and fi in [0, ci + di), and returns the total.
    """
    B = 0.

    # A term exists only when some ai + bi >= 1 and some ci + di >= 1 are
    # reachable; returning early also avoids asking for a table of
    # non-positive length below.
    if a + b < 3 or c + d < 3:
        return B

    # The exponent ei - fi - ai - ci - di + 1 lies in
    # [7 - a - 2*(c + d), b - 2]; the table has to cover that whole interval
    # so the lookup index in the loops never goes negative and wraps around.
    exp_min = 7 - a - 2 * (c + d)
    exp_max = b - 2
    exp = 2. ** np.arange(exp_min, exp_max + 1)

    # ei never exceeds a + b - 3, so a + b - 2 factorials are enough.
    fact_e=np.empty(a + b - 2)
    for i in range(a + b - 2):
        fact_e[i]=factorial(i)

    for ai in range(0, a):
        for bi in range(0, b):
            for ci in range(0, c):
                for di in range(0, d):
                    for ei in range(0, ai + bi):
                        for fi in range(0, ci + di):
                            B += exp[ei - fi - ai - ci - di + 1 - exp_min] * (ei * ei - 2 * (ei * fi) - 7 * di) * fact_e[ei]
    return B
``````

``````@nb.njit(parallel=True)
def func_p(a_vec,b_vec,c_vec,d_vec):
res=np.empty(a_vec.shape[0])
for i in nb.prange(a_vec.shape[0]):
res[i]=func1(a_vec[i], b_vec[i], c_vec[i], d_vec[i])
return res
``````

``````a_vec=np.random.randint(low=2,high=10,size=1000000)
b_vec=np.random.randint(low=2,high=10,size=1000000)
c_vec=np.random.randint(low=2,high=10,size=1000000)
d_vec=np.random.randint(low=2,high=10,size=1000000)
# Each array above holds 1,000,000 random integers; randint's `high` bound is
# exclusive, so the values lie in 2..9.

# Evaluate func1 for every (a, b, c, d) quadruple through the parallel wrapper func_p.
res_2=func_p(a_vec,b_vec,c_vec,d_vec)
``````

``````In [37]: def nested_sig(args):
...:     # NOTE(review): `arrays` is not defined in this listing; the parameter is named `args` - confirm which is meant.
...:     # NOTE(review): `cartesian_product` is not defined in this listing - presumably a helper from elsewhere; verify.
...:     base_prod = cartesian_product(*arrays)
...:     second_prod = cartesian_product(base_prod[:,:2].sum(1), base_prod[:,2:].sum(1))
...:     total = np.column_stack((base_prod, second_prod))
...:     # the items in each row denotes the following variables in order:
...:     # ai, bi, ci, di, ei, fi
...:     x = total[:, 4] - total[:, 5] - total[:, 0] - total[:, 2] - total[:, 3] + 1
...:     y = total[:, 4] - total[:, 5]
...:     # NOTE(review): the middle factor uses 2*y, i.e. 2*(ei - fi), whereas the loop version has 2*(ei*fi) - verify.
...:     # NOTE(review): np.math.factorial is handed a whole column here; confirm it accepts array input.
...:     result = np.power(2, x) * (np.power(total[:, 4], 2) - 2*y - 7*total[:, 3]) * np.math.factorial(total[:,4])
...:     return result
``````

• `range(0,a)`

• 你在内循环中做了很多工作

• 您以任意顺序对各项求和，当某些项的数值很大时存在精度损失的风险。

``````@numba.njit
def func1o(a,b,c,d):
"2**(ei-fi-ai-ci-di+1)*(ei**2-2*(ei*fi)-7*di)*ei!"
# NOTE(review): indentation was lost in this listing, so which loop each of
# the update statements below belongs to cannot be confirmed from the text -
# TODO restore the nesting from the original answer before running this.
# NOTE(review): the names `numba` and `arange` are used here, while the other
# listings import `numba as nb` and call `np.arange` - confirm the imports.
# NOTE(review): the loop bounds here are a+1, b+1, c+1, d+1, ai+bi+1 and
# ci+di+1, unlike the range(0, a) style bounds of the other listings - verify.
# POW, SUM and FACT presumably carry the power-of-two factor, the polynomial
# factor and ei! from one iteration to the next, so that each term needs only
# a few multiplications and additions - verify against the formula above.
POW=2.;                 SUM=0.;
# Terms are collected in a list rather than summed immediately.
L=[]
for ai in arange(0.,a+1):
for bi in range(0,b+1):
for ci in range(0,c+1):
for di in range(0,d+1):
FACT=1.
for ei in arange(0,ai+bi+1):
for fi in range(0,ci+di+1):
L.append(POW*SUM*FACT)
POW /= 2
SUM -= 2*ei
POW *= 2
SUM += 2*(ei-fi)+1
FACT *= ei+1
POW /=2
SUM -= 7*di
POW /= 2
POW /= 2
# Sort the collected terms by absolute value (ascending) and add them up in
# that order, i.e. smallest magnitudes first.
A=np.array(L)
I=np.abs(A).argsort()
return A[I].sum()
``````
2 python中循环的向量化

3 在Python中向量化循环

2018-05-04 21:39:10 1 83   jit/ numba
4 如何在 Python 中向量化这个 for 循环？

6 加快/向量化Python中的嵌套循环

2018-06-03 21:20:18 1 99   nested
7 这两个循环如何在Python中向量化？

8 在Python中向量化具有重复索引的循环

9 向量化python中的简单函数：避免double for循环

2016-02-28 22:20:08 2 132   python
10 如何在python中向量化嵌套的for循环