I tried to speed up this code using numba, but it still seems very slow. Someone else told me that this code can be run in about 5 minutes using C++. Therefore I am wondering if there is any way to speed up this code further?
I tried Numba and Cython. Cython seems slower, but it might be due to my ignorance of Cython.
import numpy as np
from numba import jit
import time
# Wall-clock reference; the script prints the elapsed time at the very end.
start = time.time()

# Sites are enumerated on an (L + 1) x (L + 1) grid, giving n sites in total.
L = 100
n = (L + 1) ** 2
realization = 1
Delta = 3.

# One row of 2 * n output values per realization.
Combinee = np.zeros((realization, 2 * n))
# Per-site coordinates, three columns per site, initialised to zero.
rr = np.zeros((n, 3))
@jit(nopython=True, nogil=True)
def findr(rr):
    """Write grid coordinates into the first two columns of ``rr`` in place.

    For every site index ``s`` in ``range(n)``, column 0 receives
    ``(s // (L + 1)) % (L + 1)`` and column 1 receives ``s % (L + 1)``.
    The third column is left untouched.  ``L`` and ``n`` are read from
    module scope; nothing is returned.
    """
    side = L + 1
    for site in range(n):
        rr[site, 0] = (site // side) % side
        rr[site, 1] = site % side


findr(rr)
@jit(nopython=True, nogil=True)
def checks():
    """Run one random realization and return its ``2 * n`` level values.

    Reads the module-level ``L``, ``n``, ``Delta`` and ``rr``.

    Steps, in order:

    1. Draw ``int(L**3 / 2)`` random 3-D positions in ``[0, L)`` for each of
       two sets of points and accumulate, for every site, the sum of
       ``1/|r - p|`` over the first set minus ``1/|r - q|`` over the second
       (``phiext``).
    2. Starting from ``Alln[i] == 1`` on every site, repeat two sweeps until
       neither changes anything:
       * a single-site sweep that moves ``Alln[i]`` between 0, 1 and 2
         according to the signs of ``VB`` and ``CB``;
       * a pair sweep over all ordered pairs ``(t, p)`` that shifts one unit
         between the two sites when the corresponding expression is negative.
       ``phicol`` is updated incrementally after every accepted change.
    3. Build ``Ei`` from ``-phiext`` plus the pair sum of
       ``(Alln[o] - 1) / |r_o - r_w|`` and return ``Ei - Delta/2`` in the
       first ``n`` entries and ``Ei + Delta/2`` in the last ``n``.

    Returns:
        A 1-D float array of length ``2 * n``.
    """
    # Same count the original computed twice (as L*L*L/2 and as L**3/2).
    n_charges = int(L * L * L / 2)
    pchargepos = np.random.rand(n_charges, 3) * L
    nchargepos = np.random.rand(n_charges, 3) * L
    Alln = np.ones((n,), dtype=np.int8)

    phiext = np.zeros(n)
    for pos1 in range(n):
        for pos2 in range(n_charges):
            phiext[pos1] += (1. / np.linalg.norm(rr[pos1] - pchargepos[pos2])
                             - 1. / np.linalg.norm(rr[pos1] - nchargepos[pos2]))

    phicol = np.zeros(n)
    while True:
        check = 0
        sscheck = 0

        # Single-site sweep.
        for i in range(n):
            oldn = Alln[i]
            VB = -phiext[i] - phicol[i] - Delta / 2
            CB = -phiext[i] - phicol[i] + Delta / 2
            # The four tests run in sequence on the *current* Alln[i], so an
            # earlier assignment can feed a later test; order matters.
            if Alln[i] == 0 and VB < 0:
                Alln[i] = 1
            if Alln[i] == 1 and VB > 0:
                Alln[i] = 0
            if Alln[i] == 1 and CB < 0:
                Alln[i] = 2
            if Alln[i] == 2 and CB > 0:
                Alln[i] = 1
            if Alln[i] != oldn:
                check = 1
                # Propagate the change on site i to every other site.
                for f in range(n):
                    if f != i:
                        phicol[f] -= (Alln[i] - oldn) / float(np.linalg.norm(rr[i] - rr[f]))
        print(Alln)

        # Pair sweep over all ordered pairs (t, p) with t != p.
        for t in range(n):
            for p in range(n):
                if p != t:
                    secondcheck = 0
                    # Only the entries for p and t are needed.  Building the
                    # full ``phiext + phicol`` array here would allocate n
                    # floats for every one of the n*(n-1) pairs.
                    Ep = phiext[p] + phicol[p]
                    Et = phiext[t] + phicol[t]
                    oldp = Alln[p]
                    oldt = Alln[t]
                    rtp = float(np.linalg.norm(rr[t] - rr[p]))

                    # Value tested for moving one unit from t to p, and from
                    # p to t, respectively.
                    move_tp = Et - Ep + Delta * (1 - oldt + oldp) - 1 / rtp
                    move_pt = -Et + Ep + Delta * (1 + oldt - oldp) - 1 / rtp

                    if oldt > 0 and oldp < 2:
                        if move_tp < 0:
                            secondcheck = 1
                    if oldp > 0 and oldt < 2:
                        if move_pt < 0:
                            secondcheck = -1
                    # When both directions are allowed and both values are
                    # negative, take the direction with the smaller value.
                    if oldp == 1 and oldt == 1 and move_tp < 0 and move_pt < 0:
                        if move_pt < move_tp:
                            secondcheck = -1
                        else:
                            secondcheck = 1

                    if secondcheck == 1:
                        Alln[t] -= 1
                        Alln[p] += 1
                    if secondcheck == -1:
                        Alln[t] += 1
                        Alln[p] -= 1
                    if secondcheck != 0:
                        sscheck = 1
                        phicol[p] += -(Alln[t] - oldt) / rtp
                        phicol[t] += -(Alln[p] - oldp) / rtp
                        for y in range(n):
                            if y != p and y != t:
                                phicol[y] += (-(Alln[p] - oldp) / float(np.linalg.norm(rr[p] - rr[y]))
                                              - (Alln[t] - oldt) / float(np.linalg.norm(rr[t] - rr[y])))
        print(check, " ", sscheck, ": ", Alln)
        # Stop once a full pass of both sweeps changed nothing.
        if check == 0 and sscheck == 0:
            break

    print("Last:")
    print(Alln)

    Efinial = np.zeros(2 * n)
    Ei = -phiext
    for w in range(n):
        for o in range(n):
            if o != w:
                row = np.linalg.norm(rr[o] - rr[w])
                Ei[w] += (-1 + Alln[o]) / float(row)
        Efinial[w] = Ei[w] - Delta / 2
        Efinial[w + n] = Ei[w] + Delta / 2
    return Efinial
@jit(nopython=True, nogil=True)
def loop(Combinee1):
    """Fill row ``d`` of ``Combinee1`` with ``checks()`` for each realization.

    The number of rows written is the module-level ``realization``.  The
    array is modified in place and also returned.
    """
    run = 0
    while run < realization:
        Combinee1[run] = checks()
        run += 1
    return Combinee1
# Run every realization, then turn the 2-D result into a flat Python list.
Combinee = loop(Combinee)
Combinee = Combinee.flatten()
Combinee = Combinee.tolist()

# Mode 'w' truncates any previous output, so no separate open/close is needed
# to clear the file first; the context manager closes the file even if the
# write raises.
with open('bothcheckdata.txt', 'w') as f:
    f.write(str(Combinee))

# Report the total wall-clock time in seconds.
end = time.time()
print(end - start)
The code you have provided is pretty big and messy. However, the biggest problem is that you use numpy in the wrong way. Numpy is all about vectorization, not loops. So how can I further speed up my code?
- use numpy correctly.
Let's have a look at a very little portion of your code:
# original code with nested loops with +1 to avoid zero division
def f_1(rr, pchargepos, nchargepos, phiext):
    """Accumulate the pairwise term into ``phiext`` with explicit loops.

    For every element ``rr[i]`` and every index ``j`` of ``nchargepos`` this
    adds ``1 / (|rr[i] - pchargepos[j]| + 1) - 1 / (|rr[i] - nchargepos[j]| + 1)``
    to ``phiext[i]``.  The ``+ 1`` keeps the denominator non-zero when two
    positions coincide.

    Args:
        rr: positions to evaluate at; one entry of ``phiext`` per element.
        pchargepos: positions contributing with a plus sign; indexed with
            the same indices as ``nchargepos``.
        nchargepos: positions contributing with a minus sign.
        phiext: float array of length ``len(rr)``; updated in place.

    Returns:
        The same ``phiext`` object that was passed in.
    """
    # Iterate over the actual length of rr rather than a module-level ``n``,
    # so the function also works outside the demo script.
    for pos1 in range(len(rr)):
        for pos2 in range(len(nchargepos)):
            phiext[pos1] += (
                1 / (np.linalg.norm(rr[pos1] - pchargepos[pos2]) + 1) -
                1 / (np.linalg.norm(rr[pos1] - nchargepos[pos2]) + 1))
    return phiext
# modified code with numpy methods instead of loops
def f_2(rr, pchargepos, nchargepos, phiext):
    """Vectorized counterpart of the nested-loop version for 1-D positions.

    Forms the full table of ``1 / (|rr[i] - pos[j]| + 1)`` for both position
    sets, subtracts the ``nchargepos`` table from the ``pchargepos`` table
    and adds the row sums to ``phiext`` in place.

    Returns:
        The same ``phiext`` object that was passed in.
    """
    # Column vector of evaluation points; subtracting a row of charge
    # positions broadcasts to a (len(rr), len(charges)) table.
    points = np.asarray(rr)[:, np.newaxis]
    plus_term = 1 / (abs(points - np.asarray(pchargepos)[np.newaxis, :]) + 1)
    minus_term = 1 / (abs(points - np.asarray(nchargepos)[np.newaxis, :]) + 1)
    phiext += (plus_term - minus_term).sum(axis=1)
    return phiext
if __name__ == '__main__':
    # Demo sizes: n evaluation points, m positions in each charge set.
    n, m = 100, 200
    # Three integer draws from [-10, 10), made in the order
    # rr, pchargepos, nchargepos.
    rr, pchargepos, nchargepos = (
        np.random.randint(-10, 10, size=(count,)) for count in (n, m, m)
    )
    # Each implementation gets its own zeroed accumulator because both
    # update the array they are given in place.
    phiext = np.zeros((n,))
    ans_1 = f_1(rr, pchargepos, nchargepos, phiext)
    phiext = np.zeros((n,))
    ans_2 = f_2(rr, pchargepos, nchargepos, phiext)
    # check results
    print(sum(ans_1), sum(ans_2))
Running time on my machine:
%timeit f_1(rr, pchargepos, nchargepos, phiext)
211 ms ± 967 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)
%timeit f_2(rr, pchargepos, nchargepos, phiext)
205 µs ± 214 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
So numpy is about 1000x faster than your nested loops in plain python.
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address. If you have any questions, please contact: yoyou2525@163.com.