I'm working on a big data set and i need to create and export images at each data point. I'm hitting the memory saturation at around the middle of the process. I applied some memory management tools and libraries and found that there's a memory leak and apparently comes from matplotlib. I've already tried the garbage collector, deleting all the variables within the loop, and even deleting the figure after saved but memory keeps increasing at each iteration. Here the code, the line by line memory analysis and the top 10 impact on the memory by "tracemalloc"
displacement=['Double sinus','Linear','Sinus in X','Sinus in Y']
os.chdir('E:\\Quality\\Magnitude')
disp_axis=[1,1,0,1]
import gc
from matplotlib import figure
import tracemalloc
tracemalloc.start(10)
for a in range(len(Folder_u)):
for b in range(len(Folder_u[a])):
u_exp = pd.read_csv(Folder_u[a][b],header=None,delimiter=' ',engine='pyarrow')
v_exp = pd.read_csv(Folder_v[a][b],header=None,delimiter=' ',engine='pyarrow')
TH_u = pd.read_csv(u_th[b],sep=',',header=None,engine='pyarrow')
TH_v = pd.read_csv(v_th[b],sep=',',header=None,engine='pyarrow')
M_exp = np.sqrt(u_exp.dropna(axis=1)**2+v_exp.dropna(axis=1)**2)
M_th = np.sqrt(TH_u**2+TH_v**2)
Error = abs(M_exp-M_th)/M_th *100
MM_E = Error.mean(axis=disp_axis[b])
MM_exp = M_exp.mean(axis=disp_axis[b])
MM_th = M_th.mean(axis=disp_axis[b])
name = N[a]
title=name.replace('.json','')
xth = np.linspace(0,len(MM_th),len(MM_th))
xe = np.linspace(0,len(MM_exp),len(MM_exp))
## mean error
#fig=figure.Figure(figsize=(15,10))
#ax = fig.subplots(1)
fig,ax = plt.subplots(figsize=(15,10))
plt.title('displacement = '+displacement[b]+', Config = ' + title ,fontsize=20,fontweight='bold')
#plt.title(Pyr_Type[l]+displacement[j])
ax.plot(MM_E,'b')
plt.ylim([0,100])
plt.yticks(np.linspace(0,100,11))
plt.ylabel('mean magnitude error [%]',fontsize=14)
plt.xlabel('Pixels',fontsize=14)
plt.grid()
#plt.ylim([0,5])
ax1=ax.twinx()
ax1.plot(xth,MM_th,'k',xe,MM_exp,'r-.')
plt.ylim([0,23])
plt.ylabel('mean displacement magnitude [Pixels]',fontsize=14)
plt.xlabel('Pixels',fontsize=14)
#plt.ylim([4,23])
#fig.set_dpi(300)
plt.grid()
fig.legend(['Mean error','Theoretical','Results'],fontsize=14,loc='upper right')
fig.set_dpi(250)
plt.show(fig)
plt.savefig('Mean_error_'+title+'_'+str(b)+'.png')
print(a)
del u_exp
del v_exp
del TH_u
del TH_v
del M_exp
del M_th
del Error
del MM_E
del MM_exp
del MM_th
del fig
del ax, ax1, name, title, xth,xe
gc.collect()
print('deleted')
snapshot = tracemalloc.take_snapshot()
top_stats = snapshot.statistics('lineno')
tracemalloc.clear_traces()
Here the analysis of the memory broken down line by line:
Line # Mem usage Increment Occurrences Line Contents
=============================================================
241 2261.1 MiB 2261.1 MiB 1 @profile
242
243 def test():
244
245
246
247 #Pyr_Type = ['Haar (old)','Haar (NPP)','Burt (old)','Burt (new,old conv.)','Burt (new,new conv.)','Burt(NPP)']
248 2261.1 MiB 0.0 MiB 1 displacement=['Double sinus','Linear','Sinus in X','Sinus in Y']
249 2261.1 MiB 0.0 MiB 1 os.chdir('E:\\Quality\\Magnitude')
250 2261.1 MiB 0.0 MiB 1 disp_axis=[1,1,0,1]
251 2261.1 MiB 0.0 MiB 1 import gc
252 # from matplotlib import figure
253
254 2261.1 MiB 0.0 MiB 1 import tracemalloc
255 2261.1 MiB 0.0 MiB 1 tracemalloc.start(10)
256
257
258
259
260
261 2432.0 MiB 0.0 MiB 2 for a in range(1):
262 2432.0 MiB 0.0 MiB 5 for b in range(len(Folder_u[a])):
263 2361.8 MiB -34.9 MiB 4 u_exp = pd.read_csv(Folder_u[a][b],header=None,delimiter=' ',engine='pyarrow')
264 2361.0 MiB 2.0 MiB 4 v_exp = pd.read_csv(Folder_v[a][b],header=None,delimiter=' ',engine='pyarrow')
265 2399.2 MiB 110.1 MiB 4 TH_u = pd.read_csv(u_th[b],sep=',',header=None,engine='pyarrow')
266 2402.3 MiB 6.8 MiB 4 TH_v = pd.read_csv(v_th[b],sep=',',header=None,engine='pyarrow')
267 2433.4 MiB 126.2 MiB 4 M_exp = np.sqrt(u_exp.dropna(axis=1)**2+v_exp.dropna(axis=1)**2)
268 2464.9 MiB 124.5 MiB 4 M_th = np.sqrt(TH_u**2+TH_v**2)
269 2495.9 MiB 125.0 MiB 4 Error = abs(M_exp-M_th)/M_th *100
270 2495.9 MiB 0.0 MiB 4 MM_E = Error.mean(axis=disp_axis[b])
271 2495.9 MiB 0.0 MiB 4 MM_exp = M_exp.mean(axis=disp_axis[b])
272 2495.9 MiB 0.0 MiB 4 MM_th = M_th.mean(axis=disp_axis[b])
273
274 2495.9 MiB 0.0 MiB 4 name = N[a]
275 2495.9 MiB 0.0 MiB 4 title=name.replace('.json','')
276
277 2495.9 MiB 0.0 MiB 4 xth = np.linspace(0,len(MM_th),len(MM_th))
278 2495.9 MiB 0.0 MiB 4 xe = np.linspace(0,len(MM_exp),len(MM_exp))
279 ## mean error
280
281 #fig=figure.Figure(figsize=(15,10))
282 #ax = fig.subplots(1)
283 2497.9 MiB -27.5 MiB 4 fig,ax = plt.subplots(figsize=(15,10))
284
285 2497.9 MiB 0.0 MiB 4 plt.title('displacement = '+displacement[b]+', Config = ' + title ,fontsize=20,fontweight='bold')
286 #plt.title(Pyr_Type[l]+displacement[j])
287 2497.9 MiB 0.0 MiB 4 ax.plot(MM_E,'b')
288 2497.9 MiB 0.0 MiB 4 plt.ylim([0,100])
289 2499.9 MiB 10.0 MiB 4 plt.yticks(np.linspace(0,100,11))
290 2499.9 MiB 0.0 MiB 4 plt.ylabel('mean magnitude error [%]',fontsize=14)
291 2499.9 MiB 0.0 MiB 4 plt.xlabel('Pixels',fontsize=14)
292 2499.9 MiB 0.0 MiB 4 plt.grid()
293 #plt.ylim([0,5])
294
295 2499.9 MiB 0.0 MiB 4 ax1=ax.twinx()
296 2499.9 MiB 0.1 MiB 4 ax1.plot(xth,MM_th,'k',xe,MM_exp,'r-.')
297 2499.9 MiB 0.3 MiB 4 plt.ylim([0,23])
298 2499.9 MiB 4.0 MiB 4 plt.ylabel('mean displacement magnitude [Pixels]',fontsize=14)
299 2499.9 MiB 0.0 MiB 4 plt.xlabel('Pixels',fontsize=14)
300
301 2499.9 MiB 0.5 MiB 4 plt.grid()
302 2503.9 MiB 10.1 MiB 4 fig.legend(['Mean error','Theoretical','Results'],fontsize=14,loc='upper right')
303
304 2519.8 MiB 61.8 MiB 4 plt.savefig('Mean_error_'+title+'_'+str(b)+'.png',dpi=300)
305 2535.6 MiB 71.6 MiB 4 plt.show()
306 2535.6 MiB 0.0 MiB 4 print(a)
307
308 #fig.clear()
309 #plt.close()
310 #plt.cla()
311 2535.6 MiB 0.0 MiB 4 del u_exp
312 2535.6 MiB 0.0 MiB 4 del v_exp
313 2535.6 MiB 0.0 MiB 4 del TH_u
314 2535.6 MiB 0.0 MiB 4 del TH_v
315 2504.5 MiB -124.0 MiB 4 del M_exp
316 2473.5 MiB -124.0 MiB 4 del M_th
317 2442.5 MiB -124.0 MiB 4 del Error
318 2442.5 MiB 0.0 MiB 4 del MM_E
319 2442.5 MiB 0.0 MiB 4 del MM_exp
320 2442.5 MiB 0.0 MiB 4 del MM_th
321 2442.5 MiB 0.0 MiB 4 del fig
322 2442.5 MiB 0.0 MiB 4 del ax, ax1, name, title, xth,xe
323 2440.0 MiB -13.0 MiB 4 gc.collect()
324 2440.0 MiB 0.0 MiB 4 print('deleted')
325 2440.0 MiB 7.7 MiB 4 snapshot = tracemalloc.take_snapshot()
326 2440.0 MiB -2.8 MiB 4 top_stats = snapshot.statistics('lineno')
327 2432.0 MiB -42.0 MiB 4 tracemalloc.clear_traces()
328
329 #break
330
331 #break
332 #plt.show()
Here the top 10 from tracemalloc:
C:\Users\user\AppData\Roaming\Python\Python39\site-packages\pandas\core\internals\blocks.py:402: size=62.0 MiB, count=5, average=12.4 MiB
C:\Users\user\anaconda3\lib\site-packages\numexpr\necompiler.py:836: size=31.0 MiB, count=2, average=15.5 MiB
C:\Users\user\AppData\Roaming\Python\Python39\site-packages\pandas\core\indexes\base.py:2237: size=194 KiB, count=17, average=11.4 KiB
C:\Users\user\anaconda3\lib\selectors.py:315: size=144 KiB, count=5, average=28.8 KiB
C:\Users\user\anaconda3\lib\site-packages\numpy\core\_asarray.py:102: size=119 KiB, count=387, average=314 B
C:\Users\user\anaconda3\lib\linecache.py:137: size=114 KiB, count=1165, average=100 B
C:\Users\user\anaconda3\lib\site-packages\matplotlib\lines.py:377: size=97.0 KiB, count=45, average=2208 B
C:\Users\user\anaconda3\lib\site-packages\matplotlib\lines.py:672: size=95.9 KiB, count=28, average=3509 B
<unknown>:0: size=95.8 KiB, count=23, average=4264 B
C:\Users\user\anaconda3\lib\site-packages\matplotlib\text.py:265: size=90.6 KiB, count=42, average=2208 B
```````````
I thing the issue is that you create a large amount of figures. You can try to put
fig,ax = plt.subplots(figsize=(15,10))
outside the for nested for loop. To create only one figure. And then use
fig.clear()
inside to clear the figure before adding all the stuffs inside.
You could also plot the first figure outside the for loop and update the data of the figure with set_data
. The plt.show(fig)
is not needed if you save the figure.
I had the same issue, the following solution worked:
import matplotlib
matplotlib.use('Agg')
Source: https://matplotlib.org/stable/faq/howto_faq.html#work-with-threads
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.