Advice on plotting large amount of data

Question

I'm working on a very cheap seismometer, mainly for educational purposes and some research. Every few hours I would like to plot the seismic signal of one of the channels, as in the image I have attached, using matplotlib.

The problem is that I get 100 data points every second, and when I plot this data on a Raspberry Pi it usually hangs and stops working.

To draw each 4-hour subplot I plot the whole data set again and only show the part that falls between that subplot's limits, but I find this inefficient, and it is probably the reason the Raspberry Pi hangs.

I've been thinking for days about how I could do this without using a lot of memory for each subplot, but I can't find an answer: I'm a geologist, and programming is a big challenge for me.

Does anybody have a better idea for doing this?

import matplotlib.pyplot as plt
import time
import os.path
import datetime
import sys
import numpy
import pytz
import matplotlib.dates as mdates
import ftplib
from pylab import *
import numpy as np
from itertools import islice
from time import sleep
from matplotlib.pyplot import specgram
from scipy.signal import medfilt
import csv
# Base name (without extension) shared by the input data file ('<archivo>.txt')
# and the PNG that is saved and then uploaded ('<archivo>.png').
archivo='sismo1545436800'

def subirftp(archivosubir):
    """Upload ``<archivosubir>.png`` to the FTP server under the same name.

    After the transfer the remote directory listing is requested; ftplib's
    default callback prints each listing line to stdout.

    Args:
        archivosubir: Path of the image to send, without the ``.png``
            extension. The same string is used as the remote file name.

    Raises:
        OSError: If the local file cannot be opened or the connection fails.
        ftplib.all_errors: Any FTP protocol error raised by ftplib.
    """
    nombre_png = archivosubir + '.png'
    # NOTE(review): host and credentials are hard-coded placeholders here.
    # Both context managers guarantee the session and the file are released
    # even when the upload raises half-way through.
    with ftplib.FTP('---', 's---   ', '----') as session:
        with open(nombre_png, 'rb') as imagen:
            session.storbinary('STOR ' + nombre_png, imagen)  # send the file
        session.retrlines('LIST')

# Text properties: 16-point dark-red serif at normal weight.
font = dict(
    family='serif',
    color='darkred',
    weight='normal',
    size=16,
)

# --- Load the CSV log and normalise the samples -----------------------------
# Each row is read as: column 0 = POSIX timestamp in seconds, column 1 =
# integer sample. The file is read in a single pass and always closed.
ruta_datos = 'Z:/nchazarra/sismografos/' + str(archivo) + '.txt'

tiempo = []    # timezone-aware (UTC) datetime of every sample
valora = []    # raw integer sample; 0 where the value is missing or malformed
inicio = None  # timestamp of the first sample, as numpy.float64
empiezo = time.time()  # start of the timed section (reported after plotting)

with open(ruta_datos) as f:
    csv_f = csv.reader(f)
    pendiente = None
    for row in csv_f:
        # Process each row one iteration late so that the final row of the
        # file is never used, exactly as the previous islice(...) did.
        # NOTE(review): presumably the last line may still be half-written
        # by the logger - confirm.
        row, pendiente = pendiente, row
        if not row:
            # First iteration (nothing held back yet) or a blank line.
            continue

        marca = float(row[0])
        if inicio is None:
            inicio = np.float64(marca)
        tiempo.append(datetime.datetime.fromtimestamp(marca, tz=pytz.utc))

        try:
            valora.append(int(row[1]))
        except (IndexError, ValueError):
            # Missing or non-integer sample: substitute 0 to keep the two
            # lists aligned.
            valora.append(0)

    # Physical lines consumed by the reader; replaces the separate counting
    # pass (which also relied on a `sum` shadowed by `from pylab import *`).
    nr_of_lines = csv_f.line_num

print(nr_of_lines)

crudos = np.asarray(valora)
if crudos.size == 0:
    raise ValueError('No usable data rows in ' + ruta_datos)

# NOTE: despite its name, `mediana` has always held the arithmetic mean.
mediana = np.mean(crudos)
minimo = np.amin(crudos)
maximo = np.amax(crudos)
std = np.std(crudos)

# Min-max scale into [-1, 0], then subtract the median so the trace is
# centred on zero. Vectorised: no per-sample Python loop.
rango = maximo - minimo
if rango == 0:
    # Perfectly flat signal: avoid 0/0 (all-NaN trace); result is all zeros.
    rango = 1
valores2 = (crudos - minimo) / rango - 1.0
mediananueva = float(np.median(valores2))
valores2 = valores2 - mediananueva

# Keep the historical names bound. They now refer to the normalised array
# rather than to a Python list holding the same numbers.
valores = valora = valores2
tiempor = tiempo
tiempo2 = np.asarray(tiempo)
del crudos

# --- Draw the day as six stacked 4-hour strips ------------------------------
# (start, end, colour) of each strip, top to bottom. The limits are the same
# naive datetimes that were previously passed to axis() one stanza at a time.
ventanas = [
    (datetime.datetime(2018, 12, 22, 0, 0),
     datetime.datetime(2018, 12, 22, 3, 59, 59), 'red'),    # 00 to 04
    (datetime.datetime(2018, 12, 22, 4, 0),
     datetime.datetime(2018, 12, 22, 8, 0), 'green'),       # 04 to 08
    (datetime.datetime(2018, 12, 22, 8, 0),
     datetime.datetime(2018, 12, 22, 12, 0), 'blue'),       # 08 to 12
    (datetime.datetime(2018, 12, 22, 12, 0),
     datetime.datetime(2018, 12, 22, 16, 0), 'red'),        # 12 to 16
    (datetime.datetime(2018, 12, 22, 16, 0),
     datetime.datetime(2018, 12, 22, 20, 0), 'green'),      # 16 to 20
    (datetime.datetime(2018, 12, 22, 20, 0),
     datetime.datetime(2018, 12, 22, 23, 59, 59), 'blue'),  # 20 to 24
]

# Convert the timestamps to matplotlib date numbers once. They are used only
# to select the samples of each strip; comparing numbers also sidesteps the
# naive-vs-aware datetime comparison error.
tiempo_num = mdates.date2num(tiempo2)

franjas = []
for posicion, (desde, hasta, color) in enumerate(ventanas, start=1):
    franja = plt.subplot(6, 1, posicion)
    franja.axis([desde, hasta, -0.05, 0.05])
    # Hand each subplot only the samples inside its own window instead of
    # the whole day: matplotlib then converts and rasterises roughly 1/6 of
    # the data per strip, which is where the time and memory were going.
    dentro = ((tiempo_num >= mdates.date2num(desde)) &
              (tiempo_num <= mdates.date2num(hasta)))
    franja.plot(tiempo2[dentro], valores2[dentro], lw=0.2, color=color)
    # One formatter instance per axis.
    franja.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
    franjas.append(franja)

# Keep the historical per-strip names available.
franja1, franja2, franja3, franja4, franja5, franja6 = franjas
del tiempo_num

acabo = time.time()
cuantotardo = acabo - empiezo
print('Madre mía, he tardado en hacer esto '+str(cuantotardo)+' segundos')

plt.savefig(archivo + ".png", dpi=300)
subirftp(archivo)
plt.show()

Answer 1

Do you need to plot every data point? You could consider plotting every 100 or so. As long as the frequency of your signal isn't too high, I think it could work. Something like this:

import matplotlib.pyplot as plt
import numpy as np

# Synthetic signal: one sine period over 10000 samples plus Gaussian noise.
n_samples = 10000
X = np.arange(n_samples) / n_samples * 2 * np.pi
noise = np.random.normal(size=n_samples) / 10
Y = np.sin(X) + noise

# Draw only every 100th sample instead of all of them.
step = 100
plt.plot(X[::step], Y[::step])

versus all points:

Answer 2

You can save a fair bit of memory by sub-setting the arrays before you plot them:

import datetime

import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np

# Synthetic stand-in for one day of data: one sample every 10 ms starting at
# 2018-12-22 00:00, with normally distributed random values.
n_times = 24 * 60 * 60 * 100
day_start = datetime.datetime(2018, 12, 22, 0, 0)
times = [
    day_start + datetime.timedelta(milliseconds=10 * x)
    for x in range(n_times)
]
tiempo2 = np.array(times)
valores2 = np.random.normal(size=n_times)

# Seven edges delimit the six half-open 4-hour windows [edge k, edge k+1).
edges = [day_start + datetime.timedelta(hours=4 * k) for k in range(7)]
colors = ['red', 'green', 'blue', 'red', 'green', 'blue']

franjas = []
for k, color in enumerate(colors):
    franja = plt.subplot(6, 1, k + 1)
    # Boolean mask of the samples that fall inside this window; only that
    # subset is handed to plot().
    in_window = np.logical_and(tiempo2 >= edges[k], tiempo2 < edges[k + 1])
    franja.plot(tiempo2[in_window], valores2[in_window], lw=0.2, color=color)
    franjas.append(franja)

# Per-strip names, top (00 to 04) to bottom (20 to 24).
franja1, franja2, franja3, franja4, franja5, franja6 = franjas

# Label every time axis as hours:minutes.
for franja in franjas:
    franja.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))

plt.show()

Advice on plotting large amount of data

Question

2 answers

solution1
0 2018-12-29 21:21:47

solution2
0 2018-12-31 14:53:13

Advice on plotting large amount of data

Question

2 answers

solution1 0 2018-12-29 21:21:47

solution2 0 2018-12-31 14:53:13

solution1
0 2018-12-29 21:21:47

solution2
0 2018-12-31 14:53:13