I fetch data periodically (every 120 seconds), but each new batch overwrites the previous data in the SQL database. I want all of the data to be saved. In addition, is the timer correct?
import sqlalchemy as sa
import psycopg2
import requests as rq
import pandas as pd
import json
import time
# Question's original polling script, shown here without its original indentation.
# It fetches arrivals for each vehicle, concatenates them, and writes them to PostgreSQL.
start_time = time.time()
while True:
temp = pd.DataFrame()
df = pd.DataFrame()
vehicleList = {"SN63NBK", "YY67UTP"}
for ids in vehicleList:
r = rq.get('https://api.tfl.gov.uk/Vehicle/' + ids + '/Arrivals')
r = r.text
temp = pd.read_json(r)
# NOTE: this stores the literal string 'ids' in every row, not the value of the loop variable ids.
temp['Type'] = 'ids'
df = pd.concat([df, temp], sort=False).reset_index(drop=True)
engine = sa.create_engine('postgresql+psycopg2://postgres:3434@127.0.0.1/postgres')
# Serialise each 'timing' value to a JSON string before writing it to the database.
df['timing'] = list(map(lambda x: json.dumps(x), df['timing']))
# NOTE: if_exists='replace' drops the existing table before inserting, so only the
# most recent batch survives; this is the overwrite described in the question.
df.to_sql('tfl_bus_pg6', engine, if_exists='replace', index=False)
# Sleep until the next 120-second boundary measured from start_time, so the time
# spent fetching and writing is subtracted from the wait instead of adding to it.
time.sleep(120.0 - ((time.time() - start_time) % 120.0))
I changed your code slightly, but I think the main problem is the `if_exists` parameter, which you should set to `append`, as @K753 mentioned in the comments.
Also, the id `YY67UTP` returns nothing, so I replaced it with another random id from the site to illustrate how the code works.
def _data_gen(vehicles):
    """Yield one arrivals DataFrame per vehicle id.

    Args:
        vehicles: Iterable of vehicle id strings to query.

    Yields:
        The parsed ``/Vehicle/{id}/Arrivals`` response as a DataFrame, with
        an added ``Type`` column holding the id that was queried.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.Timeout: If the API does not answer within the timeout.
    """
    # Local import so this function does not depend on the file's import block.
    from io import StringIO

    for vehicle_id in vehicles:
        # Pause before every request (kept from the original; presumably to
        # avoid hammering the API).
        time.sleep(1)
        # Without a timeout a stalled connection would block the poller forever.
        response = rq.get(
            'https://api.tfl.gov.uk/Vehicle/' + vehicle_id + '/Arrivals',
            timeout=30,
        )
        # Fail loudly on an HTTP error instead of feeding the error body to
        # the JSON parser as if it were arrival data.
        response.raise_for_status()
        # Recent pandas deprecates passing a literal JSON string to read_json;
        # a file-like wrapper works on old and new versions alike.
        frame = pd.read_json(StringIO(response.text))
        frame['Type'] = vehicle_id
        yield frame
# Poll the arrivals API forever and append every batch to the database.
# There is no exit condition: stop the script with Ctrl+C (KeyboardInterrupt).
vehicleList = {"SN63NBK", "YY67UTP"}
# Create the engine once; building it inside the loop would set up a fresh
# connection pool on every cycle.
engine = sa.create_engine('postgresql+psycopg2://postgres:3434@127.0.0.1/postgres')
interval = 120.0
started = time.monotonic()
while True:
    df = pd.concat(_data_gen(vehicleList), sort=False, ignore_index=True)
    # When no vehicle reports any arrivals the frame has no rows (and no
    # 'timing' column), so there is nothing to convert or store this cycle.
    if not df.empty:
        # Serialise each 'timing' value to a JSON string so it can be written
        # as a plain text column (presumably the values are nested objects).
        df['timing'] = [json.dumps(value) for value in df['timing']]
        # 'append' adds rows to the existing table instead of recreating it.
        df.to_sql('tfl_bus_pg6', engine, if_exists='append', index=False)
    # Sleep until the next multiple of `interval` since start, so the time
    # spent fetching and writing does not accumulate as drift.
    time.sleep(interval - ((time.monotonic() - started) % interval))
The technical posts on this site are published under the CC BY-SA 4.0 license. If you reprint them, please cite the site URL or the original address. For any questions, please contact: yoyou2525@163.com.