简体   繁体   中英

Python - write to CSV from multiple JSON files

I have been trying and failing all day to write multiple JSON files I am pulling from the nys.gov website on COVID data to one (or more) CSV files.

I can successfully concatenate the JSON files but have not been able to put them together in a format I can use to make graphs. I know the issue is somewhere in my for loop, but after many attempts I have not found a successful method of either appending the data into one CSV or creating multiple CSV files I can then work with in pandas. Here is my code. Currently it seems to iterate through the loop but only dump the final JSON into my CSV...

import csv
import datetime 
import json
import pandas as pd
import urllib.request as request

# Base query for the health.data.ny.gov resource; the timestamp of the
# requested day is appended as the value of the test_date parameter.
BASE_URL = 'https://health.data.ny.gov/resource/xdss-u53e.json?test_date='
# Only the calendar date varies; the time of day is pinned to midnight.
DATE_FORMAT = "%Y-%m-%dT00:00:00.000"

# Read the clock once so that all eight offsets are measured from the same
# instant. Eight separate today() calls could straddle midnight and then
# disagree about which calendar day each offset refers to.
today = datetime.datetime.today()

# Yesterday (1 day back) through 8 days back, most recent first.
days = [today - datetime.timedelta(days=days_back) for days_back in range(1, 9)]
day_strs = [day.strftime(DATE_FORMAT) for day in days]

# One request URL per day, in the same order as `days`.
url_lst = [BASE_URL + day_str for day_str in day_strs]

# The individual names below are kept so that any code referring to a
# specific day, string or URL keeps working.
(one_day, two_days, thr_days, for_days,
 fiv_days, six_days, sev_days, egt_days) = days

(one_day_str, two_day_str, thr_day_str, for_day_str,
 fiv_day_str, six_day_str, sev_day_str, egt_day_str) = day_strs

(url_one, url_two, url_thr, url_for,
 url_fiv, url_six, url_sev, url_egt) = url_lst

# Accumulates the parsed JSON payload of every non-empty response; the
# download loop appends to it.
d = []

def write_json(data, filename='data.json'):
    """Serialize *data* as indented JSON into *filename*.

    The file is opened in write mode, so any existing content is replaced.

    Args:
        data: An object the ``json`` module can serialize.
        filename: Destination path; defaults to ``data.json``.
    """
    pretty_indent = 4
    with open(filename, mode='w') as out_file:
        json.dump(data, fp=out_file, indent=pretty_indent)

# For every daily URL: download the JSON payload, cache it in covid.json,
# add it to the running list `d` (mirrored to data.json), and write its rows
# to "County Stats.csv".
for url in url_lst:
    with request.urlopen(url) as response:
        #   print(url)
        source = response.read()
        data = json.loads(source)

    # Nothing came back for this day, so move on to the next URL.
    if len(data) == 0:
        continue

    # Cache the current day's payload on disk (rewritten on every pass).
    with open ("covid.json", 'w') as outfile:
        json.dump(data, outfile)

    # Read the cached payload straight back, append it to the accumulated
    # list and rewrite data.json with everything collected so far.
    with open('covid.json') as json_data:
        j = json.load(json_data)
        d.append(j)
        write_json(d)  
    
    filename = "County Stats.csv"
    fields = ["Date","County", "New Positives", "All Positives", "New Tests", "All Tests"]
    # NOTE: mode 'w' truncates the file, and this open() runs once per URL
    # with the same file name. After the loop finishes, the CSV therefore
    # holds only the header plus the rows of the last non-empty response.
    with open(filename, 'w') as fw:
        cf = csv.writer(fw, lineterminator='\n')
        # write the header
        cf.writerow(fields)
        
        # Each element of `data` is one record accessed by string keys;
        # the values are written in the same order as `fields`.
        for counties in data:
            date = counties['test_date']
            cnty = counties['county']
            new_pos = counties['new_positives']
            cum_pos = counties['cumulative_number_of_positives']
            new_tests = counties['total_number_of_tests']
            cum_tests = counties['cumulative_number_of_tests']
            cf.writerow([date,cnty, new_pos, cum_pos, new_tests, cum_tests])

I'm probably somewhere between beginner and intermediate with Python, so please forgive any poor coding practices. Thanks in advance.

  • Conor

Ok, with the help of a former professor of mine and confidant I've figured it out!

A couple of things:

  1. I wasn't indexing the for loop. Adding an index variable before the loop and incrementing it on each pass (index += 1) gives every iteration a unique number that can be used in the CSV file name.
  2. Then, using an f-string, the CSV file name is rebuilt from that index each time the loop runs, creating an individual file for each of the JSON resources that are pulled.
# Running list of every non-empty JSON payload downloaded so far.
d = []


def write_json(data, filename='data.json'):
    """Write *data* to *filename* as JSON indented by four spaces.

    Opening in write mode replaces whatever the file held before.

    Args:
        data: An object the ``json`` module can serialize.
        filename: Destination path; defaults to ``data.json``.
    """
    with open(filename, mode='w') as target:
        json.dump(data, fp=target, indent=4)
 
# Download each daily URL and write its rows to a separate, numbered CSV
# file ("County Stats 1.csv", "County Stats 2.csv", ...).
index = 0
for url in url_lst:
    # Incremented before the empty-response check below, so a skipped day
    # still uses up its number and the file numbering can have gaps.
    index +=1
    with request.urlopen(url) as response:
        source = response.read()
        data = json.loads(source)
 
        # No rows for this day: go on to the next URL without writing a CSV.
        if len(data) == 0:
            continue

        # Cache the current day's payload on disk (rewritten on every pass).
        with open ("covid.json", 'w') as outfile:
            json.dump(data, outfile)

        # Read the cached payload back, append it to the accumulated list
        # and rewrite data.json with everything collected so far.
        with open('covid.json') as json_data:
            j = json.load(json_data)
            d.append(j)
            write_json(d)  
        
        print(index)    
        fields = ["Date","County", "New Positives", "All Positives", "New Tests", "All Tests"]
        # The index makes the file name unique per URL, so opening in 'w'
        # mode no longer replaces the output of an earlier iteration.
        filename = f"County Stats {index}.csv"
        with open(filename, 'w') as fw:
            cf = csv.writer(fw, lineterminator='\n')
            # write the header
            cf.writerow(fields)
            # Each element of `data` is one record accessed by string keys;
            # the values are written in the same order as `fields`.
            for counties in data:   
                date = counties['test_date']
                cnty = counties['county']
                new_pos = counties['new_positives']
                cum_pos = counties['cumulative_number_of_positives']

                new_tests = counties['total_number_of_tests']
                cum_tests = counties['cumulative_number_of_tests']
                cf.writerow([date,cnty, new_pos, cum_pos, new_tests, cum_tests])

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM