![](/img/trans.png)
[英]Python 3 - Parse JSON from multiple API requests into a list and output to a file
[英]Parse JSON output from API file into CSV
我目前正在嘗試將JSON輸出從API請求轉換為CSV格式,以便將結果存儲到我們的數據庫中。 這是我當前的代碼供參考:
import pyodbc
import csv
#import urllib2
import json
import collections
import requests
#import pprint
#import functools
# Fetch the address records over ODBC and save them as a JSON list of objects.
print ("Connecting via ODBC")
conn = pyodbc.connect('DSN=DSN', autocommit=True)
print ("Connected!\n")
cur = conn.cursor()
sql = """SELECT DATA"""
cur.execute(sql)
#df = pandas.read_sql_query(sql, conn)
#df.to_csv('TEST.csv')
#print('CSV sheet is ready to go!')
rows = cur.fetchall()
# One ordered dict per row, so the JSON keys are emitted in this field order.
obs_list = []
for row in rows:
    d = collections.OrderedDict()
    d['addressee'] = row.NAME
    d['street'] = row.ADDRESS
    d['city'] = row.CITY
    d['state'] = row.STATE
    d['zipcode'] = row.ZIP
    obs_list.append(d)
obs_file = 'TEST.json'
with open(obs_file, 'w') as file:
    json.dump(obs_list, file)
# POST the saved address list to the API and store the response on disk.
print('Run through API')
url = 'https://api.smartystreets.com/street-address?'
headers = {'content-type': 'application/json'}
# NOTE(review): this reads 'test1.json', not the 'TEST.json' written by the
# earlier step -- confirm which file is intended.
with open('test1.json', 'r') as run:
    dict_run = run.readlines()
dict_ready = (''.join(dict_run))
r = requests.post(url , data=dict_ready, headers=headers)
ss_output = r.text
output = 'output.json'
with open(output,'w') as of:
    # NOTE: ss_output is already a JSON string, so json.dump() encodes it a
    # second time and the file ends up holding one quoted string.  Reading it
    # back with json.load() then yields a str rather than a list, which is the
    # cause of the TypeError discussed in the text.  Kept as posted.
    json.dump(ss_output, of)
print('I think it works')
# Convert the saved API response into CSV rows (code as posted in the question).
f = open('output.json')
data = json.load(f)
# NOTE: these lookups are where "TypeError: string indices must be integers"
# is raised.  `data` is not a dict keyed by section name: the sample response
# is a list of records, each with its own 'analysis', 'metadata' and
# 'components' objects.
data_1 = data['analysis']
data_2 = data['metadata']
data_3 = data['components']
entity_data = open('TEST.csv','w')
csvwriter = csv.writer(entity_data)
# Each counter only serves to write that section's header row once.
count = 0
count2 = 0
count3 = 0
for ent in data_1:
    if count == 0:
        header = ent.keys()
        csvwriter.writerow(header)
        count += 1
    csvwriter.writerow(ent.values())
for ent_2 in data_2:
    if count2 == 0:
        header2 = ent_2.keys()
        csvwriter.writerow(header2)
        count2 += 1
    csvwriter.writerow(ent_2.values())
for ent_3 in data_3:
    if count3 == 0:
        header3 = ent_3.keys()
        csvwriter.writerow(header3)
        count3 += 1
    csvwriter.writerow(ent_3.values())
entity_data.close()
API的示例輸出:
[
{
"input_index": 0,
"candidate_index": 0,
"delivery_line_1": "1 Santa Claus Ln",
"last_line": "North Pole AK 99705-9901",
"delivery_point_barcode": "997059901010",
"components": {
"primary_number": "1",
"street_name": "Santa Claus",
"street_suffix": "Ln",
"city_name": "North Pole",
"state_abbreviation": "AK",
"zipcode": "99705",
"plus4_code": "9901",
"delivery_point": "01",
"delivery_point_check_digit": "0"
},
"metadata": {
"record_type": "S",
"zip_type": "Standard",
"county_fips": "02090",
"county_name": "Fairbanks North Star",
"carrier_route": "C004",
"congressional_district": "AL",
"rdi": "Commercial",
"elot_sequence": "0001",
"elot_sort": "A",
"latitude": 64.75233,
"longitude": -147.35297,
"precision": "Zip8",
"time_zone": "Alaska",
"utc_offset": -9,
"dst": true
},
"analysis": {
"dpv_match_code": "Y",
"dpv_footnotes": "AABB",
"dpv_cmra": "N",
"dpv_vacant": "N",
"active": "Y",
"footnotes": "L#"
}
},
{
"input_index": 1,
"candidate_index": 0,
"delivery_line_1": "Loop land 1",
"last_line": "North Pole AK 99705-9901",
"delivery_point_barcode": "997059901010",
"components": {
"primary_number": "1",
"street_name": "Lala land",
"street_suffix": "Ln",
"city_name": "North Pole",
"state_abbreviation": "AK",
"zipcode": "99705",
"plus4_code": "9901",
"delivery_point": "01",
"delivery_point_check_digit": "0"
},
"metadata": {
"record_type": "S",
"zip_type": "Standard",
"county_fips": "02090",
"county_name": "Fairbanks North Star",
"carrier_route": "C004",
"congressional_district": "AL",
"rdi": "Commercial",
"elot_sequence": "0001",
"elot_sort": "A",
"latitude": 64.75233,
"longitude": -147.35297,
"precision": "Zip8",
"time_zone": "Alaska",
"utc_offset": -9,
"dst": true
},
"analysis": {
"dpv_match_code": "Y",
"dpv_footnotes": "AABB",
"dpv_cmra": "N",
"dpv_vacant": "N",
"active": "Y",
"footnotes": "L#"
}
}
]
在存儲API輸出之后,問題就在於嘗試將返回的輸出(樣本輸出)解析為CSV格式。 我用來嘗試執行此操作的代碼:
# Convert the saved API response into CSV rows (code as posted in the question).
f = open('output.json')
data = json.load(f)
# NOTE: these lookups are where "TypeError: string indices must be integers"
# is raised.  `data` is not a dict keyed by section name: the sample response
# is a list of records, each with its own 'analysis', 'metadata' and
# 'components' objects.
data_1 = data['analysis']
data_2 = data['metadata']
data_3 = data['components']
entity_data = open('TEST.csv','w')
csvwriter = csv.writer(entity_data)
# Each counter only serves to write that section's header row once.
count = 0
count2 = 0
count3 = 0
for ent in data_1:
    if count == 0:
        header = ent.keys()
        csvwriter.writerow(header)
        count += 1
    csvwriter.writerow(ent.values())
for ent_2 in data_2:
    if count2 == 0:
        header2 = ent_2.keys()
        csvwriter.writerow(header2)
        count2 += 1
    csvwriter.writerow(ent_2.values())
for ent_3 in data_3:
    if count3 == 0:
        header3 = ent_3.keys()
        csvwriter.writerow(header3)
        count3 += 1
    csvwriter.writerow(ent_3.values())
entity_data.close()
返回以下錯誤:TypeError: string indices must be integers(字符串索引必須為整數)。 正如一位熱心的評論者所指出的,看來我是在遍歷鍵而不是各個字典,而這正是我受困的地方,因為我不確定該怎么做。 根據我的理解,JSON 似乎分為 3 個不同的數組,每個數組中都有 JSON 對象,但從結構上看,情況似乎並非如此? 代碼較長,我深表歉意,但我希望這些上下文有助於說明我要完成的工作。
可以考慮使用 pandas 的 json_normalize() 方法,將嵌套的項目展平為表格形式的 DataFrame 結構:
import json

import pandas as pd

# NOTE(review): the question's script writes 'output.json' (lowercase); make
# sure the name matches on case-sensitive filesystems.
with open('Output.json') as f:
    data = json.load(f)

# Flatten the nested objects of each record into dot-separated columns.
# pd.json_normalize is the public entry point since pandas 1.0; the older
# `from pandas.io.json import json_normalize` import path is deprecated.
df = pd.json_normalize(data)
df.to_csv('Output.csv')
請注意, components , metadata和analysis變成了句點分隔的對應值的前綴。 如果不需要,請考慮重命名列。
您正在使用 json.dump 保存請求的 result.text(即問題代碼中的 r.text / ss_output)。 result.text 是一個字符串,因此在通過 json 重新讀取它時,得到的是同一個長字符串,而不是 list。 請嘗試按原樣寫入 result.text:
output = 'output.json'
with open(output,'w') as of:
    # Write the response text as-is instead of passing it through json.dump().
    of.write(ss_output)
這就是您提到的 TypeError: string indices must be integers 的原因。 您的其余代碼還有多個問題。
JSON 中的數據是一個由字典組成的列表,因此要獲得 data_1,您需要使用像這樣的列表推導式: data_1 = [x['analysis'] for x in data]
您將三種類型的行寫入同一個 CSV 文件中:components、metadata 和 analysis。 這樣做很奇怪。
您可能需要重寫代碼的后半部分:為每種數據類型各打開一個 csv_writer(共三個),然后遍歷 data 中的各項,並將其字段寫入相應的 csv_writer。
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.