將API文件中的JSON輸出解析為CSV

Question

我目前正在嘗試將JSON輸出從API請求轉換為CSV格式，以便將結果存儲到我們的數據庫中。 這是我當前的代碼供參考：

import pyodbc
import csv
#import urllib2
import json
import collections
import requests
#import pprint
#import functools

# Step 1: connect through the ODBC DSN and fetch the rows to be sent to the API.
print ("Connecting via ODBC")

conn = pyodbc.connect('DSN=DSN', autocommit=True)

print ("Connected!\n")

cur = conn.cursor() 

sql = """SELECT DATA"""

cur.execute(sql)

#df = pandas.read_sql_query(sql, conn)

#df.to_csv('TEST.csv')

#print('CSV sheet is ready to go!')

rows = cur.fetchall()

# Step 2: map each database row onto the address keys used in the request
# payload. OrderedDict keeps the keys in insertion order when serialized.
obs_list = []

for row in rows:

    d = collections.OrderedDict()
    d['addressee'] = row.NAME
    d['street'] = row.ADDRESS
    d['city'] = row.CITY
    d['state'] = row.STATE
    d['zipcode'] = row.ZIP
    obs_list.append(d)

# Serialize the list of address dicts to TEST.json.
obs_file = 'TEST.json'
with open(obs_file, 'w') as file:
    json.dump(obs_list, file)


print('Run through API')


url = 'https://api.smartystreets.com/street-address?'

headers = {'content-type': 'application/json'}

# NOTE(review): this reads 'test1.json', not the 'TEST.json' written above --
# confirm which file is meant to be posted.
with open('test1.json', 'r') as run:

    dict_run = run.readlines()

    # Join the lines back into a single string to use as the request body.
    dict_ready = (''.join(dict_run))

# Step 3: POST the JSON text to the API.
r = requests.post(url , data=dict_ready, headers=headers)

# r.text is the response body as a str; it has not been parsed into objects.
ss_output = r.text

output = 'output.json'

with open(output,'w') as of:

    # NOTE(review): json.dump() on a str writes one quoted JSON string, so a
    # later json.load() of this file returns a str rather than a list of dicts.
    json.dump(ss_output, of)

print('I think it works')

# Step 4: reload the saved API response and try to write its three nested
# sections ('analysis', 'metadata', 'components') to a single CSV file.
f = open('output.json')

# NOTE(review): the next line is indented without an enclosing block, which is
# an IndentationError as written.
   data = json.load(f)

# These lookups index `data` by string key, which requires `data` to be a
# dict. The sample API output is a list of dicts, one per address.
data_1 = data['analysis']

data_2 = data['metadata']

data_3 = data['components']

entity_data = open('TEST.csv','w')

csvwriter = csv.writer(entity_data)

# One flag per section: stays 0 until that section's header row is written.
count = 0

count2 = 0

count3 = 0

# For each section: write the first entry's keys once as a header row, then
# one row of values per entry. All three sections share the same csvwriter.
for ent in data_1:

    if count == 0:

        header = ent.keys()

        csvwriter.writerow(header)

        count += 1

    csvwriter.writerow(ent.values())

for ent_2 in data_2:

    if count2 == 0:

        header2 = ent_2.keys()

        csvwriter.writerow(header2)

        count2 += 1

    csvwriter.writerow(ent_2.values())

for ent_3 in data_3:

    if count3 == 0:

        header3 = ent_3.keys()

        csvwriter.writerow(header3)

        count3 += 1

    csvwriter.writerow(ent_3.values())

entity_data.close()

API的示例輸出：

[
    {
        "input_index": 0,
        "candidate_index": 0,
        "delivery_line_1": "1 Santa Claus Ln",
        "last_line": "North Pole AK 99705-9901",
        "delivery_point_barcode": "997059901010",
        "components": {
            "primary_number": "1",
            "street_name": "Santa Claus",
            "street_suffix": "Ln",
            "city_name": "North Pole",
            "state_abbreviation": "AK",
            "zipcode": "99705",
            "plus4_code": "9901",
            "delivery_point": "01",
            "delivery_point_check_digit": "0"
        },
        "metadata": {
            "record_type": "S",
            "zip_type": "Standard",
            "county_fips": "02090",
            "county_name": "Fairbanks North Star",
            "carrier_route": "C004",
            "congressional_district": "AL",
            "rdi": "Commercial",
            "elot_sequence": "0001",
            "elot_sort": "A",
            "latitude": 64.75233,
            "longitude": -147.35297,
            "precision": "Zip8",
            "time_zone": "Alaska",
            "utc_offset": -9,
            "dst": true
        },
        "analysis": {
            "dpv_match_code": "Y",
            "dpv_footnotes": "AABB",
            "dpv_cmra": "N",
            "dpv_vacant": "N",
            "active": "Y",
            "footnotes": "L#"
        }
    },

    {
        "input_index": 1,
        "candidate_index": 0,
        "delivery_line_1": "Loop land 1",
        "last_line": "North Pole AK 99705-9901",
        "delivery_point_barcode": "997059901010",
        "components": {
            "primary_number": "1",
            "street_name": "Lala land",
            "street_suffix": "Ln",
            "city_name": "North Pole",
            "state_abbreviation": "AK",
            "zipcode": "99705",
            "plus4_code": "9901",
            "delivery_point": "01",
            "delivery_point_check_digit": "0"
        },
        "metadata": {
            "record_type": "S",
            "zip_type": "Standard",
            "county_fips": "02090",
            "county_name": "Fairbanks North Star",
            "carrier_route": "C004",
            "congressional_district": "AL",
            "rdi": "Commercial",
            "elot_sequence": "0001",
            "elot_sort": "A",
            "latitude": 64.75233,
            "longitude": -147.35297,
            "precision": "Zip8",
            "time_zone": "Alaska",
            "utc_offset": -9,
            "dst": true
        },
        "analysis": {
            "dpv_match_code": "Y",
            "dpv_footnotes": "AABB",
            "dpv_cmra": "N",
            "dpv_vacant": "N",
            "active": "Y",
            "footnotes": "L#"
        }
    }
]

在存儲API輸出之后，問題就在於嘗試將返回的輸出（樣本輸出）解析為CSV格式。 我用來嘗試執行此操作的代碼：

# Reload the saved API response and try to write its three nested sections
# ('analysis', 'metadata', 'components') to a single CSV file.
f = open('output.json')

data = json.load(f)

# These lookups index `data` by string key, which requires `data` to be a
# dict. The sample API output is a list of dicts, one per address.
data_1 = data['analysis']

data_2 = data['metadata']

data_3 = data['components']

entity_data = open('TEST.csv','w')

csvwriter = csv.writer(entity_data)

# One flag per section: stays 0 until that section's header row is written.
count = 0

count2 = 0

count3 = 0

# For each section: write the first entry's keys once as a header row, then
# one row of values per entry. All three sections share the same csvwriter.
for ent in data_1:

    if count == 0:

        header = ent.keys()

        csvwriter.writerow(header)

        count += 1

    csvwriter.writerow(ent.values())

for ent_2 in data_2:

    if count2 == 0:

        header2 = ent_2.keys()

        csvwriter.writerow(header2)

        count2 += 1

    csvwriter.writerow(ent_2.values())

for ent_3 in data_3:

    if count3 == 0:

        header3 = ent_3.keys()

        csvwriter.writerow(header3)

        count3 += 1

    csvwriter.writerow(ent_3.values())

entity_data.close()

返回以下錯誤：TypeError: string indices must be integers（字符串索引必須為整數）。 正如一位熱心的評論者所指出的那樣，看來我是在遍歷鍵而不是遍歷各個字典，這正是我受困的地方，因為我不確定該怎麼做。 根據我的理解，JSON似乎分為3個不同的數組，每個數組都包含JSON對象，但從實際結構來看，情況似乎並非如此？ 代碼較長，深表歉意，但我希望能為我想完成的工作提供足夠的上下文。

Answer 1

可以考慮使用 pandas 的 json_normalize() 方法，將嵌套的項目展平為表格形式的 DataFrame（df）結構：

import json

import pandas as pd

# Load the saved API response. The file name matches the 'output.json' written
# by the question's script (file names are case-sensitive on many systems).
with open('output.json') as f:
    data = json.load(f)

# Flatten each record's nested dicts ('components', 'metadata', 'analysis')
# into dot-separated columns such as 'components.city_name'.
# json_normalize is a top-level pandas function since pandas 1.0; the older
# `from pandas.io.json import json_normalize` import path is deprecated.
df = pd.json_normalize(data)

# Write the flattened table to CSV (the DataFrame index is written as the
# first column by default).
df.to_csv('Output.csv')

請注意，components、metadata 和 analysis 會成為對應列名的前綴，並以句點與字段名分隔（例如 components.city_name）。 如果不需要這些前綴，請考慮重命名列。

Answer 2

您正在用 json.dump 保存請求的 result.text。 result.text 是一個字符串，因此再用 json 讀取該文件時，得到的仍是同一個長字符串，而不是 list。 請嘗試按原樣寫入 result.text：

# Write the response text to the file verbatim with of.write() instead of
# json.dump(), so the file holds the raw response text rather than a
# JSON-encoded string of it.
output = 'output.json'
with open(output,'w') as of:
    of.write(ss_output)

這就是您提到的 TypeError: string indices must be integers 的原因。 您的其餘代碼還有多個問題：

json中的數據是一個由字典組成的列表，因此要獲得data_1，您需要使用類似這樣的列表推導式（list comprehension）： data_1 = [x['analysis'] for x in data]
您將三種類型的行（components、metadata 和 analysis）寫入了同一個csv文件，這樣做很奇怪。

您可能需要重寫代碼的後半部分：為三種數據類型各打開一個 csv writer（共三個），然後遍歷 data 中的每一項，並將其字段寫入對應的 csv writer。

將API文件中的JSON輸出解析為CSV

問題描述

2 個解決方案

解決方案1
2 已采納 2016-09-14 01:59:40

解決方案2
0 2016-09-13 23:15:11

將API文件中的JSON輸出解析為CSV

問題描述

2 個解決方案

解決方案1 2 已采納 2016-09-14 01:59:40

解決方案2 0 2016-09-13 23:15:11

解決方案1
2 已采納 2016-09-14 01:59:40

解決方案2
0 2016-09-13 23:15:11