简体   繁体   中英

Parse nested json to csv using Python Pandas

I have a json in below format:

{"MainName":[{"col1":"12345","col2":"False","col3":"190809","SubName1":{"col4":30.00,"SubName2":{"col5":"19703","col6":"USD"}},"col7":"7372267","SubName3":[{"col8":"345337","col9":"PC"}],"col10":"10265","col11":"29889004","col12":"calculated","col13":"9218","SubName4":{"col14":1,"SubName5":{"col15":"1970324","col16":"integer"}},"col17":"434628","col18":"2020-02-06T13:47:40.000-0800","col19":"754878037","SubName6":{"col20":30.00,"SubName7":{"col21":"19703248","col22":"USD"}}},{"col1":"12345","col2":"False","col3":"190809","SubName1":{"col4":30.00,"SubName2":{"col5":"19703","col6":"USD"}},"col7":"7372267","SubName3":[{"col8":"345337","col9":"PC"}],"col10":"10265","col11":"29889004","col12":"calculated","col13":"9218","SubName4":{"col14":1,"SubName5":{"col15":"1970324","col16":"integer"}},"col17":"434628","col18":"2020-02-06T13:47:40.000-0800","col19":"754878037","SubName6":{"col20":30.00,"SubName7":{"col21":"19703248","col22":"USD"}}}],"skip":0,"top":2,"next":"/v1/APIName?skip=2&top=2"}

I want to convert it into csv with below format:

MainName_col1,MainName_col2,MainName_col3,MainName_SubName1_col4,MainName_SubName1_SubName2_col5,MainName_SubName1_SubName2_col6,MainName_col7,MainName_SubName3_col8,MainName_SubName3_col9,MainName_col10,MainName_col11,MainName_col12,MainName_col13,MainName_SubName4_col14,MainName_SubName4_SubName5_col15,MainName_SubName4_SubName5_col16,MainName_col17,MainName_col18,MainName_col19,MainName_SubName6_col20,MainName_SubName6_SubName7_col21,MainName_SubName6_SubName7_col22
12345,False,190809,30.0,19703,USD,7372267,345337,PC,10265,29889004,calculated,9218,1,1970324,integer,434628,2020-02-06T13:47:40.000-0800,754878037,30.0,19703248,USD
12345,False,190809,30.0,19703,USD,7372267,345337,PC,10265,29889004,calculated,9218,2,123453,integer,434628,2020-02-06T13:47:40.000-0800,754878037,30.0,19703248,USD

Kindly help me out in this.

Use the function below to flatten your JSON data.

# Sample payload: a single record under "MainName" plus the paging fields
# "skip", "top" and "next". Note that "col2" is the boolean False here.
dc = {"MainName":[{"col1":"12345","col2":False,"col3":"190809","SubName1":{"col4":30.00,"SubName2":{"col5":"19703","col6":"USD"}},"col7":"7372267","SubName3":[{"col8":"345337","col9":"PC"}],"col10":"10265","col11":"29889004","col12":"calculated","col13":"9218","SubName4":{"col14":1,"SubName5":{"col15":"1970324","col16":"integer"}},"col17":"434628","col18":"2020-02-06T13:47:40.000-0800","col19":"754878037","SubName6":{"col20":30.00,"SubName7":{"col21":"19703248","col22":"USD"}}}],"skip":0,"top":1,"next":"/v1/APIName?skip=1&top=1"}
def flatten(root: str, dict_obj: dict) -> dict:
    """Flatten a nested mapping into a single-level dict.

    Keys of the result are built by joining ``root`` and every key on the
    path to a leaf value with underscores, e.g. ``MainName_SubName1_col4``.

    Args:
        root: Prefix prepended to every generated key.
        dict_obj: The (possibly nested) mapping to flatten.

    Returns:
        A new flat dict mapping the joined key paths to the leaf values.
        A list is represented by its last element only; an empty list
        contributes no keys.
    """
    flat = {}
    for key, val in dict_obj.items():
        name = f'{root}_{key}'
        # A list stands in for its last element. Unwrap repeatedly so nested
        # lists work too, and stop on an empty list instead of indexing into it.
        while isinstance(val, list) and val:
            val = val[-1]
        if isinstance(val, dict):
            flat.update(flatten(name, val))
        elif not isinstance(val, list):
            # Scalar leaf (including the last element of a list of scalars).
            flat[name] = val
        # Remaining case: an empty list, which has nothing to emit.
    return flat
# Flatten the first record under "MainName". The returned dict is only echoed
# in an interactive session (REPL/notebook); in a script, assign or print it.
flatten('MainName', dc['MainName'][0])

It will give you the expected output as a flat dictionary, shown below. You can then use it however you want.

{'MainName_col1': '12345',
 'MainName_col2': False,
 'MainName_col3': '190809',
 'MainName_SubName1_col4': 30.0,
 'MainName_SubName1_SubName2_col5': '19703',
 'MainName_SubName1_SubName2_col6': 'USD',
 'MainName_col7': '7372267',
 'MainName_SubName3_col8': '345337',
 'MainName_SubName3_col9': 'PC',
 'MainName_col10': '10265',
 'MainName_col11': '29889004',
 'MainName_col12': 'calculated',
 'MainName_col13': '9218',
 'MainName_SubName4_col14': 1,
 'MainName_SubName4_SubName5_col15': '1970324',
 'MainName_SubName4_SubName5_col16': 'integer',
 'MainName_col17': '434628',
 'MainName_col18': '2020-02-06T13:47:40.000-0800',
 'MainName_col19': '754878037',
 'MainName_SubName6_col20': 30.0,
 'MainName_SubName6_SubName7_col21': '19703248',
 'MainName_SubName6_SubName7_col22': 'USD'}

As I understand it, your dc will look like the following:

# Sample payload with two records under "MainName". Most keys of the second
# record carry a suffix (e.g. "col1_a"), so the two records flatten to
# different key sets.
dc = {"MainName":[{"col1":"12345","col2":"False","col3":"190809","SubName1":{"col4":30.00,"SubName2":{"col5":"19703","col6":"USD"}},"col7":"7372267","SubName3":[{"col8":"345337","col9":"PC"}],"col10":"10265","col11":"29889004","col12":"calculated","col13":"9218","SubName4":{"col14":1,"SubName5":{"col15":"1970324","col16":"integer"}},"col17":"434628","col18":"2020-02-06T13:47:40.000-0800","col19":"754878037","SubName6":{"col20":30.00,"SubName7":{"col21":"19703248","col22":"USD"}}},{"col1_a":"12345XX","col2_b":"False","col3_c":"190809","SubName1":{"col4_d":30.00,"SubName2":{"col5_e":"19703","col6_f":"USD"}},"col7_g":"7372267","SubName3":[{"col8_h":"345337","col9":"PC"}],"col10_i":"10265","col11_j":"29889004","col12_k":"calculated","col13_l":"9218","SubName4":{"col14_m":1,"SubName5":{"col15_n":"1970324","col16_o":"integer"}},"col17_p":"434628","col18_q":"2020-02-06T13:47:40.000-0800","col19_r":"754878037","SubName6":{"col20_s":30.00,"SubName7":{"col21_t":"19703248","col22_u":"USDZZ"}}}],"skip":0,"top":2,"next":"/v1/APIName?skip=2&top=2"}

I used the answer above to flatten each record into a single object:

def flatten(root: str, dict_obj: dict) -> dict:
    """Collapse a nested dict into one level with underscore-joined keys.

    Each leaf value is stored under ``root`` followed by the keys on its
    path, joined with ``_`` (for example ``MainName_SubName1_SubName2_col5``).

    Args:
        root: Prefix for every key in the result.
        dict_obj: The nested dict to flatten.

    Returns:
        A new flat dict. Only the last element of a list is kept, and an
        empty list produces no entry at all.
    """
    flat = {}
    for key, val in dict_obj.items():
        path = f'{root}_{key}'
        # Reduce a list to its last element; looping also covers lists of
        # lists, and the truthiness check avoids indexing into an empty list.
        while isinstance(val, list) and val:
            val = val[-1]
        if isinstance(val, dict):
            flat.update(flatten(path, val))
        elif not isinstance(val, list):
            # Plain leaf value, or the last item of a list of scalars.
            flat[path] = val
        # An empty list falls through: there is nothing to record for it.
    return flat

import csv

# Flatten every record under "MainName" into one flat dict per CSV row.
rows = [flatten('MainName', record) for record in dc['MainName']]
keys_list = [list(row.keys()) for row in rows]
values_list = [list(row.values()) for row in rows]

# Records do not have to share the same keys, so the header is the union of
# all keys in first-seen order (dict.fromkeys de-duplicates and keeps order).
fieldnames = list(dict.fromkeys(key for keys in keys_list for key in keys))

# Open the file once and let the csv module handle delimiters, quoting and
# line endings. Mode "w" keeps re-runs from appending a second header and a
# second copy of the rows; newline="" is what the csv module expects.
with open("sample.csv", "w", newline="", encoding="utf-8") as guestFile:
    writer = csv.DictWriter(guestFile, fieldnames=fieldnames)
    writer.writeheader()
    # Keys missing from a record are written as empty cells (default restval).
    writer.writerows(rows)

Checkout my code at https://repl.it/@TamilselvanLaks/jsontocsvmul

Note: Use the 'Run' button to run the program; sample.csv will then appear in the panel on the left.

There you can see all the keys in the form you want.

Please let me know whether my answer meets your expectations.

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address. If you have any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM