I have json data with nested arrays which contains same key name. My Json format is like(may vary key value pair names):
{
"name": "bharat",
"age": 27,
"vehicles": [
{
"car": "tata",
"bike": "duke",
"plane": "n",
},
{
"car": "odi",
"bike": "duke",
"plane": "n",
}]
}
I have tried Convert nested JSON to CSV file in Python but got multiple columns with same keys in vehicles.
My code is
import json
import csv
from elasticsearch import Elasticsearch
import elasticsearch.helpers
with open("query.json") as f:
query=json.load(f)
es = Elasticsearch(['http://xx.xx.xx.xx:xxxx'],verify_certs=False)
results_gen = elasticsearch.helpers.scan(
es,
query=query,
index="demo",
)
def get_leaves(item, key=None):
if isinstance(item, dict):
leaves = []
for i in item.keys():
leaves.extend(get_leaves(item[i], i))
return leaves
elif isinstance(item, list):
leaves = []
for i in item:
leaves.extend(get_leaves(i, key))
return leaves
else:
return [(key, item)]
with open('Data.csv', 'w', newline='') as f_output:
csv_output = csv.writer(f_output)
write_header = True
for entry in results_gen:
e=entry['_source']
leaf_entries = sorted(get_leaves(e))
print(leaf_entries)
if write_header:
csv_output.writerow([k for k, v in leaf_entries])
write_header = False
csv_output.writerow([v for k, v in leaf_entries])
I am getting output like
name age car car bike bike plane plane
bharat 27 tata odi duke duke n n
I expect output to be like
name age car bike plane
bharat 27 tata duke n
bharat 27 odi duke n
Something like this (Assuming you are interested in vehicles data only)
data = {"name": "bharat", "age": 27, "vehicles": [{"car": "tata", "bike": "duke", "plane": "n",
}, {"car": "odi", "bike": "duke", "plane": "n",
}]}
with open('out.csv', 'w') as out:
for v in data['vehicles']:
line = ','.join(v.values())
out.write(line + '\n')
You Can use Pandas Normalize class to normalize the json into pandas dataframe and then write the dataframe to csv
Lets say you have Json dictionaries in variable 'data' like: [{json1},{json2},{json3}...]
import json
import pandas
from pandas.io.json import json_normalize
for d in data:
normalized_data = json_normalize(d)
normalized_data .to_csv('csv_file_name', sep='|', mode='a', index=False, na_rep='', header=False)
refer: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.io.json.json_normalize.html
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.