[英]Extract specific JSON keys and convert to CSV in Python
我正在使用以下代码将几个 JSON 文件转换为 CSV,它按预期工作,但它转换了 JSON 文件中的所有数据。 相反,我希望它执行以下操作:
import json, pandas
from flatten_json import flatten
# Enter the path to the JSON and the filename without appending '.json'
file_path = r'C:\Path\To\file_name'
# Open and load the JSON file
dic = json.load(open(file_path + '.json', 'r', encoding='utf-8', errors='ignore'))
# Flatten and convert to a data frame
dic_flattened = (flatten(d, '.') for d in dic)
df = pandas.DataFrame(dic_flattened)
# Export to CSV in the same directory with the original file name
export_csv = df.to_csv (file_path + r'.csv', sep=',', encoding='utf-8', index=None, header=True)
在底部的示例中,我只想要以下键下的所有内容: created
、 emails
和identities
。 rest 是无用的信息(例如statusCode
)或在不同的键名下重复(例如profile
和userInfo
)。
我知道它需要一个for
循环和if
语句来稍后指定键名,但不确定实现它的最佳方法。 到目前为止,当我想测试它时,这是我所拥有的:
import json, pandas
from flatten_json import flatten
# Enter the path to the JSON and the filename without appending '.json'
file_path = r'C:\Path\To\file_name'
# Open and load the JSON file
json_file = open(file_path + '.json', 'r', encoding='utf-8', errors='ignore')
dic = json.load(json_file)
# List keys to extract
key_list = ['created', 'emails', 'identities']
for d in dic:
#print(d['identities']) #Print all 'identities'
#if 'identities' in d: #Check if 'identities' exists
if key_list in d:
# Flatten and convert to a data frame
#dic_flattened = (flatten(d, '.') for d in dic)
#df = pandas.DataFrame(dic_flattened)
else:
# Skip
# Export to CSV in the same directory with the original file name
#export_csv = df.to_csv (file_path + r'.csv', sep=',', encoding='utf-8', index=None, header=True)
这是正确的逻辑吗?
file_name.json
示例[
{
"callId": "abc123",
"errorCode": 0,
"apiVersion": 2,
"statusCode": 200,
"statusReason": "OK",
"time": "2020-12-14T12:00:32.744Z",
"registeredTimestamp": 1417731582000,
"UID": "_guid_abc123==",
"created": "2014-12-04T22:19:42.894Z",
"createdTimestamp": 1417731582000,
"data": {},
"preferences": {},
"emails": {
"verified": [],
"unverified": []
},
"identities": [
{
"provider": "facebook",
"providerUID": "123",
"allowsLogin": true,
"isLoginIdentity": true,
"isExpiredSession": true,
"lastUpdated": "2014-12-04T22:26:37.002Z",
"lastUpdatedTimestamp": 1417731997002,
"oldestDataUpdated": "2014-12-04T22:26:37.002Z",
"oldestDataUpdatedTimestamp": 1417731997002,
"firstName": "John",
"lastName": "Doe",
"nickname": "John Doe",
"profileURL": "https://www.facebook.com/John.Doe",
"age": 30,
"birthDay": 31,
"birthMonth": 12,
"birthYear": 1969,
"city": "City, State",
"education": [
{
"school": "High School Name",
"schoolType": "High School",
"degree": null,
"startYear": 0,
"fieldOfStudy": null,
"endYear": 0
}
],
"educationLevel": "High School",
"followersCount": 0,
"gender": "m",
"hometown": "City, State",
"languages": "English",
"locale": "en_US",
"name": "John Doe",
"photoURL": "https://graph.facebook.com/123/picture?type=large",
"timezone": "-8",
"thumbnailURL": "https://graph.facebook.com/123/picture?type=square",
"username": "john.doe",
"verified": "true",
"work": [
{
"companyID": null,
"isCurrent": null,
"endDate": null,
"company": "Company Name",
"industry": null,
"title": "Company Title",
"companySize": null,
"startDate": "2010-12-31T00:00:00"
}
]
}
],
"isActive": true,
"isLockedOut": false,
"isRegistered": true,
"isVerified": false,
"lastLogin": "2014-12-04T22:26:33.002Z",
"lastLoginTimestamp": 1417731993000,
"lastUpdated": "2014-12-04T22:19:42.769Z",
"lastUpdatedTimestamp": 1417731582769,
"loginProvider": "facebook",
"loginIDs": {
"emails": [],
"unverifiedEmails": []
},
"rbaPolicy": {
"riskPolicyLocked": false
},
"oldestDataUpdated": "2014-12-04T22:19:42.894Z",
"oldestDataUpdatedTimestamp": 1417731582894
"registered": "2014-12-04T22:19:42.956Z",
"regSource": "",
"socialProviders": "facebook"
}
]
正如juanpa.arrivillaga所提到的,我只需要在key_list
之后添加以下行:
json_list = [{k:d[k] for k in key_list} for d in json_list]
这是完整的工作代码:
import json, pandas
from flatten_json import flatten
# Enter the path to the JSON and the filename without appending '.json'
file_path = r'C:\Path\To\file_name'
# Open and load the JSON file
json_list = json.load(open(file_path + '.json', 'r', encoding='utf-8', errors='ignore'))
# Extract data from the defined key names
key_list = ['created', 'emails', 'identities']
json_list = [{k:d[k] for k in key_list} for d in json_list]
# Flatten and convert to a data frame
json_list_flattened = (flatten(d, '.') for d in json_list)
df = pandas.DataFrame(json_list_flattened)
# Export to CSV in the same directory with the original file name
export_csv = df.to_csv (file_path + r'.csv', sep=',', encoding='utf-8', index=None, header=True)
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.