[英]convert deeply nested JSON response to pandas dataframe
我是數據科學的新手。我正在做一個項目，需要從 API 調用中收集數據，該調用返回以下 JSON 響應：
{
"jsonrpc": "2.0",
"result": {
"class": "dataset",
"dimension": {
"STATISTIC": {
"category": {
"index": ["DHA10C1"],
"label": { "DHA10C1": "Births" },
"unit": {
"DHA10C1": { "decimals": 0, "label": "Number", "position": "end" }
}
},
"label": "Statistic"
},
"TLIST(A1)": {
"category": {
"index": ["2015", "2016", "2017", "2018"],
"label": {
"2015": "2015",
"2016": "2016",
"2017": "2017",
"2018": "2018"
}
},
"label": "Year"
},
"C02842V03416": {
"category": {
"index": ["-"],
"label": { "-": "All counties and regions" }
},
"label": "Area of Residence of Mother"
},
"C02025V02453": {
"category": {
"index": ["1", "2", "-"],
"label": {
"1": "Single - never married",
"2": "Married",
"-": "All marital status"
}
},
"label": "Martial Status of Mother"
},
"C02199V02655": {
"category": { "index": ["-"], "label": { "-": "Both sexes" } },
"label": "Sex of Infant"
},
"C02076V02508": {
"category": {
"index": [
"222",
"365",
"410",
"440",
"460",
"475",
"489",
"999",
"-",
"X001"
],
"label": {
"222": "0 - 19 years",
"365": "20 - 24 years",
"410": "25 - 29 years",
"440": "30 - 34 years",
"460": "35 - 39 years",
"475": "40 - 44 years",
"489": "44 years and over",
"999": "Age not stated",
"-": "All ages",
"X001": "Missing"
}
},
"label": "Age Group of Mother"
}
},
"extension": {
"matrix": "DHA10",
"reasons": [],
"language": { "code": "en", "name": "English" },
"contact": { "name": "", "email": "info@health.gov.ie", "phone": "" },
"subject": { "code": 50, "value": "Department of Health" },
"product": { "code": "DH", "value": "Health Statistics" },
"official": true,
"copyright": {
"name": "Department of Health",
"code": "DOH",
"href": "https://www.gov.ie/en/organisation/department-of-health/"
},
"exceptional": false,
"reservation": false,
"archive": false,
"experimental": false,
"analytical": false
},
"href": "https://ws.cso.ie/public/api.restful/PxStat.Data.Cube_API.ReadDataset/DHA10/JSON-stat/2.0/en",
"id": [
"STATISTIC",
"TLIST(A1)",
"C02842V03416",
"C02025V02453",
"C02199V02655",
"C02076V02508"
],
"label": "Births",
"link": {
"alternate": [
{
"type": "text/csv",
"href": "https://ws.cso.ie/public/api.restful/PxStat.Data.Cube_API.ReadDataset/DHA10/CSV/1.0/en"
},
{
"type": "application/json",
"href": "https://ws.cso.ie/public/api.restful/PxStat.Data.Cube_API.ReadDataset/DHA10/JSON-stat/1.0/en"
},
{
"type": "application/octet-stream",
"href": "https://ws.cso.ie/public/api.restful/PxStat.Data.Cube_API.ReadDataset/DHA10/PX/2013/en"
},
{
"type": "application/base64",
"href": "https://ws.cso.ie/public/api.restful/PxStat.Data.Cube_API.ReadDataset/DHA10/XLSX/2007/en"
}
]
},
"note": [
"Department of Health statistics hosted by the CSO. Any comments or queries can be sent to [url=mailto:info@health.gov.ie]info@health.gov.ie[/url]"
],
"role": { "metric": ["STATISTIC"], "time": ["TLIST(A1)"] },
"size": [1, 4, 1, 3, 1, 10],
"updated": "2021-02-25T11:00:00.000Z",
"value": [
1199.0,
5705.0,
12322.0,
23684.0,
18451.0,
3955.0,
220.0,
null,
65536.0,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
1199.0,
5705.0,
12322.0,
23684.0,
18451.0,
3955.0,
220.0,
null,
65536.0,
null,
1101.0,
5217.0,
11357.0,
23012.0,
18775.0,
4079.0,
300.0,
null,
63841.0,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
1101.0,
5217.0,
11357.0,
23012.0,
18775.0,
4079.0,
300.0,
null,
63841.0,
null,
1037.0,
5115.0,
10779.0,
21652.0,
18943.0,
3970.0,
322.0,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
1037.0,
5115.0,
10779.0,
21652.0,
18943.0,
3970.0,
322.0,
null,
null,
null,
837.0,
4088.0,
6247.0,
6351.0,
4250.0,
1190.0,
86.0,
null,
null,
null,
140.0,
966.0,
4175.0,
14574.0,
14680.0,
3148.0,
241.0,
null,
null,
null,
977.0,
5054.0,
10422.0,
20925.0,
18930.0,
4338.0,
327.0,
null,
null,
null
],
"version": "2.0"
},
"id": null
}
當我嘗試將上述響應轉換為 pandas DataFrame 時遇到了問題，因為 JSON 響應嵌套得很深。我嘗試了以下代碼片段：
df = pd.json_normalize(data['result'])
它返回以下結果：（原帖此處的輸出截圖未包含在本文中）
我只希望 DataFrame 看起來像這樣：（原帖此處的示例截圖未包含在本文中）
如能提供任何幫助，不勝感激。
像你那樣將 JSON 規範化之後，你也許可以在每一列上使用“explode”（將列表中的每個元素轉換為一行）：
df.explode('size').reset_index(drop=True)
聲明：本站的技術帖子網頁遵循 CC BY-SA 4.0 協議。如果您需要轉載，請註明本站網址或者原文地址。如有任何問題，請諮詢：yoyou2525@163.com。