[英]Normalizing deeply nested JSON in Python incorrectly yielding hundreds of columns
I'm trying to normalize the following JSON:我正在尝试规范化以下 JSON:
{
"href": "url1",
"items": [
{
"count": 6,
"items": [
{
"href": "url2",
"count": 2,
"items": [
{
"href": "url3",
"description": {
"year": 2017,
"make": "ACURA",
"model": "ILX",
"trim": "4D SEDAN ACURAWATCH PLUS",
"subSeries": "ACURAWATCH"
},
"adjustedPricing": {
"wholesale": {
"above": 18050,
"average": 15650,
"below": 13250
},
"adjustedBy": {
"Odometer": "30352"
}
},
"wholesale": {
"above": 17600,
"average": 15200,
"below": 12800
},
"averageOdometer": 36939,
"odometerUnits": "miles",
"averageGrade": 43,
"currency": "USD",
"sampleSize": "6"
},
{
"href": "url4",
"description": {
"year": 2017,
"make": "ACURA",
"model": "ILX",
"trim": "4D SEDAN",
"subSeries": "NONE"
},
"adjustedPricing": {
"wholesale": {
"above": 17950,
"average": 16600,
"below": 15250
},
"adjustedBy": {
"Odometer": "30352"
}
},
"wholesale": {
"above": 17900,
"average": 16550,
"below": 15200
},
"averageOdometer": 31235,
"odometerUnits": "miles",
"averageGrade": 37,
"currency": "USD",
"sampleSize": "47"
}
]
},
{
"href": "url5",
"count": 2,
"items": [
{
"href": "url6",
"description": {
"year": 2017,
"make": "ACURA",
"model": "ILX",
"trim": "4D SEDAN PREMIUM",
"subSeries": "PREMIUM"
},
"adjustedPricing": {
"wholesale": {
"above": 16200,
"average": 14700,
"below": 13200
},
"adjustedBy": {
"Odometer": "53824"
}
},
"wholesale": {
"above": 18500,
"average": 16950,
"below": 15450
},
"averageOdometer": 30090,
"odometerUnits": "miles",
"averageGrade": 36,
"currency": "USD",
"sampleSize": "15"
},
{
"href": "url7",
"description": {
"year": 2017,
"make": "ACURA",
"model": "ILX",
"trim": "4D SEDAN TECH PKG",
"subSeries": "TECH PKG"
},
"adjustedPricing": {
"wholesale": {
"above": 17850,
"average": 15950,
"below": 14050
},
"adjustedBy": {
"Odometer": "53824"
}
},
"wholesale": {
"above": 19800,
"average": 17850,
"below": 15950
},
"averageOdometer": 27247,
"odometerUnits": "miles",
"averageGrade": 37,
"currency": "USD",
"sampleSize": "21"
}
]
},
{
"href": "url8",
"count": 1,
"items": [
{
"href": "url9",
"description": {
"year": 2017,
"make": "ACURA",
"model": "MDX AWD",
"trim": "4D SUV 3.5L",
"subSeries": "NONE"
},
"adjustedPricing": {
"wholesale": {
"above": 28900,
"average": 27400,
"below": 25900
},
"adjustedBy": {
"Odometer": "23753"
}
},
"wholesale": {
"above": 28000,
"average": 26500,
"below": 25000
},
"averageOdometer": 34427,
"odometerUnits": "miles",
"averageGrade": 36,
"currency": "USD",
"sampleSize": "79",
"bestMatch": true
}
]
},
{
"href": "url10",
"count": 1,
"items": [
{
"href": "url11",
"description": {
"year": 2017,
"make": "ACURA",
"model": "MDX AWD",
"trim": "4D SUV 3.5L",
"subSeries": "NONE"
},
"adjustedPricing": {
"wholesale": {
"above": 27200,
"average": 25700,
"below": 24200
},
"adjustedBy": {
"Odometer": "43735"
}
},
"wholesale": {
"above": 28000,
"average": 26500,
"below": 25000
},
"averageOdometer": 34427,
"odometerUnits": "miles",
"averageGrade": 36,
"currency": "USD",
"sampleSize": "79",
"bestMatch": true
}
]
},
{
"href": "url12",
"count": 1,
"items": [
{
"href": "url13",
"description": {
"year": 2017,
"make": "ACURA",
"model": "MDX AWD",
"trim": "4D SUV 3.5L",
"subSeries": "NONE"
},
"adjustedPricing": {
"wholesale": {
"above": 27700,
"average": 26200,
"below": 24700
},
"adjustedBy": {
"Odometer": "38097"
}
},
"wholesale": {
"above": 28000,
"average": 26500,
"below": 25000
},
"averageOdometer": 34427,
"odometerUnits": "miles",
"averageGrade": 36,
"currency": "USD",
"sampleSize": "79",
"bestMatch": true
}
]
},
{
"href": "url14",
"count": 1,
"items": [
{
"href": "url15",
"description": {
"year": 2017,
"make": "ACURA",
"model": "MDX AWD",
"trim": "4D SUV 3.5L",
"subSeries": "NONE"
},
"adjustedPricing": {
"wholesale": {
"above": 28100,
"average": 26500,
"below": 25000
},
"adjustedBy": {
"Odometer": "33627"
}
},
"wholesale": {
"above": 28000,
"average": 26500,
"below": 25000
},
"averageOdometer": 34427,
"odometerUnits": "miles",
"averageGrade": 36,
"currency": "USD",
"sampleSize": "79",
"bestMatch": true
}
]
}
]
}
],
"errors": [
{
"count": 0,
"errors": []
}
]
}
I've tried using Pandas' json_normalize function which returns 1 row with 3 columns (href, items, errors).我试过使用 Pandas 的 json_normalize function ,它返回 1 行 3 列(href、项目、错误)。 I also tried plugging this into every solution from Normalize a deeply nested json in pandas but it ends up returning a dataframe with hundreds of columns.
我还尝试将其插入到Normalize a deep nested json in pandas 的每个解决方案中,但它最终返回一个包含数百列的 dataframe。 Does anyone know how to correctly flatten this JSON so that it can be entered into SQL?
有谁知道如何正确展平这个 JSON 以便可以输入 SQL?
For recursive structure like this, you can use recursion to flatten it.对于这样的递归结构,您可以使用递归来展平它。
js
is the sample JSON you provided (had to change true
to True
) js
是您提供的示例 JSON (必须将true
更改为True
)pop()
it so it's not appended to list
being built.pop()
它因此它不会附加到正在构建的list
中。 Do recursive call to all elements in list.list
list
def flat(js, l, level=0):
i = {}
if "items" in js.keys():
i = js.pop("items")
for js2 in i:
flat(js2, l, level+1)
l.append({**{"level":level, "haschild":True}, **js})
else:
l.append({**{"level":level, "haschild":False}, **js})
return l
l = []
l =flat(js, l)
df = pd.json_normalize(l)
print(df.iloc[:,:10].to_string(index=False))
sample output样品 output
level haschild href averageOdometer odometerUnits averageGrade currency sampleSize description.year description.make
3 False url3 36939.0 miles 43.0 USD 6 2017.0 ACURA
3 False url4 31235.0 miles 37.0 USD 47 2017.0 ACURA
2 True url2 NaN NaN NaN NaN NaN NaN NaN
3 False url6 30090.0 miles 36.0 USD 15 2017.0 ACURA
3 False url7 27247.0 miles 37.0 USD 21 2017.0 ACURA
2 True url5 NaN NaN NaN NaN NaN NaN NaN
3 False url9 34427.0 miles 36.0 USD 79 2017.0 ACURA
2 True url8 NaN NaN NaN NaN NaN NaN NaN
3 False url11 34427.0 miles 36.0 USD 79 2017.0 ACURA
2 True url10 NaN NaN NaN NaN NaN NaN NaN
3 False url13 34427.0 miles 36.0 USD 79 2017.0 ACURA
2 True url12 NaN NaN NaN NaN NaN NaN NaN
3 False url15 34427.0 miles 36.0 USD 79 2017.0 ACURA
2 True url14 NaN NaN NaN NaN NaN NaN NaN
1 True NaN NaN NaN NaN NaN NaN NaN NaN
0 True url1 NaN NaN NaN NaN NaN NaN NaN
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.