After two, almost three, days of programming, I am asking for help.
What I want to do: import a JSON string/file and write it to a database. There are multiple combinations of the input (cars). The most deeply nested dict/list determines the number of elements of the list that will be written to the DB.
Here is the JSON/ String:
input = [{"id":"BMW_1_Series",
"years":[{"id":10052,"year":2008,
"styles":[{"id":560,"name":"128i 2",
"submodel":{"body":"Conver","niceName":"conve"},"trim":"128i"},
{"id":561,"name":"135i ",
"submodel":{"body":"Conver","niceName":"conver"},"trim":"135i"}
]
},
{"id":427,"year":2016,
"styles":[{"id":433,"name":"228i ",
"submodel":{"body":"Conve", "niceName":"conver"},"trim":"228i SULEV"},
{"id":431,"name":"M235i",
"submodel":{"body":"Coupe", "niceName":"m235i"},"trim":"M235i"}
]
}
]
}
#I deleted the other entries, but it is a list
]
The output should be a dict and the keys for a subdict should be a prefix for the sub-key:
{'id': 427, 'year': 2016, 'styles_id': 431, 'styles_name': 'M235i', 'styles_trim': 'M235i', 'submodel_body': 'Coupe', 'submodel_niceName': 'm235i'}
I got it to work from years onwards:
# Build one flat dict per entry of `years` and collect them in `output`.
# `years` (a list of dicts) and `output` (a list) must already exist.
for s in years:  # each year entry; its "styles" list is flattened below
    outputA = {}
    for specifierA, valueA in s.items():
        if isinstance(valueA, list):
            for A in valueA:
                if isinstance(A, dict):
                    for specifierB, valueB in A.items():  # e.g. submodel
                        if isinstance(valueB, dict):
                            # Sub-dict values are prefixed with their own
                            # parent key, joined with a single "_" like every
                            # other key (previously "__", which did not match
                            # the desired 'submodel_body' style keys).
                            for specifierBB, valueBB in valueB.items():
                                outputA[specifierB + "_" + specifierBB] = valueBB
                        else:
                            outputA[specifierA + "_" + specifierB] = valueB
                else:
                    # A list holding non-dict elements is stored whole.
                    outputA[specifierA] = valueA
        elif isinstance(valueA, dict):
            for specifierB, valueB in valueA.items():
                outputA[specifierA + "_" + specifierB] = valueB
        else:
            outputA[specifierA] = valueA
    print(str(outputA))
    output.append(outputA)
I stopped here, as I wanted a general approach for reading JSON files containing dicts, lists and plain values.
My approach for that so far is below. It is not working, and I have spent three days on it. (I deleted all the intermediate prints for easier reading.) Note: the code ends up in a continuous loop.
def readL(input, prefix=""):
    """Flatten nested dicts/lists into one dict with no container values.

    A leaf value found in a dict is stored under ``prefix + key``. A nested
    dict or list is flattened recursively, using its own key plus "_" as the
    prefix, so only the immediate parent key appears in a flattened key
    (for example ``submodel_body``, not ``styles_submodel_body``). Every
    element of a list is flattened with the prefix of the list itself, so
    later elements overwrite the keys produced by earlier ones.

    Args:
        input: The dict, list or leaf value to flatten.
        prefix: Text put in front of the keys of leaf values found directly
            in ``input``.

    Returns:
        A new flat dict. It is empty when ``input`` is neither a dict nor a
        list.
    """
    output = {}
    if isinstance(input, dict):
        for key, value in input.items():
            if isinstance(value, (dict, list)):
                # Recurse instead of storing the container itself; storing it
                # is what kept the old "does the output still contain a
                # dict/list" loop from ever terminating.
                output.update(readL(value, str(key) + "_"))
            else:
                output[prefix + str(key)] = value
    elif isinstance(input, list):
        # Plain values inside a list are filed under the list's own key,
        # i.e. the prefix without its trailing "_". A list without a key
        # (empty prefix) has nowhere to put them, so they are skipped.
        scalar_key = prefix[:-1] if prefix.endswith("_") else prefix
        for element in input:
            if isinstance(element, (dict, list)):
                output.update(readL(element, prefix))
            elif scalar_key:
                output[scalar_key] = element
    return output
readL(test)
Basically, I want to have a recursive function that continuously calls itself until no dict/list is left in the output.
I am also open to any other faster methods on how to read the input.
I am desperately looking for some advice. Please bear with me, I am pretty new to Python.
THANKS A LOT!
UPDATE
I found a partial solution at Flattening Generic JSON List of Dicts or Lists in Python from @poke
def splitObj(obj, prefix=None):
    """Split one sub-object (or list of sub-objects) off a dict.

    Returns a 3-tuple: a shallow copy of ``obj`` without the split-off key,
    the key that held the sub-object(s), and the sub-objects as a list.

    Only a dict, or a non-empty list made up entirely of dicts, counts as
    sub-objects. Lists of plain values such as ``['USED']`` and empty lists
    stay in the flat part as ordinary values; treating them as records is
    what made the caller fail on non-dict items.

    Args:
        obj: The dict to split. It is not modified.
        prefix: When given, every key of the copy becomes
            ``'<prefix>_<key>'``.

    Returns:
        ``(flat, key, subobjects)``, or ``(flat, None, None)`` when the dict
        holds no sub-objects. ``key`` already includes the prefix.
    """
    # copy the object, optionally adding the prefix before each key
    if prefix is None:
        new = obj.copy()
    else:
        new = {'{}_{}'.format(prefix, k): v for k, v in obj.items()}

    # find the first key holding a subobject or a list of subobjects
    for k, v in new.items():
        if isinstance(v, dict):
            subs = [v]
        elif isinstance(v, list) and v and all(isinstance(e, dict) for e in v):
            subs = v
        else:
            continue
        # Deleting while iterating is safe only because we return at once.
        del new[k]
        return new, k, subs
    return new, None, None
def flatten(data, prefix=None):
    """Yield flat dicts for every object in ``data``.

    Each object is split with ``splitObj``. An object with nothing split off
    is yielded as is; otherwise its sub-objects are flattened recursively
    (prefixed with the key they were stored under) and the object's own flat
    fields are merged into each result before it is yielded.
    """
    for record in data:
        base, child_key, children = splitObj(record, prefix)
        if child_key is not None:
            # one output row per flattened sub-object, carrying the parent's fields
            for leaf in flatten(children, child_key):
                leaf.update(base)
                yield leaf
        else:
            # nothing nested was split off: the object is already flat
            yield base
But my input has changed a bit as well:
input = [{'states': ['USED'], 'niceName': '1-series', 'id': 'BMW_1_Series',
'years': [{'styles':
[{'trim': '128i', 'states': ['USED'], 'submodel': {'body': 'Convertible', 'niceName': 'convertible', 'modelName': '1 Series Convertible'},
'name': '128i 2dr Convertible (3.0L 6cyl 6M)', 'id': 100994560},
{'trim': '128i', 'states': ['USED'], 'submodel': {'body': 'Coupe', 'niceName': 'coupe', 'modelName': '1 Series Coupe'},
'name': '128i 2dr Coupe (3.0L 6cyl 6M)', 'id': 100974974},
{'trim': '135i', 'states': ['USED'], 'submodel': {'body': 'Coupe', 'niceName': 'coupe', 'modelName': '1 Series Coupe'},
'name': '135i 2dr Coupe (3.0L 6cyl Turbo 6M)', 'id': 100974975},
{'trim': '135i', 'states': ['USED'], 'submodel': {'body': 'Convertible', 'niceName': 'convertible', 'modelName': '1 Series Convertible'},
'name': '135i 2dr Convertible (3.0L 6cyl Turbo 6M)', 'id': 100994561}
],
'states': ['USED'], 'id': 100524709, 'year': 2008},
{'styles':
[{'trim': '135i', 'states': ['USED'], 'submodel': {'body': 'Coupe', 'niceName': 'coupe', 'modelName': '1 Series Coupe'},
'name': '135i 2dr Coupe (3.0L 6cyl Turbo 6M)', 'id': 101082656},
{'trim': '128i', 'states': ['USED'], 'submodel': {'body': 'Coupe', 'niceName': 'coupe', 'modelName': '1 Series Coupe'},
'name': '128i 2dr Coupe (3.0L 6cyl 6M)', 'id': 101082655},
{'trim': '135i', 'states': ['USED'], 'submodel': {'body': 'Convertible', 'niceName': 'convertible', 'modelName': '1 Series Convertible'},
'name': '135i 2dr Convertible (3.0L 6cyl Turbo 6M)', 'id': 101082663},
{'trim': '128i', 'states': ['USED'], 'submodel': {'body': 'Convertible', 'niceName': 'convertible', 'modelName': '1 Series Convertible'},
'name': '128i 2dr Convertible (3.0L 6cyl 6M)', 'id': 101082662}
],
'states': ['USED'], 'id': 100503222, 'year': 2009},
{'styles':
[{'trim': '128i', 'states': ['USED'], 'submodel': {'body': 'Coupe', 'niceName': 'coupe', 'modelName': '1 Series Coupe'},
'name': '128i 2dr Coupe (3.0L 6cyl 6M)', 'id': 101200599},
{'trim': '135i', 'states': ['USED'], 'submodel': {'body': 'Coupe', 'niceName': 'coupe', 'modelName': '1 Series Coupe'},
'name': '135i 2dr Coupe (3.0L 6cyl Turbo 6M)', 'id': 101200600},
{'trim': '135i', 'states': ['USED'], 'submodel': {'body': 'Convertible', 'niceName': 'convertible', 'modelName': '1 Series Convertible'},
'name': '135i 2dr Convertible (3.0L 6cyl Turbo 6M)', 'id': 101200607},
{'trim': '128i', 'states': ['USED'], 'submodel': {'body': 'Convertible', 'niceName': 'convertible', 'modelName': '1 Series Convertible'},
'name': '128i 2dr Convertible (3.0L 6cyl 6M)', 'id': 101200601}
],
'states': ['USED'], 'id': 100529091, 'year': 2010},
{'styles':
[{'trim': '128i', 'states': ['USED'], 'submodel': {'body': 'Coupe', 'niceName': 'coupe', 'modelName': '1 Series Coupe'},
'name': '128i 2dr Coupe (3.0L 6cyl 6M)', 'id': 101288165},
{'trim': '135i', 'states': ['USED'], 'submodel': {'body': 'Coupe', 'niceName': 'coupe', 'modelName': '1 Series Coupe'},
'name': '135i 2dr Coupe (3.0L 6cyl Turbo 6M)', 'id': 101288166},
{'trim': '135i', 'states': ['USED'], 'submodel': {'body': 'Convertible', 'niceName': 'convertible', 'modelName': '1 Series Convertible'},
'name': '135i 2dr Convertible (3.0L 6cyl Turbo 6M)', 'id': 101288298},
{'trim': '128i', 'states': ['USED'], 'submodel': {'body': 'Convertible', 'niceName': 'convertible', 'modelName': '1 Series Convertible'},
'name': '128i 2dr Convertible (3.0L 6cyl 6M)', 'id': 101288297}
],
'states': ['USED'], 'id': 100531309, 'year': 2011},
{'styles':
[{'trim': '128i', 'states': ['USED'], 'submodel': {'body': 'Convertible', 'niceName': 'convertible', 'modelName': '1 Series Convertible'},
'name': '128i 2dr Convertible (3.0L 6cyl 6M)', 'id': 101381667},
{'trim': '135i', 'states': ['USED'], 'submodel': {'body': 'Convertible', 'niceName': 'convertible', 'modelName': '1 Series Convertible'},
'name': '135i 2dr Convertible (3.0L 6cyl Turbo 6M)', 'id': 101381668},
{'trim': '128i', 'states': ['USED'], 'submodel': {'body': 'Coupe', 'niceName': 'coupe', 'modelName': '1 Series Coupe'},
'name': '128i 2dr Coupe (3.0L 6cyl 6M)', 'id': 101381665},
{'trim': '135i', 'states': ['USED'], 'submodel': {'body': 'Coupe', 'niceName': 'coupe', 'modelName': '1 Series Coupe'},
'name': '135i 2dr Coupe (3.0L 6cyl Turbo 6M)', 'id': 101381666}
],
'states': ['USED'], 'id': 100534729, 'year': 2012},
{'styles':
[{'trim': '128i', 'states': ['USED'], 'submodel': {'body': 'Coupe', 'niceName': 'coupe', 'modelName': '1 Series Coupe'},
'name': '128i 2dr Coupe (3.0L 6cyl 6M)', 'id': 200428722},
{'trim': '128i', 'states': ['USED'], 'submodel': {'body': 'Convertible', 'niceName': 'convertible', 'modelName': '1 Series Convertible'},
'name': '128i 2dr Convertible (3.0L 6cyl 6M)', 'id': 200428721},
{'trim': '135is', 'states': ['USED'], 'submodel': {'body': 'Coupe', 'niceName': 'coupe', 'modelName': '1 Series Coupe'},
'name': '135is 2dr Coupe (3.0L 6cyl Turbo 6M)', 'id': 200421701},
{'trim': '135i', 'states': ['USED'], 'submodel': {'body': 'Coupe', 'niceName': 'coupe', 'modelName': '1 Series Coupe'},
'name': '135i 2dr Coupe (3.0L 6cyl Turbo 6M)', 'id': 200428724},
{'trim': '135i', 'states': ['USED'], 'submodel': {'body': 'Convertible', 'niceName': 'convertible', 'modelName': '1 Series Convertible'},
'name': '135i 2dr Convertible (3.0L 6cyl Turbo 6M)', 'id': 200428723},
{'trim': '128i SULEV', 'states': ['USED'], 'submodel': {'body': 'Coupe', 'niceName': 'coupe', 'modelName': '1 Series Coupe'},
'name': '128i SULEV 2dr Coupe (3.0L 6cyl 6M)', 'id': 200428726},
{'trim': '128i SULEV', 'states': ['USED'], 'submodel': {'body': 'Convertible', 'niceName': 'convertible', 'modelName': '1 Series Convertible'},
'name': '128i SULEV 2dr Convertible (3.0L 6cyl 6M)', 'id': 200428725},
{'trim': '135is', 'states': ['USED'], 'submodel': {'body': 'Convertible', 'niceName': 'convertible', 'modelName': '1 Series Convertible'},
'name': '135is 2dr Convertible (3.0L 6cyl Turbo 6M)', 'id': 200428727}
],
'states': ['USED'], 'id': 200421700, 'year': 2013}
],
'name': '1 Series', 'make': {'niceName': 'bmw', 'name': 'BMW', 'id': 200000081}
}]
But I receive the error:
AttributeError: 'str' object has no attribute 'update'
I think this is because it does not handle 'states': ['USED'].
What can I do about it?
I moved this problem to "Flatten nested JSON (Dict, List) into List to prepare to write into DB". Please comment there. Thanks!
I don't think it's the neatest solution, but I think this does what you want it to do:
def flatten(something, parent_key=None):
    """Flatten nested dicts and lists into a single one-level dict.

    Each key of the result is the path to a leaf value, with the path parts
    joined by "_". Dict keys are used as they are (converted with ``str``),
    list elements contribute their index.

    Args:
        something: A dict, a list, or a leaf value.
        parent_key: Path of ``something`` inside the enclosing structure, or
            None at the top level.

    Returns:
        A dict mapping each path to its leaf value. A leaf passed directly
        comes back as ``{parent_key: leaf}``; empty dicts and lists
        contribute no entries.
    """
    prefix = "" if parent_key is None else parent_key + "_"
    if isinstance(something, dict):
        temp = {}
        for key, value in something.items():
            # str() so non-string keys do not break the concatenation
            temp.update(flatten(value, prefix + str(key)))
        return temp
    if isinstance(something, list):
        temp = {}
        for index, value in enumerate(something):
            temp.update(flatten(value, prefix + str(index)))
        return temp
    return {parent_key: something}
then just run it like:
if __name__=='__main__':
input_list = [{"id":"BMW_1_Series",
"years":[{"id":10052,"year":2008,
"styles":[{"id":560,"name":"128i 2",
"submodel":{"body":"Conver","niceName":"conve"},"trim":"128i"},
{"id":561,"name":"135i ",
"submodel":{"body":"Conver","niceName":"conver"},"trim":"135i"}
]
},
{"id":427,"year":2016,
"styles":[{"id":433,"name":"228i ",
"submodel":{"body":"Conve", "niceName":"conver"},"trim":"228i SULEV"},
{"id":431,"name":"M235i",
"submodel":{"body":"Coupe", "niceName":"m235i"},"trim":"M235i"}
]
}
]
}
#I deleted the other entries, but it is a list
]
a = flatten(input_list)
print(a)
If you can guarantee that every list element has an id, you can use the id in place of the index:
temp.update(flatten(something[index],prefix+str(something[index]['id'])))
I had the same need, to both flatten and denormalize arbitrary JSON - including those without a pre-defined schema. I also wanted to make sure that the one-to-one and one-to-many relationship between all elements in the JSON tree were preserved in the flattened/denormalized version.
So far, this has worked for all use cases I've tried (with files with one JSON record per line, not for multi-line JSON).
Here is another way of doing it. Closer to your requirements.
from pprint import pprint
input = [{"id":"BMW_1_Series",
"years":[{"id":10052,"year":2008,
"styles":[{"id":560,"name":"128i 2",
"submodel":{"body":"Conver","niceName":"conve"},"trim":"128i"},
{"id":561,"name":"135i ",
"submodel":{"body":"Conver","niceName":"conver"},"trim":"135i"}
]
},
{"id":427,"year":2016,
"styles":[{"id":433,"name":"228i ",
"submodel":{"body":"Conve", "niceName":"conver"},"trim":"228i SULEV"},
{"id":431,"name":"M235i",
"submodel":{"body":"Coupe", "niceName":"m235i"},"trim":"M235i"}
]
}
]
}
#I deleted the other entries, but it is a list
]
ds=input[0]["years"][0]
pprint(ds)
def flatten_ds(ds, final_ds=None, prepend_tag=None):
    """Merge all leaf values of a nested dict/list structure into one dict.

    A leaf found in a dict is stored under its key, prefixed with
    ``prepend_tag + '_'`` when a tag is set. A nested dict or list is
    flattened recursively with its own key as the tag, so only the immediate
    parent key appears in the resulting key. Elements of a list share the
    list's tag, so later elements overwrite earlier ones; a plain value in a
    list is stored under the tag itself.

    Args:
        ds: The dict, list or leaf value to flatten.
        final_ds: Dict to add the results to. A fresh dict is created when
            omitted, so results no longer leak between separate calls.
        prepend_tag: Key of the container ``ds`` was found in, if any.

    Returns:
        ``final_ds`` (the dict passed in, or the newly created one).
    """
    if final_ds is None:
        final_ds = {}
    if isinstance(ds, dict):
        # items() instead of iteritems() so this also runs on Python 3
        for k, v in ds.items():
            if isinstance(v, (dict, list)):
                flatten_ds(v, final_ds=final_ds, prepend_tag=k)
            else:
                if prepend_tag:
                    k = prepend_tag + '_' + k
                final_ds[k] = v
    elif isinstance(ds, list):
        for d in ds:
            flatten_ds(d, final_ds=final_ds, prepend_tag=prepend_tag)
    elif prepend_tag:
        # plain value that was an element of a list stored under prepend_tag
        final_ds[prepend_tag] = ds
    return final_ds
pprint(flatten_ds(ds))
If I understand you correctly, you are trying to flatten your JSON, but you only want to flatten it when you encounter a certain key or keys. If that's the case you can try this:
Given:
from pprint import pprint
input_list = [{"id":"BMW_1_Series",
"years":[{"id":10052,"year":2008,
"styles":[{"id":560,"name":"128i 2",
"submodel":{"body":"Conver","niceName":"conve"},"trim":"128i"},
{"id":561,"name":"135i ",
"submodel":{"body":"Conver","niceName":"conver"},"trim":"135i"}
]
},
{"id":427,"year":2016,
"styles":[{"id":433,"name":"228i ",
"submodel":{"body":"Conve", "niceName":"conver"},"trim":"228i SULEV"},
{"id":431,"name":"M235i",
"submodel":{"body":"Coupe", "niceName":"m235i"},"trim":"M235i"}
]
}
]
}
#I deleted the other entries, but it is a list
]
Use the functions:
def flatten(something, parent_key=None):
    """Collapse nested dicts/lists into one flat dict keyed by path.

    The path to every leaf value becomes its key, with the parts joined by
    "_": dict keys as they are (via ``str``) and list positions as their
    index.

    Args:
        something: A dict, a list, or a leaf value.
        parent_key: Path accumulated so far, or None at the top level.

    Returns:
        A dict of path -> leaf value. A leaf passed directly is returned as
        ``{parent_key: leaf}``; empty containers add nothing.
    """
    prefix = "" if parent_key is None else parent_key + "_"
    if isinstance(something, dict):
        temp = {}
        for key, value in something.items():
            # str() keeps non-string keys from raising on concatenation
            temp.update(flatten(value, prefix + str(key)))
        return temp
    if isinstance(something, list):
        temp = {}
        for index, item in enumerate(something):
            # Alternative when every element is a dict with an 'id':
            # use str(item['id']) in place of str(index).
            temp.update(flatten(item, prefix + str(index)))
        return temp
    return {parent_key: something}
def sometimes_flatten(something, flatten_keys):
    """Copy a nested structure, flattening only the values under given keys.

    Dicts and lists are walked recursively. When a dict key is listed in
    ``flatten_keys``, its value is flattened with ``flatten`` (using the key
    as the path prefix) and the resulting entries are merged into the
    enclosing dict. All other values keep their nesting.

    Args:
        something: A dict, a list, or a leaf value.
        flatten_keys: Collection of dict keys whose values get flattened.

    Returns:
        A new dict or list mirroring ``something``; leaf values are returned
        unchanged.
    """
    if isinstance(something, dict):
        temp = {}
        for key, value in something.items():
            if key in flatten_keys:
                temp.update(flatten(value, key))
            else:
                temp[key] = sometimes_flatten(value, flatten_keys)
        return temp
    if isinstance(something, list):
        return [sometimes_flatten(x, flatten_keys) for x in something]
    return something
then call them here:
if __name__=='__main__':
flatten_keys = ['years']
a = sometimes_flatten(input_list,flatten_keys)
pprint(a)
You can put any of the keys into the flatten_keys
variable to change how it behaves and how much flattening it does.
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.