I have a JSON file which contains the metadata for tables held within a schema.
I would like to create a dataframe for each table defined within the JSON file, i.e. Person, HomeAddress and Employment. Person and Employment are at the same level, but HomeAddress is nested within Person.
e.g. dataframe(Person):
Column_Name | Type | Format | Required |
---|---|---|---|
Person_ID | Integer | | Yes |
DateOfBirth | String | date-time | Yes |
...........
The contents of the file are as follows:
{
"$id": "12121212",
"type": "object",
"properties": {
"PersonId": {
"type": "integer"
},
"Person": {
"type": ["object", "null"],
"properties": {
"PersonId": {
"type": "integer"
},
"DateOfBirth": {
"type": "string",
"format": "date-time"
},
"DateOfBirthVerified": {
"type": "boolean"
},
"Sex": {
"type": ["string", "null"]
},
"Surname": {
"type": ["string", "null"]
},
"Initials": {
"type": ["string", "null"]
},
"Forenames": {
"type": ["string", "null"]
},
"Title": {
"type": ["string", "null"]
},
"NationalIdNumber": {
"type": ["string", "null"]
},
"HomeAddress": {
"type": ["object", "null"],
"properties": {
"EffectiveDate": {
"type": "string",
"format": "date-time"
},
"EndDate": {
"type": "string",
"format": "date-time"
},
"Category": {
"type": ["string", "null"]
},
"Line1": {
"type": ["string", "null"]
},
"Line2": {
"type": ["string", "null"]
},
"Line3": {
"type": ["string", "null"]
},
"Line4": {
"type": ["string", "null"]
},
"City": {
"type": ["string", "null"]
},
"County": {
"type": ["string", "null"]
},
"Country": {
"type": ["string", "null"]
},
"CareOfAddressee": {
"type": ["string", "null"]
},
"PostCode": {
"type": ["string", "null"]
},
"SuspectAddress": {
"type": "boolean"
},
"Overseas": {
"type": "boolean"
}
},
"required": ["EffectiveDate", "EndDate", "Category", "Line1", "Line2", "Line3", "Line4", "City", "County", "Country", "CareOfAddressee", "PostCode", "SuspectAddress", "Overseas"]
}
},
"required": ["PersonId", "DateOfBirth", "DateOfBirthVerified", "Sex", "Surname", "Initials", "Forenames", "Title", "NationalIdNumber", "HomeAddress"]
},
"Employment": {
"type": ["object", "null"],
"properties": {
"EmployeeReference": {
"type": ["string", "null"]
},
"DateFirstEmployed": {
"type": "string",
"format": "date-time"
},
"PayrollNumber": {
"type": ["string", "null"]
}
},
"required": ["EmployeeReference", "DateFirstEmployed", "PayrollNumber"]
}
},
"required": ["PersonId", "Person", "Employment"]
}
Let `d` be the dictionary of the file contents. Then you could address this recursively as follows:
import pandas as pd
import numpy as np
def get_props(d, required=None):
    """Flatten a JSON-schema-like dict into a list of property rows.

    The dict is walked recursively. Every nested dict that carries a
    ``type`` given as a string or a non-empty list yields one row, in
    depth-first order (a parent object is emitted before its own
    properties).

    Args:
        d: The (sub-)schema dictionary to walk.
        required: Names that count as required among the direct entries
            of ``d``. Defaults to no required names.

    Returns:
        A list of dicts with the keys ``Column_Name``, ``Format``,
        ``Type`` and ``Required`` (``'Yes'`` / ``'No'``). ``Format`` is
        NaN when the property declares no ``format``; for a list-valued
        ``type`` only the first entry is reported.
    """
    # Avoid a shared mutable default; None means "nothing is required".
    required = () if required is None else required

    # The "required" list sits next to "properties", so it describes the
    # entries one level further down; hand it to every child walk.
    child_required = d.get('required', [])

    props = []
    for k, v in d.items():
        if not isinstance(v, dict):
            continue

        declared = v.get('type')
        if isinstance(declared, str):
            type_name = declared
        elif isinstance(declared, (list, tuple)) and declared:
            type_name = declared[0]
        else:
            # No usable "type": either a container such as "properties",
            # or a mapping that merely holds a property *named* "type".
            type_name = None

        if type_name is not None:
            props.append({
                'Column_Name': k,
                # np.nan (lowercase) works on NumPy 1.x and 2.x; the
                # np.NaN alias was removed in NumPy 2.0.
                'Format': v.get('format', np.nan),
                'Type': type_name,
                'Required': 'Yes' if k in required else 'No',
            })

        props.extend(get_props(v, required=child_required))
    return props
# Flatten the whole schema into rows first, then build and show one DataFrame.
rows = get_props(d)
df = pd.DataFrame(rows)
print(df)
prints
index | Column_Name | Format | Type | Required |
---|---|---|---|---|
0 | PersonId | NaN | integer | Yes |
1 | Person | NaN | object | Yes |
2 | PersonId | NaN | integer | Yes |
3 | DateOfBirth | date-time | string | Yes |
4 | DateOfBirthVerified | NaN | boolean | Yes |
5 | Sex | NaN | string | Yes |
6 | Surname | NaN | string | Yes |
7 | Initials | NaN | string | Yes |
8 | Forenames | NaN | string | Yes |
9 | Title | NaN | string | Yes |
10 | NationalIdNumber | NaN | string | Yes |
11 | HomeAddress | NaN | object | Yes |
12 | EffectiveDate | date-time | string | Yes |
13 | EndDate | date-time | string | Yes |
14 | Category | NaN | string | Yes |
15 | Line1 | NaN | string | Yes |
16 | Line2 | NaN | string | Yes |
17 | Line3 | NaN | string | Yes |
18 | Line4 | NaN | string | Yes |
19 | City | NaN | string | Yes |
20 | County | NaN | string | Yes |
21 | Country | NaN | string | Yes |
22 | CareOfAddressee | NaN | string | Yes |
23 | PostCode | NaN | string | Yes |
24 | SuspectAddress | NaN | boolean | Yes |
25 | Overseas | NaN | boolean | Yes |
26 | Employment | NaN | object | Yes |
27 | EmployeeReference | NaN | string | Yes |
28 | DateFirstEmployed | date-time | string | Yes |
29 | PayrollNumber | NaN | string | Yes |
The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please cite the site URL or the original address. For any questions, please contact: yoyou2525@163.com.