My question is essentially the opposite of this one:
Create a Pandas DataFrame from deeply nested JSON
I'm wondering if it's possible to do the reverse. Given a table like:
Library Level School Major 2013 Total
200 MS_AVERY UGRAD GENERAL STUDIES GEST 5079
201 MS_AVERY UGRAD GENERAL STUDIES HIST 5
202 MS_AVERY UGRAD GENERAL STUDIES MELC 2
203 MS_AVERY UGRAD GENERAL STUDIES PHIL 10
204 MS_AVERY UGRAD GENERAL STUDIES PHYS 1
205 MS_AVERY UGRAD GENERAL STUDIES POLS 53
Is it possible to generate a nested dict (or JSON) like:
dict:
{'MS_AVERY':
    {'UGRAD':
        {'GENERAL STUDIES': {'GEST': 5079,
                             'MELC': 2,
...
It is not hard to write a function that builds the recursive dictionary from your DataFrame
object:
def fdrec(df):
    """Build a nested dictionary from the rows of a DataFrame.

    Every column except the last one contributes one nesting level, in
    column order, and the value of the last column is stored at the
    innermost level.  When several rows share the same full key path, the
    value of the first such row is kept.

    Args:
        df: Object exposing a two-dimensional ``values`` array, such as a
            pandas DataFrame.

    Returns:
        dict: The nested dictionary.  It is empty when ``df`` has no rows
        or fewer than two columns.
    """
    drec = {}
    for line in df.values:
        keys = line[:-1]
        # ``keys`` is an array, so test its length rather than its truthiness.
        if len(keys) == 0:
            continue
        node = drec
        # Walk (and create on demand) the intermediate levels.
        for key in keys[:-1]:
            node = node.setdefault(key, {})
        # setdefault keeps an already stored leaf, i.e. the first row wins.
        node.setdefault(keys[-1], line[-1])
    return drec
which will produce:
{'MS_AVERY':
{'UGRAD':
{'GENERAL STUDIES': {'PHYS': 1L,
'POLS': 53L,
'PHIL': 10L,
'HIST': 5L,
'MELC': 2L,
'GEST': 5079L}}}}
Here's a solution I came up with while working on this question:
def rollup_to_dict_core(x, values, columns, d_columns=None):
    """Recursively roll a DataFrame up into nested dictionaries.

    Args:
        x: DataFrame to convert.
        values: List of value column names stored at the innermost level.
            A single name yields the plain value per leaf; several names
            yield a ``{column: value}`` dict per leaf.
        columns: List of key column names, outermost level first.
        d_columns: Optional list of extra columns that are grouped together
            with ``columns[0]`` and emitted next to the nested dictionary.
            They are not forwarded to the recursive calls, so they only
            affect the level handled by this call.

    Returns:
        dict: Nested dictionary keyed by the successive ``columns``.
    """
    if d_columns is None:
        d_columns = []

    # Innermost level: map the last key column straight to the value(s).
    if len(columns) == 1:
        indexed = x.set_index(columns)
        if len(values) == 1:
            return indexed[values[0]].to_dict()
        return indexed[values].to_dict(orient='index')

    # Each group is rolled up on the remaining key columns.
    res = x.groupby([columns[0]] + d_columns).apply(
        lambda y: rollup_to_dict_core(y, values, columns[1:])
    )
    if len(d_columns) == 0:
        return res.to_dict()

    # The nested dictionaries are stored under the name of the next key column.
    res.name = columns[1]
    # reset_index treats only a list or tuple as a collection of levels, so
    # the range has to be materialised (a bare range object fails on Python 3).
    res = res.reset_index(level=list(range(1, len(d_columns) + 1)))
    return res.to_dict(orient='index')
def rollup_to_dict(x, values, d_columns=None):
    """Roll a DataFrame up into nested dictionaries.

    The key columns are all columns of ``x`` that appear in neither
    ``values`` nor ``d_columns``, taken in the order of ``x.columns``; the
    actual conversion is delegated to ``rollup_to_dict_core``.

    Args:
        x: DataFrame to convert.
        values: Names of the value columns.
        d_columns: Optional names of extra columns passed through to
            ``rollup_to_dict_core``; defaults to an empty list.

    Returns:
        Whatever ``rollup_to_dict_core`` returns for these arguments.
    """
    if d_columns is None:
        d_columns = []
    columns = []
    for name in x.columns:
        # Value and extra columns never act as nesting keys.
        if name in values or name in d_columns:
            continue
        columns.append(name)
    return rollup_to_dict_core(x, values, columns, d_columns)
>>> pprint(rollup_to_dict(df, ['2013 Total']))
{'MS_AVERY': {'UGRAD': {'GENERAL STUDIES': {'GEST': 5079,
'HIST': 5,
'MELC': 2,
'PHIL': 10,
'PHYS': 1,
'POLS': 53}}}}
# json is needed for the final dump; imported here so the snippet is self-contained.
import json

# NOTE(review): assumes `df` is the DataFrame shown in the question, with the
# columns Library, Level, School, Major and '2013 Total' - confirm before reuse.

# Columns that become the outer nesting levels, outermost first.
key = ['Library', 'Level', 'School']
# One entry per key combination, holding that group's rows as a list of
# {'Major': ..., '2013 Total': ...} records.
series = (
    df.groupby(key, sort=False)[df.columns.difference(key)]
    .apply(lambda x: x[['Major', '2013 Total']].to_dict('records'))
)

# build: {Major: Total}
# Only the first group of `series` is converted; further groups are ignored.
values = series.values[0]
major = {record['Major']: record['2013 Total'] for record in values}

# build the recursive dictionary
index = series.index[0]
d = {}
# Wrap from the innermost key outwards; the first wrap encloses `major`.
for level in reversed(index):
    d = {level: d if d else major}
print(json.dumps(d, indent=2))
It will produce:
{
"MS_AVERY": {
"UGRAD": {
"GENERAL STUDIES": {
"GEST": 5079,
"HIST": 5,
"MELC": 2,
"PHIL": 10,
"PHYS": 1,
"POLS": 53
}
}
}
}
Here is a generic way to generate the following format, in case it is what someone else is looking for. Desired format:
{ "data":
[
{
"NAME": [1, 2, 3]
},
{
"NAME": [1, 2, 3]
},
]
}
To get that:
import json
# Serialise every column of `df` as a one-entry object {"<column name>": [values...]}
# and collect the objects under the top-level "data" key.
# NOTE(review): assumes `df` is a pandas DataFrame defined earlier - confirm.
column_entries = [
    # json.dumps on the name escapes quotes and backslashes, and str() allows
    # non-string column labels; tolist() converts NumPy scalars to plain
    # Python values, which json.dumps can serialise.
    '{' + json.dumps(str(columnName)) + ':' + json.dumps(columnData.tolist()) + '}'
    # DataFrame.items() replaces iteritems(), which was removed in pandas 2.0.
    for (columnName, columnData) in df.items()
]
# Joining (instead of stripping a trailing comma) keeps the result valid
# JSON even when the frame has no columns.
jsonstr = '{"data":[' + ','.join(column_entries) + ']}'
jsonobject = json.loads(jsonstr)
jsonobject
The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please cite the site URL or the original address. For any questions, please contact: yoyou2525@163.com.