[英]convert nested json to pandas dataframe of specific format
I pulled this nested JSON data from a database and I think it's a list of dictionaries.我从数据库中提取了这个嵌套的 json 数据,我认为这是一个字典列表。 (I am not sure, I am new to Python) (我不确定,我是 python 新手)
I tried many code snippets posted on Stack Overflow, but none of them addressed my particular problem; I always ran into errors...我尝试了许多发布在 Stack Overflow 上的代码,但没有一个解决了我的特定问题,我总是遇到错误......
The data is quite large; there are more than 100 usageIds in total.数据比较大,总共有100多个usageId。 I'm only showing the first three, which look like this:我只显示前三个,它们看起来像这样:
[{'usageId': 'e83f43f8-ec4a-402d-a64e-d74b6f1df4a7',
'assessment_status_date': '2022-03-28',
'assessment_date': '2020-12-07',
'usage_assessment': 'Level 1',
'has_l3test': None,
'compensating_control': None,
'recommendations': None,
'test_category': {'Usage Reconciliation': {'test_category_assessment_date': None,
'last_updated_by': None,
'test_execution': {'Health and Welfare Plan + Usage Reconciliation': {'evidence_capture': None,
'test_result_justification': 'Test out of scope',
'latest_test_result_date': '2019-10-02',
'last_updated_by': None,
'test_execution_status': 'In Progress',
'test_result': None}},
'test_category_assessment': None,
'test_category_status': None,
'test_category_assessment_justification': None},
'Data Agreements': {'test_category_assessment_date': None,
'last_updated_by': None,
'test_execution': {'Health and Welfare Plan + Data Agreements': {'evidence_capture': None,
'test_result_justification': 'xxx',
'latest_test_result_date': '2019-10-02',
'last_updated_by': None,
'test_execution_status': 'In Progress',
'test_result': None}},
'test_category_assessment': None,
'test_category_status': None,
'test_category_assessment_justification': None},
'Data Elements': {'test_category_assessment_date': None,
'last_updated_by': None,
'test_execution': {'Health and Welfare Plan + Data Elements': {'evidence_capture': None,
'test_result_justification': 'The rationale provided for why the Usage contains no HPDEs appears valid',
'latest_test_result_date': '2019-10-02',
'last_updated_by': None,
'test_execution_status': 'In Progress',
'test_result': None}},
'test_category_assessment': None,
'test_category_status': None,
'test_category_assessment_justification': None},
'Computations': {'test_category_assessment_date': None,
'last_updated_by': None,
'test_execution': {'Health and Welfare Plan + Computations': {'evidence_capture': None,
'test_result_justification': None,
'latest_test_result_date': None,
'last_updated_by': None,
'test_execution_status': 'Not Started',
'test_result': None}},
'test_category_assessment': None,
'test_category_status': None,
'test_category_assessment_justification': None},
'Lineage': {'test_category_assessment_date': None,
'last_updated_by': None,
'test_execution': {'Health and Welfare Plan + Lineage': {'evidence_capture': None,
'test_result_justification': None,
'latest_test_result_date': None,
'last_updated_by': None,
'test_execution_status': 'Not Started',
'test_result': None}},
'test_category_assessment': None,
'test_category_status': None,
'test_category_assessment_justification': None},
'Metadata': {'test_category_assessment_date': None,
'last_updated_by': None,
'test_execution': {'Health and Welfare Plan + Metadata': {'evidence_capture': None,
'test_result_justification': 'Valid',
'latest_test_result_date': '2020-07-02',
'last_updated_by': None,
'test_execution_status': 'Completed',
'test_result': 'Pass without Compensating Controls'}},
'test_category_assessment': None,
'test_category_status': None,
'test_category_assessment_justification': None},
'Data Quality Monitoring': {'test_category_assessment_date': None,
'last_updated_by': None,
'test_execution': {'Health and Welfare Plan + Data Quality Monitoring': {'evidence_capture': None,
'test_result_justification': 'xxx',
'latest_test_result_date': '2019-08-09',
'last_updated_by': None,
'test_execution_status': 'Completed',
'test_result': 'Pass without Compensating Controls'}},
'test_category_assessment': None,
'test_category_status': None,
'test_category_assessment_justification': None},
'HPU Source Reliability': {'test_category_assessment_date': None,
'last_updated_by': None,
'test_execution': {'Health and Welfare Plan + HPU Source Reliability': {'evidence_capture': None,
'test_result_justification': 'xxx',
'latest_test_result_date': '2019-10-02',
'last_updated_by': None,
'test_execution_status': 'In Progress',
'test_result': None}},
'test_category_assessment': None,
'test_category_status': None,
'test_category_assessment_justification': None},
'Change Notification': {'test_category_assessment_date': None,
'last_updated_by': None,
'test_execution': {'Health and Welfare Plan + Change Notification': {'evidence_capture': None,
'test_result_justification': None,
'latest_test_result_date': None,
'last_updated_by': None,
'test_execution_status': 'Not Started',
'test_result': None}},
'test_category_assessment': None,
'test_category_status': None,
'test_category_assessment_justification': None}},
'assessment_status': 'In Progress',
'recommendation_indicator': None,
'assessment_justification': None,
'revalidation_justification': None},
{'usageId': 'b3c9cbbd-fb72-46df-a4a3-6dd1e1edce64',
'assessment_status_date': '2022-03-28',
'assessment_date': '2020-12-07',
'usage_assessment': 'Level 1',
'has_l3test': None,
'compensating_control': None,
'recommendations': None,
'test_category': {'Usage Reconciliation': {'test_category_assessment_date': None,
'last_updated_by': None,
'test_execution': {'New or Changed Usage Reconciles with Prior Usage': {'evidence_capture': None,
'test_result_justification': 'xxx',
'latest_test_result_date': '2020-10-23',
'last_updated_by': None,
'test_execution_status': 'Completed',
'test_result': 'Pass without Compensating Controls'}},
'test_category_assessment': None,
'test_category_status': None,
'test_category_assessment_justification': None},
'Data Agreements': {'test_category_assessment_date': None,
'last_updated_by': None,
'test_execution': {'Data Agreement Reviewed and Approved in Last Year': {'evidence_capture': None,
'test_result_justification': 'xxx',
'latest_test_result_date': '2020-07-21',
'last_updated_by': None,
'test_execution_status': 'Completed',
'test_result': 'Pass without Compensating Controls'}},
'test_category_assessment': None,
'test_category_status': None,
'test_category_assessment_justification': None},
'Data Elements': {'test_category_assessment_date': None,
'last_updated_by': None,
'test_execution': {'HPDEs Identified': {'evidence_capture': None,
'test_result_justification': 'Valid',
'latest_test_result_date': '2020-07-02',
'last_updated_by': None,
'test_execution_status': 'Completed',
'test_result': 'Pass without Compensating Controls'},
'HPDE Justification is Documented and Reasonable': {'evidence_capture': None,
'test_result_justification': 'xxx',
'latest_test_result_date': '2020-02-28',
'last_updated_by': None,
'test_execution_status': 'Completed',
'test_result': 'Pass without Compensating Controls'},
'HPDE Identification Rationale is Valid': {'evidence_capture': None,
'test_result_justification': 'xxx',
'latest_test_result_date': '2020-02-28',
'last_updated_by': None,
'test_execution_status': 'Completed',
'test_result': 'Pass without Compensating Controls'},
'Usage Output is Documented and Metadata is Registered': {'evidence_capture': None,
'test_result_justification': 'xxx',
'latest_test_result_date': '2020-08-07',
'last_updated_by': None,
'test_execution_status': 'Completed',
'test_result': 'Pass without Compensating Controls'},
'Data Element Metadata is in Curated State': {'evidence_capture': None,
'test_result_justification': 'xxx',
'latest_test_result_date': None,
'last_updated_by': 'tat00000',
'test_execution_status': 'In Progress',
'test_result': None},
'Secured Data Indicator Consistency': {'evidence_capture': None,
'test_result_justification': None,
'latest_test_result_date': None,
'last_updated_by': None,
'test_execution_status': 'Not Started',
'test_result': None},
'HPDE Metadata is in Curated State': {'evidence_capture': None,
'test_result_justification': 'Valid',
'latest_test_result_date': '2020-07-02',
'last_updated_by': None,
'test_execution_status': 'Completed',
'test_result': 'Pass without Compensating Controls'}},
'test_category_assessment': None,
'test_category_status': None,
'test_category_assessment_justification': None},
'Computations': {'test_category_assessment_date': None,
'last_updated_by': None,
'test_execution': {'Usage Outcome is Accurate': {'evidence_capture': None,
'test_result_justification': None,
'latest_test_result_date': None,
'last_updated_by': None,
'test_execution_status': 'Not Started',
'test_result': None}},
'test_category_assessment': None,
'test_category_status': None,
'test_category_assessment_justification': None},
'Lineage': {'test_category_assessment_date': None,
'last_updated_by': None,
'test_execution': {'Lineage is Accurate Reflection of Run': {'evidence_capture': None,
'test_result_justification': None,
'latest_test_result_date': None,
'last_updated_by': None,
'test_execution_status': 'Not Started',
'test_result': None},
'Partial Lineage from Authoritiative or Acceptable Source to Originating Source Exists and is Reasonable': {'evidence_capture': None,
'test_result_justification': None,
'latest_test_result_date': None,
'last_updated_by': None,
'test_execution_status': 'Not Started',
'test_result': None},
'Lineage from Usage to Authoritative or Acceptable Source is Complete': {'evidence_capture': None,
'test_result_justification': None,
'latest_test_result_date': None,
'last_updated_by': None,
'test_execution_status': 'Not Started',
'test_result': None},
'Quality is Sufficient': {'evidence_capture': None,
'test_result_justification': None,
'latest_test_result_date': None,
'last_updated_by': None,
'test_execution_status': 'Not Started',
'test_result': None}},
'test_category_assessment': None,
'test_category_status': None,
'test_category_assessment_justification': None},
'Metadata': {'test_category_assessment_date': None,
'last_updated_by': None,
'test_execution': {'Usage Description is Valid': {'evidence_capture': None,
'test_result_justification': 'xx',
'latest_test_result_date': '2020-02-28',
'last_updated_by': None,
'test_execution_status': 'Completed',
'test_result': 'Pass without Compensating Controls'},
"Usage SME's EID Status is Valid": {'evidence_capture': None,
'test_result_justification': 'Valid',
'latest_test_result_date': '2020-07-02',
'last_updated_by': None,
'test_execution_status': 'Completed',
'test_result': 'Pass without Compensating Controls'},
'Usage AE': {'evidence_capture': None,
'test_result_justification': 'Valid',
'latest_test_result_date': '2020-07-02',
'last_updated_by': None,
'test_execution_status': 'Completed',
'test_result': 'Pass without Compensating Controls'}},
'test_category_assessment': None,
'test_category_status': None,
'test_category_assessment_justification': None},
'Data Quality Monitoring': {'test_category_assessment_date': None,
'last_updated_by': None,
'test_execution': {'Data Defect Tracking Process is Reasonable': {'evidence_capture': None,
'test_result_justification': 'xxx',
'latest_test_result_date': '2020-02-28',
'last_updated_by': None,
'test_execution_status': 'Completed',
'test_result': 'Pass without Compensating Controls'},
'Data Movement is Reasonable': {'evidence_capture': None,
'test_result_justification': 'xxx',
'latest_test_result_date': '2020-10-23',
'last_updated_by': None,
'test_execution_status': 'Completed',
'test_result': 'Fail'},
'DQ Rules and Thresholds are Comprehensive': {'evidence_capture': None,
'test_result_justification': None,
'latest_test_result_date': None,
'last_updated_by': None,
'test_execution_status': 'In Progress',
'test_result': 'Fail'},
'Data Defect Tracking Process is Operating Effectively': {'evidence_capture': None,
'test_result_justification': None,
'latest_test_result_date': None,
'last_updated_by': None,
'test_execution_status': 'Not Started',
'test_result': None},
'DQ Monitoring Plan is Reasonable': {'evidence_capture': None,
'test_result_justification': 'xxx',
'latest_test_result_date': '2020-10-22',
'last_updated_by': None,
'test_execution_status': 'Completed',
'test_result': 'Pass without Compensating Controls'}},
'test_category_assessment': None,
'test_category_status': None,
'test_category_assessment_justification': None},
'HPU Source Reliability': {'test_category_assessment_date': None,
'last_updated_by': None,
'test_execution': {'Usage Consumes from Acceptable Source': {'evidence_capture': None,
'test_result_justification': 'xxx',
'latest_test_result_date': '2020-10-23',
'last_updated_by': None,
'test_execution_status': 'Completed',
'test_result': 'Fail'},
'Usage Consumes from Approved Source': {'evidence_capture': None,
'test_result_justification': None,
'latest_test_result_date': None,
'last_updated_by': None,
'test_execution_status': 'Not Started',
'test_result': None}},
'test_category_assessment': None,
'test_category_status': None,
'test_category_assessment_justification': None},
'Change Notification': {'test_category_assessment_date': None,
'last_updated_by': None,
'test_execution': {'Change Notification Process is Reasonable': {'evidence_capture': None,
'test_result_justification': 'xxx',
'latest_test_result_date': '2020-07-21',
'last_updated_by': None,
'test_execution_status': 'Completed',
'test_result': 'Pass without Compensating Controls'},
'Change Notification Process is Operating Effectively': {'evidence_capture': None,
'test_result_justification': None,
'latest_test_result_date': None,
'last_updated_by': None,
'test_execution_status': 'Not Started',
'test_result': None}},
'test_category_assessment': None,
'test_category_status': None,
'test_category_assessment_justification': None}},
'assessment_status': 'In Progress',
'recommendation_indicator': None,
'assessment_justification': None,
'revalidation_justification': None},
{'usageId': 'c67a1567-2de3-4826-97bb-99838b405acd',
'assessment_status_date': '2022-03-28',
'assessment_date': '2020-12-07',
'usage_assessment': 'Level 1',
'has_l3test': None,
'compensating_control': None,
'recommendations': None,
'test_category': {'Usage Reconciliation': {'test_category_assessment_date': None,
'last_updated_by': None,
'test_execution': {'Provided Health Insur Offer & Coverage Information Returns (1094C) + Usage Reconciliation': {'evidence_capture': None,
'test_result_justification': 'Test out of scope',
'latest_test_result_date': '2019-10-02',
'last_updated_by': None,
'test_execution_status': 'In Progress',
'test_result': None}},
'test_category_assessment': None,
'test_category_status': None,
'test_category_assessment_justification': None},
'Data Agreements': {'test_category_assessment_date': None,
'last_updated_by': None,
'test_execution': {'Provided Health Insur Offer & Coverage Information Returns (1094C) + Data Agreements': {'evidence_capture': None,
'test_result_justification': 'xxx',
'latest_test_result_date': '2019-10-21',
'last_updated_by': None,
'test_execution_status': 'Completed',
'test_result': 'Pass without Compensating Controls'}},
'test_category_assessment': None,
'test_category_status': None,
'test_category_assessment_justification': None},
'Data Elements': {'test_category_assessment_date': None,
'last_updated_by': None,
'test_execution': {'Provided Health Insur Offer & Coverage Information Returns (1094C) + Data Elements': {'evidence_capture': None,
'test_result_justification': 'Valid',
'latest_test_result_date': '2020-07-02',
'last_updated_by': None,
'test_execution_status': 'Completed',
'test_result': 'Pass without Compensating Controls'}},
'test_category_assessment': None,
'test_category_status': None,
'test_category_assessment_justification': None},
'Computations': {'test_category_assessment_date': None,
'last_updated_by': None,
'test_execution': {'Provided Health Insur Offer & Coverage Information Returns (1094C) + Computations': {'evidence_capture': None,
'test_result_justification': None,
'latest_test_result_date': None,
'last_updated_by': None,
'test_execution_status': 'Not Started',
'test_result': None}},
'test_category_assessment': None,
'test_category_status': None,
'test_category_assessment_justification': None},
'Lineage': {'test_category_assessment_date': None,
'last_updated_by': None,
'test_execution': {'Provided Health Insur Offer & Coverage Information Returns (1094C) + Lineage': {'evidence_capture': None,
'test_result_justification': None,
'latest_test_result_date': None,
'last_updated_by': None,
'test_execution_status': 'Not Started',
'test_result': None}},
'test_category_assessment': None,
'test_category_status': None,
'test_category_assessment_justification': None},
'Metadata': {'test_category_assessment_date': None,
'last_updated_by': None,
'test_execution': {'Provided Health Insur Offer & Coverage Information Returns (1094C) + Metadata': {'evidence_capture': None,
'test_result_justification': 'Valid',
'latest_test_result_date': '2020-07-02',
'last_updated_by': None,
'test_execution_status': 'Completed',
'test_result': 'Pass without Compensating Controls'}},
'test_category_assessment': None,
'test_category_status': None,
'test_category_assessment_justification': None},
'Data Quality Monitoring': {'test_category_assessment_date': None,
'last_updated_by': None,
'test_execution': {'Provided Health Insur Offer & Coverage Information Returns (1094C) + Data Quality Monitoring': {'evidence_capture': None,
'test_result_justification': 'xxx',
'latest_test_result_date': '2019-10-21',
'last_updated_by': None,
'test_execution_status': 'Completed',
'test_result': 'Pass without Compensating Controls'}},
'test_category_assessment': None,
'test_category_status': None,
'test_category_assessment_justification': None},
'HPU Source Reliability': {'test_category_assessment_date': None,
'last_updated_by': None,
'test_execution': {'Provided Health Insur Offer & Coverage Information Returns (1094C) + HPU Source Reliability': {'evidence_capture': None,
'test_result_justification': None,
'latest_test_result_date': None,
'last_updated_by': None,
'test_execution_status': 'Not Started',
'test_result': None}},
'test_category_assessment': None,
'test_category_status': None,
'test_category_assessment_justification': None},
'Change Notification': {'test_category_assessment_date': None,
'last_updated_by': None,
'test_execution': {'Provided Health Insur Offer & Coverage Information Returns (1094C) + Change Notification': {'evidence_capture': None,
'test_result_justification': None,
'latest_test_result_date': None,
'last_updated_by': None,
'test_execution_status': 'Not Started',
'test_result': None}},
'test_category_assessment': None,
'test_category_status': None,
'test_category_assessment_justification': None}},
'assessment_status': 'In Progress',
'recommendation_indicator': None,
'assessment_justification': None,
'revalidation_justification': None}]
I want to convert it into 2 tables.我想将其转换为 2 个表。 I've created 2 tables in excel, it'll be something like that.我在 excel 中创建了 2 个表,它会是这样的。
I apologize if this is a stupid question or if my question is badly formatted.如果我问了一个愚蠢的问题或我的问题格式不好,我深表歉意。
I did the following processing steps and ended up with df4, which has the relevant columns of interest; please have a look:我做了以下处理步骤,最后得到了带有相关列的 df4,请看一下:
import pandas as pd
null = 'null'
data = {
"usageId": "1",
"assessment_status_date": "2022-03-28",
"assessment_date": "2020-12-07",
"usage_assessment": "Lev...<truncated> }
# Asker's first attempt: reshape the nested records into a single frame (df4).
# NOTE(review): the snippet's indentation was lost and the `data` literal above
# is truncated, so which statements sit inside the loops cannot be confirmed here.
# NOTE(review): `d` is not defined in the visible snippet (the dict above is
# bound to `data`) - presumably it was defined in the truncated part; confirm.
df1 = pd.DataFrame(d)
# Visit each distinct usageId value found in df1.
for usageId in set(df1['usageId']): #total more than 100 usageId
# Keep only the rows of df1 that belong to the current usageId.
df2 = df1[df1['usageId'] == usageId] #filter based on each usageId
# Take the first key of the 'test_execution' mapping in entry 0 of the
# test_category column and keep the text before ' + '.
test_category = list(df2['test_category'][0]['test_execution'].keys())[0].split(' + ')[0]
# Frame built from that same 'test_execution' mapping; the outer pd.DataFrame
# call simply re-wraps the inner one.
df3 = pd.DataFrame(pd.DataFrame(df2['test_category'][0]['test_execution']))
# Append the 'test_execution' mappings of entries 1..len(df2)-1 as extra columns.
# NOTE(review): these are read from df1, not the filtered df2 - confirm intent.
for i in range(1, len(df2)):
df3 = pd.concat([df3, pd.DataFrame(df1['test_category'][i]['test_execution'])], axis=1)
# Keep only the text after ' + ' as each column label.
df3.columns = [col.split(' + ')[1] for col in df3.columns]
# Transpose so the former column labels become the row index.
df3 = df3.T
df3['test_category'] = test_category
# Put the remaining df1 columns (everything except test_category) beside df3.
df4 = pd.concat([df3, df1.drop(columns=['test_category'])], axis=1)
print(df4)
Output:
evidence_capture last_updated_by latest_test_result_date test_execution_status test_result test_result_justification test_category usageId assessment_status_date assessment_date usage_assessment has_l3test compensating_control recommendations assessment_status recommendation_indicator assessment_justification revalidation_justification
Change Notification null null null Not Started null null Health and Welfare Plan 1 2022-03-28 2020-12-07 Level 1 null null null In Progress null null null
Computations null null null Not Started null null Health and Welfare Plan 1 2022-03-28 2020-12-07 Level 1 null null null In Progress null null null
Data Agreements null null 2019-10-02 In Progress null Due to the Policy Health and Welfare Plan 1 2022-03-28 2020-12-07 Level 1 null null null In Progress null null null
Data Elements null null 2019-10-02 In Progress null xxx Health and Welfare Plan 1 2022-03-28 2020-12-07 Level 1 null null null In Progress null null null
Data Quality Monitoring null null 2019-08-09 Completed xxx xxx Health and Welfare Plan 1 2022-03-28 2020-12-07 Level 1 null null null In Progress null null null
HPU Source Reliability null null 2019-10-02 In Progress null xxx. Health and Welfare Plan 1 2022-03-28 2020-12-07 Level 1 null null null In Progress null null null
Lineage null null null Not Started null null Health and Welfare Plan 1 2022-03-28 2020-12-07 Level 1 null null null In Progress null null null
Metadata null null 2020-07-02 Completed xxx Valid Health and Welfare Plan 1 2022-03-28 2020-12-07 Level 1 null null null In Progress null null null
Usage Reconciliation null null 2019-10-02 In Progress null Test out of scope Health and Welfare Plan 1 2022-03-28 2020-12-07 Level 1 null null null In Progress null null null
I finally solved it with the code below:我终于用下面的代码解决了:
# Flatten the nested records into one long table, `df_execution`, with one row
# per test execution. Each row carries the execution's own fields plus three
# identifying columns: 'test_execution' (its name), 'usageId' and 'test_category'.
#
# `assessment_json` is expected to be an iterable of dicts shaped like the
# sample records shown in the question.
usage_te_df_list = []  # per-category frames; the original snippet never initialised this
for usage_json in assessment_json:
    # `or {}` tolerates both a missing key and an explicit None value.
    categories = usage_json.get('test_category') or {}
    for tc, category in categories.items():
        executions = category.get('test_execution')
        if not executions:
            # Nothing to tabulate for this category.
            continue
        # A dict of dicts becomes columns-per-test; transpose so that each
        # test execution is a row and its fields are the columns.
        sub_te_df = pd.DataFrame(executions).T
        # Move the test-execution names out of the index into a real column.
        sub_te_df = sub_te_df.reset_index().rename(columns={'index': 'test_execution'})
        sub_te_df['usageId'] = usage_json['usageId']
        sub_te_df['test_category'] = tc
        usage_te_df_list.append(sub_te_df)

if usage_te_df_list:
    df_execution = pd.concat(usage_te_df_list, ignore_index=True)
else:
    # pd.concat raises ValueError on an empty list; fall back to an empty
    # frame that still exposes the identifying columns.
    df_execution = pd.DataFrame(columns=['test_execution', 'usageId', 'test_category'])
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.