繁体   English   中英

如何在 Python 中解析巨大的 JSON 文件、提取特定数据并实现搜索

[英]How to parse and get specific data from a huge json file to implement search in python

我有一个包含大量信息的 JSON 文件。我想提取每个 position 所在位置的特定数据:需要获取紧跟在 position 之后的那个 name 的值,并尝试在 Python 中实现搜索。下面附上该 JSON 文件(ex.json)的一部分示例数据。

`

{
  "storables": [
    {
      "columns": [
        {
          "position": 0,
          "header": {
            "id": "",
            "indexVersion": 35643,
            "generationNum": 35643,
            "name": "CAT",
            "author": "",
            "created": 1620247188226,
            "modified": 1668544812673,
            "modifiedBy": "",
            "owner": "",
            "isDeleted": false,
            "isHidden": false,
            "tags": [],
            "isExternal": false,
            "isDeprecated": false
          },
          "complete": true,
          "incompleteDetail": [],
          "isDerived": true,
          "dataType": "VARCHAR",
          "type": "ATTRIBUTE",
          "sageOutputColumnId": "",
          "defaultAggrType": "NONE",
          "ownerName": "",
          "ownerType": "WORKSHEET",
          "entityCategory": "DEFAULT",
          "spotiqPreference": "DEFAULT",
          "isAdditive": false,
          "indexType": "DEFAULT",
          "indexPriority": 1,
          "sources": [
            {
              "tableId": "",
              "tableName": "",
              "columnId": "",
              "columnName": "CATASTROPHE"
            }
          ],
          "synonyms": [],
          "injectedInlineValues": [],
          "precision": -1,
          "scale": 0,
          "isPrimaryKey": false,
          "isAttributionDimension": true,
          "derivationExpr": {
            "exprType": "LOGICAL_COLUMN_REFERENCE",
            "logicalColumn": {
              "header": {
                "id": "",
                "indexVersion": 35499,
                "generationNum": 35499,
                "name": "CATASTROPHE",
                "author": "",
                "created": 1630716505804,
                "modified": 1668211006637,
                "modifiedBy": "",
                "owner": "",
                "isDeleted": false,
                "isHidden": false,
                "schemaStripe": "",
                "databaseStripe": "",
                "tags": [],
                "isExternal": false,
                "isDeprecated": false
              }
            },
            "joinPaths": [
              {
                "joins": [
                  {
                    "sourceTable": "",
                    "destinationTable": "",
                    "content": {
                      "relationships": [
                        {
                          "sourceColumn": "",
                          "destinationColumn": ""
                        }
                      ],
                      "weight": 1
                    },
                    "joinType": "INNER",
                    "type": "USER_DEFINED",
                    "isOneToOneJoin": false,
                    "header": {
                      "id": "",
                      "indexVersion": 35499,
                      "generationNum": 35499,
                      "name": "",
                      "description": "",
                      "author": "",
                      "created": 1650658367043,
                      "modified": 1668211006686,
                      "modifiedBy": "",
                      "owner": "",
                      "isDeleted": false,
                      "isHidden": false,
                      "tags": [],
                      "type": "USER_DEFINED",
                      "isExternal": false,
                      "isDeprecated": false
                    },
                    "complete": true,
                    "incompleteDetail": [],
                    "sourceColumns": [
                      ""
                    ],
                    "targetColumns": [
                      ""
                    ]
                  }
                ]
              }
            ]
          }
        },
        {
          "position": 1,
          "header": {
            "id": "",
            "indexVersion": 35643,
            "generationNum": 35643,
            "name": "Peril",
            "author": "",
            "created": 1620247188226,
            "modified": 1668544812673,
            "modifiedBy": "",
            "owner": "",
            "isDeleted": false,
            "isHidden": false,
            "tags": [],
            "isExternal": false,
            "isDeprecated": false
          },
          "complete": true,
          "incompleteDetail": [],
          "isDerived": true,
          "dataType": "VARCHAR",
          "type": "ATTRIBUTE",
          "sageOutputColumnId": "",
          "defaultAggrType": "NONE",
          "ownerName": "",
          "ownerType": "WORKSHEET",
          "entityCategory": "DEFAULT",
          "spotiqPreference": "DEFAULT",
          "isAdditive": false,
          "indexType": "DEFAULT",
          "indexPriority": 1,
          "sources": [
            {
              "tableId": "",
              "tableName": "",
              "columnId": "",
              "columnName": "TYPE_OF"
            }
          ],
          "synonyms": [],
          "injectedInlineValues": [],
          "precision": -1,
          "scale": 0,
          "isPrimaryKey": false,
          "isAttributionDimension": true,
          "derivationExpr": {
            "exprType": "LOGICAL_COLUMN_REFERENCE",
            "logicalColumn": {
              "header": {
                "id": "",
                "indexVersion": 35499,
                "generationNum": 35499,
                "name": "TYPE_OF",
                "author": "",
                "created": 1630716505804,
                "modified": 1668211006637,
                "modifiedBy": "",
                "owner": "",
                "isDeleted": false,
                "isHidden": false,
                "schemaStripe": "",
                "databaseStripe": "",
                "tags": [],
                "isExternal": false,
                "isDeprecated": false
              }
            },
            "joinPaths": [
              {
                "joins": [
                  {
                    "sourceTable": "",
                    "destinationTable": "",
                    "content": {
                      "relationships": [
                        {
                          "sourceColumn": "",
                          "destinationColumn": ""
                        }
                      ],
                      "weight": 1
                    },
                    "joinType": "INNER",
                    "type": "USER_DEFINED",
                    "isOneToOneJoin": false,
                    "header": {
                      "id": "",
                      "indexVersion": 35499,
                      "generationNum": 35499,
                      "name": "",
                      "description": "Copy of user table relationship",
                      "author": "",
                      "created": 1650658367043,
                      "modified": 1668211006686,
                      "modifiedBy": "",
                      "owner": "",
                      "isDeleted": false,
                      "isHidden": false,
                      "tags": [],
                      "type": "USER_DEFINED",
                      "isExternal": false,
                      "isDeprecated": false
                    },
                    "complete": true,
                    "incompleteDetail": [],
                    "sourceColumns": [
                      ""
                    ],
                    "targetColumns": [
                      ""
                    ]
                  }
                ]
              }
            ]
          }
        },
        {
          "position": 2,
          "header": {
            "id": "",
            "indexVersion": 35643,
            "generationNum": 35643,
            "name": "Job",
            "author": "",
            "created": 1620247188226,
            "modified": 1668544812673,
            "modifiedBy": "",
            "owner": "",
            "isDeleted": false,
            "isHidden": false,
            "tags": [],
            "isExternal": false,
            "isDeprecated": false
          },
          "complete": true,
          "incompleteDetail": [],
          "isDerived": true,
          "dataType": "VARCHAR",
          "type": "ATTRIBUTE",
          "sageOutputColumnId": "",
          "defaultAggrType": "NONE",
          "ownerName": "",
          "ownerType": "WORKSHEET",
          "entityCategory": "DEFAULT",
          "spotiqPreference": "DEFAULT",
          "isAdditive": false,
          "indexType": "DEFAULT",
          "indexPriority": 1,
          "sources": [
            {
              "tableId": "",
              "tableName": "",
              "columnId": "",
              "columnName": ""
            }
          ],
          "synonyms": [],
          "injectedInlineValues": [],
          "precision": -1,
          "scale": 0,
          "isPrimaryKey": false,
          "isAttributionDimension": true,
          "derivationExpr": {
            "exprType": "LOGICAL_COLUMN_REFERENCE",
            "logicalColumn": {
              "header": {
                "id": "",
                "indexVersion": 35499,
                "generationNum": 35499,
                "name": "ROTATION_TRADE",
                "author": "",
                "created": 1630716505804,
                "modified": 1668211006637,
                "modifiedBy": "",
                "owner": "",
                "isDeleted": false,
                "isHidden": false,
                "schemaStripe": "",
                "databaseStripe": "",
                "tags": [],
                "isExternal": false,
                "isDeprecated": false
              }
            },
            "joinPaths": [
              {
                "joins": [
                  {
                    "sourceTable": "",
                    "destinationTable": "",
                    "content": {
                      "relationships": [
                        {
                          "sourceColumn": "",
                          "destinationColumn": ""
                        }
                      ],
                      "weight": 1
                    },
                    "joinType": "INNER",
                    "type": "USER_DEFINED",
                    "isOneToOneJoin": false,
                    "header": {
                      "id": "",
                      "indexVersion": 35499,
                      "generationNum": 35499,
                      "name": "",
                      "description": "Copy of user table relationship",
                      "author": "",
                      "created": 1650658367043,
                      "modified": 1668211006686,
                      "modifiedBy": "",
                      "owner": "",
                      "isDeleted": false,
                      "isHidden": false,
                      "tags": [],
                      "type": "USER_DEFINED",
                      "isExternal": false,
                      "isDeprecated": false
                    },
                    "complete": true,
                    "incompleteDetail": [],
                    "sourceColumns": [
                      ""
                    ],
                    "targetColumns": [
                      ""
                    ]
                  }
                ]
              }
            ]
          }
        },
        {
          "position": 3,
          "header": {
            "id": "",
            "indexVersion": 35643,
            "generationNum": 35643,
            "name": "Job Lenghth",
            "author": "",
            "created": 1620247188226,
            "modified": 1668544812673,
            "modifiedBy": "",
            "owner": "",
            "isDeleted": false,
            "isHidden": false,
            "tags": [],
            "isExternal": false,
            "isDeprecated": false
          },
          "complete": true,
          "incompleteDetail": [],
          "isDerived": true,
          "dataType": "VARCHAR",
          "type": "ATTRIBUTE",

`

`

# Asker's attempt, kept as posted (it prints nothing for the sample above).
with open('ex.json', 'r') as f:
    for line in f:
        if 'position' in line:
            # NOTE: this inner loop iterates the same file object, so it reads
            # every remaining line; the outer loop has nothing left afterwards.
            for line in f: 
                # ' name: ' never matches: the sample's keys are quoted, with the
                # colon right after the closing quote ("name": "CAT").
                if ' name: ' in line:
                    print(line)

`

我尝试了这段 Python 代码,但它不起作用。我不确定如何只返回紧跟在 position 之后的那个 name。文件中有多个 name 实例,但我只需要 position 之后的那一个……

import json

# Read the whole document and decode it into plain Python dicts/lists;
# `data` ends up holding the parsed top-level JSON object.
with open('ex.json') as source:
    data = json.loads(source.read())

现在您可以通过变量 data 访问所有 JSON 内容,就像访问 Python 中的任何字典/列表一样。例如,对于上面的示例,data['storables'][0]['columns'][0]['header']['name'] 的值为 "CAT"。

您的代码可能有效,但您需要稍微更改一下逻辑。 这是解决方案的快速草图:

# Line-based scan: after a '"position":' line, print the first '"name":' line
# that follows it. In the sample, "name" sits a few lines below "position"
# (inside "header"), so the flag must stay set across the lines in between.
awaiting_name = False
with open('ex.json', 'r') as f:
    for line in f:
        if '"position":' in line:
            awaiting_name = True
            continue

        if awaiting_name and '"name":' in line:
            print(line)
            # Clear the flag only once the name is found, so deeper "name" keys
            # (e.g. under "derivationExpr") are skipped until the next position.
            awaiting_name = False

请注意,此解决方案假设 JSON 文件的格式规整(每个键值对各占一行,如示例所示)。如果不是这样,您可能会得到意想不到的结果。更稳健的解决方案是逐块读取文件并将其解析为 JSON,但这超出了本答案的范围。

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM