简体   繁体   中英

Converting large, ill-formatted .json file to csv

I have very little experience with Python and .json files. I want to convert a large .json file that I received from someone else into a .csv file for use in Excel.

The file is formatted as such:

{
"bedrock": {
    "basinAge": "",
    "basinName": "",
    "basinSetting": "",
    "basinSource": "",
    "basinType": "",
    "division": "ATLANTICPLAIN",
    "primary": "ATLANTICPLAIN",
    "province": "COASTALPLAIN",
    "section": "MISSISSIPPIALLUVIALPLAIN"
},
"country": "US",
"county": "ButlerCounty",
"crow": {
    "PERIOD_RAN": "",
    "SITE_PERIO": "",
    "SURFACE_EL": "",
    "VS30_RANGE": "",
    "ZDRIFT": "",
    "ZONE": "",
    "ZPLEIS": "",
    "Zhol": "",
    "condition": "",
    "firmThickness": "",
    "geobed": "",
    "geodes": "",
    "geophone": "",
    "meas_type": "",
    "resonance": "",
    "sitelocation": "",
    "sitenumber": "",
    "sitevs30": "",
    "slope": "",
    "slopevel": "",
    "soilThickness": "",
    "veltofirm": "",
    "vs30": ""
},
"embaymentDepth": 27.176477284750096,
"file": "../../data\\anderson\\anderson-et-al-2003-MoDOT.json",
"geologicClass": "YNa",
"geology": "al",
"geologySource": "fullerton",
"lat": 36.790518,
"latlon": [
    [
        "36.7905",
        "-90.2025"
    ],
    "232.0000",
    0.00172447,
    "stable"
],
"location": "BridgeA-3709",
"lon": -90.202518,
"profile": {
    "entry": {
        "0": [
            0,
            146.185,
            "Empty"
        ],
        "1": [
            2.91874,
            194.378,
            "Empty"
        ],
        "2": [
            4.11277,
            228.112,
            "Empty"
        ],
        "3": [
            6.10282,
            221.687,
            "Empty"
        ],
        "4": [
            7.9602,
            221.687,
            "Empty"
        ],
        "5": [
            8.09287,
            220.08,
            "Empty"
        ],
        "6": [
            8.09287,
            216.867,
            "Empty"
        ],
        "7": [
            14.063,
            260.241,
            "Empty"
        ],
        "8": [
            18.0431,
            279.518,
            "Empty"
        ],
        "9": [
            22.1559,
            282.731,
            "Empty"
        ],
        "10": [
            26.0033,
            281.124,
            "Empty"
        ],
        "11": [
            29.9834,
            276.305,
            "Empty"
        ],
        "12": [
            36.0862,
            293.976,
            "Empty"
        ],
        "13": [
            41.9237,
            435.341,
            "Empty"
        ],
        "14": [
            48.0265,
            557.43,
            "Empty"
        ],
        "15": [
            54.1294,
            640.964,
            "Empty"
        ],
        "16": [
            59.8342,
            726.104,
            "Empty"
        ],
        "17": [
            68.1924,
            "Empty",
            "Empty"
        ]
    },
    "units": [
        "m",
        "m/s",
        "m/s"
    ]
},
"sedEnd": "",
"sedStack": "",
"sedStart": "",
"sedSubsurface": "",
"sedSurficial": "",
"sedVaneer": "",
"site": "SASW",
"state": "MO",
"terrain": "16",
"terrainvel": "246",
"vs30": {
    "profileListed": {
        "units": "",
        "value": "None"
    },
    "siteListed": {},
    "stationlisted": {
        "method": "",
        "units": "",
        "value": ""
    },
    "units": "m/s",
    "value": 232.2477304197259,
    "wald": "",
    "yong": ""
},
"vsz": [
    146.185,
    146.185,
    147.1733748014587,
    156.68616932663514,
    166.6758658515508,
    174.5091277144315,
    180.04135355419726,
    184.37079145300547,
    187.52878874899267,
    190.10050728694824,
    192.25770054815115,
    194.09311720243602,
    195.6737567374426,
    197.04922523230243,
    200.16214171750644,
    203.0924940564577,
    205.75028418598887,
    208.17185007705515,
    210.97976183739868,
    213.5984956154859,
    216.02447901610586,
    218.27823757348278,
    220.44997122220872,
    222.4921122273311,
    224.40458487503645,
    226.19935932262254,
    227.84822291575128,
    229.40085613024158,
    230.86555428020594,
    232.2477304197259,
    265.0897348970574,
    "",
    "",
    "",
    "",
    "",
    ""
]
}

There are 1000 entries such as the one above, each with the same keys. After doing some research online, I am pretty sure I need to flatten the entries, but am not sure how to do that programmatically. There are certain categorical indicators that have a series of keys after them ('bedrock', 'crow', etc.) that can be discarded if necessary.

Definitely the first step is to parse the file with a JSON parser. Then write code that looks at the resulting dictionary and extracts the data.

I'm not sure what you mean by "ill-formatted"; it looks like valid JSON. If you have trouble parsing it with Python's json module, you might try processing it with Python's yaml module. YAML is a superset of JSON but is more forgiving of minor formatting issues such as unneeded commas.

http://pymotw.com/2/json/

https://pypi.python.org/pypi/PyYAML

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM