This is incredibly basic: several months ago I successfully used Python to parse the JSON files in a single directory. But now I can't figure out how I tweaked the script (a teammate came up with the code) so that I can get the data into a more usable CSV format.
At the moment, I'm getting zilch when using Python Launcher or Terminal.
What the parser looks like:
import codecs
import json
import os
import sys
try:
    # Prefer the third-party unicodecsv package when it is installed ...
    import unicodecsv as csv
except ImportError:
    # ... otherwise fall back to the standard-library csv module.
    import csv

# Default CSV path, used when no output file is given on the command line.
OUTPUT_FILE = 'output.csv'
def process_file(infile, writer):
    """Write CSV rows for the records found in one JSON file.

    Args:
        infile: Path of a UTF-8 encoded JSON file. Its top-level value is
            iterated, and each item is indexed by 'id', 'description',
            'source' and 'secondarySource'.
        writer: Object exposing ``writerow(list)``, e.g. a ``csv.writer``.

    Raises:
        KeyError: If an item (or nested entry) lacks one of the keys read
            below.
    """
    print('Processing file: %s' % infile)
    # Use a separate name for the handle so the path argument is not shadowed.
    with codecs.open(infile, encoding='utf-8') as handle:
        data = json.load(handle)

    # NOTE(review): the loop nesting is reconstructed from a paste that lost
    # its indentation; this assumes one row per (source, secondarySource)
    # pair -- confirm against the intended output.
    for item in data:
        _id = item['id']
        description = item['description']
        for gov in item['source']:
            gov_id = gov['name']
            for source in item['secondarySource']:
                source_id = source['sourceId']
                name = source['name']
                party = source['party']
                writer.writerow(
                    [_id, description, gov_id, source_id, name, party])
def process_files_in_directory(directory, outfile):
    """Convert every ``.json`` file in *directory* into rows of one CSV file.

    A header row is written first, then ``process_file`` is called for each
    file whose name ends with '.json'. Sub-directories are not descended into.

    Args:
        directory: Path of the directory to scan.
        outfile: Path of the CSV file to create. It is opened in 'w' mode,
            so existing content is replaced.
    """
    # NOTE(review): the file is opened in text mode without newline='';
    # the Python 3 csv docs recommend newline='' -- confirm for the target
    # platform and csv implementation.
    with codecs.open(outfile, 'w') as out_handle:
        writer = csv.writer(out_handle)
        writer.writerow(
            ["id", "description", "branch", "sourceID", "name", "party"])
        # Iterate over the `directory` argument (the previous code referenced
        # an undefined name). Sorted so the row order is reproducible, since
        # os.listdir returns entries in arbitrary order.
        for filename in sorted(os.listdir(directory)):
            if filename.endswith('.json'):
                # os.listdir yields bare names; join with the directory so
                # the file is found regardless of the working directory.
                process_file(os.path.join(directory, filename), writer)
# Help text printed when the script is run without a source directory.
USAGE = """
Usage:
python json_parser.py <source_directory> [<output_file>]
Where source_directory is path to directory with input JSON files.
output_file is optional -- defaults to %s
File names must end with .json
""" % OUTPUT_FILE


def main(argv=None):
    """Run the converter from command-line style arguments.

    Args:
        argv: Sequence of arguments without the program name; defaults to
            ``sys.argv[1:]``. The first entry is the source directory, the
            optional second entry is the output CSV path.

    Prints USAGE and exits with status 0 when no source directory is given.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if not args:
        print(USAGE)
        sys.exit(0)
    directory = args[0]
    # Fall back to the default output path when none was supplied.
    outfile = args[1] if len(args) > 1 else OUTPUT_FILE
    process_files_in_directory(directory, outfile)


if __name__ == '__main__':
    main()
Your script has some formatting issues; I'm not sure whether they are related to the problem you are seeing. Here's a new version of your script. The basic idea works, but you might want to format the CSV output to make it even more readable. To prove this works, I ran it from the command line with:
python stackoverflow\junk.py stackoverflow\mydir
where `stackoverflow\mydir` has two files: `one.json` and `two.json`.
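The code below includes the fix from my comment above (list the `directory` argument instead of the undefined `path`, and join each file name with the directory before opening it):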
import codecs
import json
import os
import sys
try:
    # Prefer the third-party unicodecsv package when it is installed ...
    import unicodecsv as csv
except ImportError:
    # ... otherwise fall back to the standard-library csv module.
    import csv

# Default CSV path, used when no output file is given on the command line.
OUTPUT_FILE = 'output.csv'
def process_file(infile, writer):
    """Write CSV rows for the records found in one JSON file.

    Args:
        infile: Path of a UTF-8 encoded JSON file. Its top-level value is
            iterated, and each item is indexed by 'id', 'description',
            'source' and 'secondarySource'.
        writer: Object exposing ``writerow(list)``, e.g. a ``csv.writer``.

    Raises:
        KeyError: If an item (or nested entry) lacks one of the keys read
            below.
    """
    print('Processing file: %s' % infile)
    # Use a separate name for the handle so the path argument is not shadowed.
    with codecs.open(infile, encoding='utf-8') as handle:
        data = json.load(handle)

    # NOTE(review): the loop nesting is reconstructed from a paste that lost
    # its indentation; this assumes one row per (source, secondarySource)
    # pair -- confirm against the intended output.
    for item in data:
        _id = item['id']
        description = item['description']
        for gov in item['source']:
            gov_id = gov['name']
            for source in item['secondarySource']:
                source_id = source['sourceId']
                name = source['name']
                party = source['party']
                writer.writerow(
                    [_id, description, gov_id, source_id, name, party])
def process_files_in_directory(directory, outfile):
    """Convert every ``.json`` file in *directory* into rows of one CSV file.

    A header row is written first, then ``process_file`` is called for each
    file whose name ends with '.json'. Sub-directories are not descended into.

    Args:
        directory: Path of the directory to scan.
        outfile: Path of the CSV file to create. It is opened in 'w' mode,
            so existing content is replaced.
    """
    # NOTE(review): the file is opened in text mode without newline='';
    # the Python 3 csv docs recommend newline='' -- confirm for the target
    # platform and csv implementation.
    with codecs.open(outfile, 'w') as out_handle:
        writer = csv.writer(out_handle)
        writer.writerow(
            ["id", "description", "branch", "sourceID", "name", "party"])
        # Sorted so the row order is reproducible, since os.listdir returns
        # entries in arbitrary order.
        for filename in sorted(os.listdir(directory)):
            if filename.endswith('.json'):
                # os.listdir yields bare names; join with the directory so
                # the file is found regardless of the working directory.
                process_file(os.path.join(directory, filename), writer)
# Help text printed when the script is run without a source directory.
USAGE = """
Usage:
python json_parser.py <source_directory> [<output_file>]
Where source_directory is path to directory with input JSON files.
output_file is optional -- defaults to %s
File names must end with .json
""" % OUTPUT_FILE


def main(argv=None):
    """Run the converter from command-line style arguments.

    Args:
        argv: Sequence of arguments without the program name; defaults to
            ``sys.argv[1:]``. The first entry is the source directory, the
            optional second entry is the output CSV path.

    Prints USAGE and exits with status 0 when no source directory is given.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if not args:
        print(USAGE)
        sys.exit(0)
    directory = args[0]
    # Fall back to the default output path when none was supplied.
    outfile = args[1] if len(args) > 1 else OUTPUT_FILE
    process_files_in_directory(directory, outfile)


if __name__ == '__main__':
    main()
The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please cite the site URL or the original address. For any questions, please contact: yoyou2525@163.com.