import os
import json
import csv
import re
# Directory that jsontocsv() lists for input JSON files; the path is relative,
# so it is resolved against the current working directory.
subdir = "./json_files/" #'/home/varun/Desktop/pyfile'
def jsontocsv(json_dir=None, csv_path='test.csv'):
    """Merge every JSON file in a directory into one CSV file, one row per file.

    Each top-level JSON key named in ``fieldnames`` becomes a column. Nested
    objects and arrays are stored as JSON text, so cells never contain Python
    ``repr`` artifacts such as ``u'...'``. A key missing from a file yields an
    empty cell; keys that are not listed in ``fieldnames`` are dropped.

    Args:
        json_dir: Directory to scan. Defaults to the module-level ``subdir``.
        csv_path: Destination CSV file; it is overwritten if it exists.
    """
    if json_dir is None:
        json_dir = subdir
    fieldnames = ['name', 'private', 'version', 'dependencies', 'scripts', 'devDependencies']

    # One handle and one DictWriter for the whole run: rows are matched to the
    # header by key instead of relying on the dict's iteration order.
    with open(csv_path, 'w') as outfile:
        writer = csv.DictWriter(outfile, fieldnames=fieldnames, extrasaction='ignore')
        writer.writeheader()
        # Sorted so the row order does not depend on the file system.
        for file_name in sorted(os.listdir(json_dir)):
            file_path = os.path.join(json_dir, file_name)
            if not os.path.isfile(file_path):
                continue
            with open(file_path, 'r') as json_file:
                parsed_json = json.load(json_file)
            row = {}
            for key, value in parsed_json.items():
                # Serialise containers as JSON rather than letting csv call
                # str() on them, which would emit the Python repr.
                if isinstance(value, (dict, list)):
                    value = json.dumps(value, sort_keys=True)
                row[key] = value
            writer.writerow(row)
def _repr_cell_to_json(cell):
    """Return ``cell`` as JSON text if it holds the repr of a dict or list.

    Any other value (plain text, numbers, empty or missing cells) is returned
    unchanged.
    """
    import ast  # local import: the module's import block does not provide ast

    if not isinstance(cell, str):
        return cell
    text = cell.strip()
    if not text or text[0] not in '{[':
        return cell
    try:
        # literal_eval understands the u'...' prefix and only accepts
        # literals, so cell content is never executed.
        parsed = ast.literal_eval(text)
        if not isinstance(parsed, (dict, list)):
            return cell
        return json.dumps(parsed, sort_keys=True)
    except (ValueError, SyntaxError, TypeError):
        return cell


def cleanUnicode(src_path='test.csv', dest_path='data.csv'):
    """Copy a CSV file, rewriting Python-repr cells as JSON text.

    Cells such as ``{u'build': u'react-scripts build'}`` are parsed and written
    back as ``{"build": "react-scripts build"}``; every other cell is copied
    verbatim, so ordinary words containing the letter "u" are left intact.
    The source file is not deleted.

    Args:
        src_path: CSV file to read.
        dest_path: CSV file to write; it is overwritten if it exists.
    """
    default_fieldnames = ['name', 'private', 'version', 'dependencies', 'scripts', 'devDependencies']

    with open(src_path, 'r') as csvfile:
        reader = csv.DictReader(csvfile, delimiter=',')
        # An empty source has no header line; fall back to the known columns.
        fieldnames = reader.fieldnames or default_fieldnames
        # DictReader has already consumed the header, so every row is data.
        rows = list(reader)

    with open(dest_path, 'w') as outfile:
        writer = csv.DictWriter(outfile, fieldnames=fieldnames, extrasaction='ignore')
        writer.writeheader()
        for row in rows:
            cleaned = dict((column, _repr_cell_to_json(cell)) for column, cell in row.items())
            writer.writerow(cleaned)
if __name__ == '__main__':
    # Run the conversion first, then the clean-up pass that reads its output.
    for stage in (jsontocsv, cleanUnicode):
        stage()
    print("Scripts finished running all json files parsed to csv")
I am reading multiple JSON files into a single CSV file. The data ends up in the CSV file, but every nested value carries a u' prefix. How can I remove these prefixes and keep only the data I want?
Sample Input:
{
"version": "0.1.0",
"devDependencies": {
"react-scripts": "0.6.1"
},
"dependencies": {
"crossfilter": "^1.3.12",
"d3": "^4.2.6",
"d3-scale": "^1.0.3",
"dc": "^2.0.0-beta.32",
"immutable": "^3.8.1",
"jszip": "^3.1.2",
"react": "^15.3.2",
"react-addons-transition-group": "^15.3.2",
"react-dom": "^15.3.2",
"shifty": "^1.5.2",
"wolfy87-eventemitter": "^5.1.0"
},
"scripts": {
"start": "react-scripts start",
"build": "react-scripts build",
"test": "react-scripts test --env=jsdom",
"eject": "react-scripts eject"
}
}
Output:
version,dependencies,scripts,devDependencies
0.1.0,"{u'wolfy87-eventemitter': u'^5.1.0', u'shifty': u'^1.5.2', u'react-addons-transition-group': u'^15.3.2', u'react-dom': u'^15.3.2', u'dc': u'^2.0.0-beta.32', u'ccbooleananalysis': u'^1.0.0', u'react': u'^15.3.2', u'jszip': u'^3.1.2', u'crossfilter': u'^1.3.12', u'ccnetviz': u'^1.0.8', u'immutable': u'^3.8.1', u'd3': u'^4.2.6', u'd3-scale': u'^1.0.3'}","{u'test': u'react-scripts test --env=jsdom', u'start': u'react-scripts start', u'build': u'react-scripts build', u'eject': u'react-scripts eject'}",{u'react-scripts': u'0.6.1'}
Desired: the same output with every u prefix removed.
I'm not sure why you want to write dictionaries as strings into your CSV file, but anyway...
Here's one way to get strings without the `u` Unicode prefix. We process the dictionary that was created by loading the JSON data, encoding all key and value strings to UTF-8; any values that are dictionaries are processed recursively.
This works fine on pure ASCII data. However, any data that's outside the 7-bit ASCII range will be written as `\x` escape sequences. That's not really a problem though. When you read the CSV file you will probably want to convert those strings back into proper dictionaries. You can use `ast.literal_eval` for that, and it will happily accept `\x` escape sequences.
To verify that this code handles Unicode, I've added an extra item to your test data. The "devDependencies" dict now contains a new item: "unicode-test", which has a value of "™©". In the final section of my code I read the CSV data back in, convert the "devDependencies" string back into a dict, and print that dict's 'unicode-test' field to verify that it gets converted back into the correct Unicode string.
BTW, I mostly use Python 3.6 these days, and the most recent version of Python 2 I have is 2.6.6. Its `csv` module doesn't have a `DictWriter.writeheader` method, so I use an alternative way of writing the header row.
import json
import csv
import ast
# CSV file that this demo writes and then reads back.
csvname = 'test.csv'
# Sample package.json text from the question, plus a "unicode-test" entry whose
# value is non-ASCII so the encoding step is exercised.
src = '''\
{
"version": "0.1.0",
"devDependencies": {
"unicode-test": "™©",
"react-scripts": "0.6.1"
},
"dependencies": {
"crossfilter": "^1.3.12",
"d3": "^4.2.6",
"d3-scale": "^1.0.3",
"dc": "^2.0.0-beta.32",
"immutable": "^3.8.1",
"jszip": "^3.1.2",
"react": "^15.3.2",
"react-addons-transition-group": "^15.3.2",
"react-dom": "^15.3.2",
"shifty": "^1.5.2",
"wolfy87-eventemitter": "^5.1.0"
},
"scripts": {
"start": "react-scripts start",
"build": "react-scripts build",
"test": "react-scripts test --env=jsdom",
"eject": "react-scripts eject"
}
}
'''
# Parse the JSON text into nested Python dictionaries.
data = json.loads(src)
# Codec that encode_dict() uses when turning text into byte strings.
encoding = 'utf8'
def encode_dict(d, codec=None):
    """Return a copy of ``d`` with every text key and value encoded to bytes.

    Written for Python 2, where the encoded byte strings print without the
    ``u`` prefix. Nested dictionaries and lists are processed recursively.
    Values that are not text (booleans, numbers, ``None``) are copied
    unchanged instead of raising ``AttributeError``.

    Args:
        d: Dictionary to convert, typically the result of ``json.loads``.
        codec: Name of the codec to encode with. Defaults to the module-level
            ``encoding``.

    Returns:
        A new dictionary; ``d`` itself is not modified.
    """
    if codec is None:
        codec = encoding
    # ``unicode`` on Python 2, ``str`` on Python 3.
    text_type = type(u'')

    def encode_value(value):
        if isinstance(value, dict):
            return encode_dict(value, codec)
        if isinstance(value, list):
            return [encode_value(item) for item in value]
        if isinstance(value, text_type):
            return value.encode(codec)
        return value

    # dict() over a generator rather than a dict comprehension, so the code
    # still runs on Python 2.6.
    return dict((encode_value(k), encode_value(v)) for k, v in d.items())
clean_data = encode_dict(data)
print clean_data
print '- ' * 20
fieldnames = ['version', 'dependencies', 'scripts', 'devDependencies']
with open(csvname, 'wb') as outfile:
writer = csv.DictWriter(outfile, fieldnames=fieldnames)
#writer.writeheader()
# Write header, the old-fashioned way
writer.writerow(dict((s, s) for s in fieldnames))
writer.writerow(clean_data)
# Verify
with open(csvname, 'rb') as infile:
reader = csv.DictReader(infile)
for row in reader:
print row
s = row['devDependencies']
d = ast.literal_eval(s)
print d['unicode-test']
output
{'devDependencies': {'unicode-test': '\xe2\x84\xa2\xc2\xa9', 'react-scripts': '0.6.1'}, 'version': '0.1.0', 'dependencies': {'wolfy87-eventemitter': '^5.1.0', 'react-addons-transition-group': '^15.3.2', 'react-dom': '^15.3.2', 'd3-scale': '^1.0.3', 'dc': '^2.0.0-beta.32', 'jszip': '^3.1.2', 'react': '^15.3.2', 'crossfilter': '^1.3.12', 'shifty': '^1.5.2', 'd3': '^4.2.6', 'immutable': '^3.8.1'}, 'scripts': {'test': 'react-scripts test --env=jsdom', 'start': 'react-scripts start', 'build': 'react-scripts build', 'eject': 'react-scripts eject'}}
- - - - - - - - - - - - - - - - - - - -
{'devDependencies': "{'unicode-test': '\\xe2\\x84\\xa2\\xc2\\xa9', 'react-scripts': '0.6.1'}", 'version': '0.1.0', 'dependencies': "{'wolfy87-eventemitter': '^5.1.0', 'react-addons-transition-group': '^15.3.2', 'react-dom': '^15.3.2', 'd3-scale': '^1.0.3', 'dc': '^2.0.0-beta.32', 'jszip': '^3.1.2', 'react': '^15.3.2', 'crossfilter': '^1.3.12', 'shifty': '^1.5.2', 'd3': '^4.2.6', 'immutable': '^3.8.1'}", 'scripts': "{'test': 'react-scripts test --env=jsdom', 'start': 'react-scripts start', 'build': 'react-scripts build', 'eject': 'react-scripts eject'}"}
™©
contents of test.csv
version,dependencies,scripts,devDependencies
0.1.0,"{'wolfy87-eventemitter': '^5.1.0', 'react-addons-transition-group': '^15.3.2', 'react-dom': '^15.3.2', 'd3-scale': '^1.0.3', 'dc': '^2.0.0-beta.32', 'jszip': '^3.1.2', 'react': '^15.3.2', 'crossfilter': '^1.3.12', 'shifty': '^1.5.2', 'd3': '^4.2.6', 'immutable': '^3.8.1'}","{'test': 'react-scripts test --env=jsdom', 'start': 'react-scripts start', 'build': 'react-scripts build', 'eject': 'react-scripts eject'}","{'unicode-test': '\xe2\x84\xa2\xc2\xa9', 'react-scripts': '0.6.1'}"
The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please cite the site URL or the original address. For any questions, please contact: yoyou2525@163.com.