Hey i have a dataset with multiple JSON files and i want to convert them into one csv file to do some preprocessing and than put the csv into an database but i have some problems to convert the the JSON structure. The JSON files have the following pattern:
{
"info": {
"generated_on": "2017-12-03 08:41:42.057563",
"slice": "0-999",
"version": "v1"
},
"playlists": [
{
"name": "Throwbacks",
"collaborative": "false",
"pid": 0,
"modified_at": 1493424000,
"num_tracks": 52,
"num_albums": 47,
"num_followers": 1,
"tracks": [
{
"pos": 0,
"artist_name": "Missy Elliott",
"track_uri": "spotify:track:0UaMYEvWZi0ZqiDOoHU3YI",
"artist_uri": "spotify:artist:2wIVse2owClT7go1WT98tk",
"track_name": "Lose Control (feat. Ciara & Fat Man Scoop)",
"album_uri": "spotify:album:6vV5UrXcfyQD1wu4Qo2I9K",
"duration_ms": 226863,
"album_name": "The Cookbook"
},
{
"pos": 1,
"artist_name": "Britney Spears",
"track_uri": "spotify:track:6I9VzXrHxO9rA9A5euc8Ak",
"artist_uri": "spotify:artist:26dSoYclwsYLMAKD3tpOr4",
"track_name": "Toxic",
"album_uri": "spotify:album:0z7pVBGOD7HCIB7S8eLkLI",
"duration_ms": 198800,
"album_name": "In The Zone"
},
i tried this code in python:
import os
import json
import csv
def get_list_of_json_files():
list_of_files = os.listdir('../../spotify_million_playlist_dataset/data')
return list_of_files
#print(get_list_of_json_files())
def create_list_from_json(jsonfile):
with open(jsonfile) as f:
data = json.load(f)
data_list = [] # create an empty list
# append the items to the list in the same order.
data_list.append(data['info']['generated_on'])
data_list.append(data['info']['slice'])
data_list.append(data['info']['version'])
data_list.append(data['playlists']['name'])
data_list.append(data['playlists']['collaborative'])
data_list.append(data['playlists']['pid'])
data_list.append(data['playlists']['modified_at'])
data_list.append(data['playlists']['num_tracks'])
data_list.append(data['playlists']['num_albums'])
data_list.append(data['playlists']['num_followers'])
data_list.append(data['playlists']['tracks']['pos'])
data_list.append(data['playlists']['tracks']['artist_name'])
data_list.append(data['playlists']['tracks']['track_uri'])
data_list.append(data['playlists']['tracks']['artist_uri'])
data_list.append(data['playlists']['tracks']['track_name'])
data_list.append(data['playlists']['tracks']['album_uri'])
data_list.append(data['playlists']['tracks']['duration_ms'])
data_list.append(data['playlists']['tracks']['album_name'])
# In few json files, the race was not there so using KeyError exception to add '' at the place
#try:
# data_list.append(data['meta']['unstructured']['race'])
#except KeyError:
# data_list.append("") # will add an empty string in case race is not there.
#data_list.append(data['name'])
return data_list
def write_csv():
list_of_files = get_list_of_json_files()
for file in list_of_files:
row = create_list_from_json(f'../../spotify_million_playlist_dataset/data/{file}') # create the row to be added to csv for each file (json-file)
with open('output.csv', 'a') as c:
writer = csv.writer(c)
writer.writerow(row)
c.close()
if __name__=="__main__":
write_csv()
but i keep getting this error: "TypeError: list indices must be integers or slices, not str", any help is very appreciated, thanks
playlists
is a list, even in your json file, and you are trying to access data like from a dictionary. This is why you are getting the error. You can solve this by implementing a simple for
loop:
for i in data['playlists']:
data_list.append(i['name'])
data_list.append(i['collaborative'])
...
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.