How can i convert this multiple JSON files into one CSV file

Question

Hey i have a dataset with multiple JSON files and i want to convert them into one csv file to do some preprocessing and than put the csv into an database but i have some problems to convert the the JSON structure. The JSON files have the following pattern:

{
    "info": {
        "generated_on": "2017-12-03 08:41:42.057563", 
        "slice": "0-999", 
        "version": "v1"
    }, 
    "playlists": [
        {
            "name": "Throwbacks", 
            "collaborative": "false", 
            "pid": 0, 
            "modified_at": 1493424000, 
            "num_tracks": 52, 
            "num_albums": 47, 
            "num_followers": 1, 
            "tracks": [
                {
                    "pos": 0, 
                    "artist_name": "Missy Elliott", 
                    "track_uri": "spotify:track:0UaMYEvWZi0ZqiDOoHU3YI", 
                    "artist_uri": "spotify:artist:2wIVse2owClT7go1WT98tk", 
                    "track_name": "Lose Control (feat. Ciara & Fat Man Scoop)", 
                    "album_uri": "spotify:album:6vV5UrXcfyQD1wu4Qo2I9K", 
                    "duration_ms": 226863, 
                    "album_name": "The Cookbook"
                }, 
                {
                    "pos": 1, 
                    "artist_name": "Britney Spears", 
                    "track_uri": "spotify:track:6I9VzXrHxO9rA9A5euc8Ak", 
                    "artist_uri": "spotify:artist:26dSoYclwsYLMAKD3tpOr4", 
                    "track_name": "Toxic", 
                    "album_uri": "spotify:album:0z7pVBGOD7HCIB7S8eLkLI", 
                    "duration_ms": 198800, 
                    "album_name": "In The Zone"
                },

i tried this code in python:

import os
import json
import csv

def get_list_of_json_files():
    list_of_files = os.listdir('../../spotify_million_playlist_dataset/data')
    return list_of_files

#print(get_list_of_json_files())
def create_list_from_json(jsonfile):

    with open(jsonfile) as f:
        data = json.load(f)
    data_list = []  # create an empty list

    # append the items to the list in the same order.
    data_list.append(data['info']['generated_on'])
    data_list.append(data['info']['slice'])
    data_list.append(data['info']['version'])
    
    data_list.append(data['playlists']['name'])
    data_list.append(data['playlists']['collaborative'])
    data_list.append(data['playlists']['pid'])
    data_list.append(data['playlists']['modified_at'])
    data_list.append(data['playlists']['num_tracks'])
    data_list.append(data['playlists']['num_albums'])
    data_list.append(data['playlists']['num_followers'])
    data_list.append(data['playlists']['tracks']['pos'])
    data_list.append(data['playlists']['tracks']['artist_name'])
    data_list.append(data['playlists']['tracks']['track_uri'])
    data_list.append(data['playlists']['tracks']['artist_uri'])
    data_list.append(data['playlists']['tracks']['track_name'])
    data_list.append(data['playlists']['tracks']['album_uri'])
    data_list.append(data['playlists']['tracks']['duration_ms'])
    data_list.append(data['playlists']['tracks']['album_name'])
    # In few json files, the race was not there so using KeyError exception to add '' at the place
    #try:
    #    data_list.append(data['meta']['unstructured']['race'])
    #except KeyError:
    #    data_list.append("")  # will add an empty string in case race is not there.
    #data_list.append(data['name'])

    return data_list

def write_csv():
    list_of_files = get_list_of_json_files()
    for file in list_of_files:
        row = create_list_from_json(f'../../spotify_million_playlist_dataset/data/{file}')  # create the row to be added to csv for each file (json-file)
        with open('output.csv', 'a') as c:
            writer = csv.writer(c)
            writer.writerow(row)
        c.close()

if __name__=="__main__":
    write_csv()

but i keep getting this error: "TypeError: list indices must be integers or slices, not str", any help is very appreciated, thanks

Answer 1

playlists is a list, even in your json file, and you are trying to access data like from a dictionary. This is why you are getting the error. You can solve this by implementing a simple for loop:

for i in data['playlists']:
  data_list.append(i['name'])
  data_list.append(i['collaborative'])
  ...

How can i convert this multiple JSON files into one CSV file

Question

1 answers

solution1
0 2021-12-15 15:34:41

How can i convert this multiple JSON files into one CSV file

Question

1 answers

solution1 0 2021-12-15 15:34:41

solution1
0 2021-12-15 15:34:41