Python: How to take the first value of a column of multiple .csv files + its name and make a new file with them

Question

I have multiple time series CSV dataframes built as follows:

Ex: 1.csv

,close,high,low,open,time,volumefrom,volumeto,timestamp
0,0.7,2.0,0.7,1.1,1499990400,49.17,78.14,2017-07-14 02:00:00
1,1.98,1.98,0.7,0.7,1500076800,5.69,9.93,2017-07-15 02:00:00
...

What I'd like to do is to create an output.xls file as follows:

Filename, time
1, 1499990400
2, ...
...

where Filename is the CSV file name (for example 1.csv, 2.csv, and so on), and time is the value of the time column in the first data row of each file.

I started writing the code, but there's something wrong with it. This is my attempt:

import glob

# IPython/Jupyter magic (not plain Python): switch to the directory holding the CSV files
%cd /Users/Files/Daily/

# Summary file, created in the same directory as the input files
output = open('output.csv', 'w')
# NOTE(review): the '\n' sits in the middle of the header text, so 'filename'
# and '; timestamp' end up on two separate lines -- presumably the newline was
# meant to come at the end of the header; confirm the intended delimiter too.
output.write('filename\n; timestamp')
for filename in glob.glob('*.csv'):
  # The summary file also matches '*.csv', so skip it
  if filename == 'output.csv':
    continue
  # NOTE(review): the snippet stops here -- this `with` has no body yet
  with open(filename, 'r') as input:

I can't continue without getting an error. Thank you in advance.

Answer 1

Here is a pandas solution. First, we create the data:

# Two-row sample in the layout shown in the question; the first (unnamed)
# column is the row index. The text deliberately has no trailing newline.
data = '\n'.join([
    ',close,high,low,open,time,volumefrom,volumeto,timestamp',
    '0,0.7,2.0,0.7,1.1,1499990400,49.17,78.14,2017-07-14 02:00:00',
    '1,1.98,1.98,0.7,0.7,1500076800,5.69,9.93,2017-07-15 02:00:00',
])

# Write the same sample to two input files so there is something to aggregate.
for sample_name in ('test1.csv', 'test2.csv'):
    with open(sample_name, 'w') as sample_file:
        sample_file.write(data)

And now to the actual code you can use:

import glob
import os
import pandas as pd

# Pair every input path with its base name (extension stripped), skipping the
# summary file this script writes. glob returns paths in arbitrary order, so
# sort them to make the row order of the output reproducible.
files = [(path, os.path.splitext(path)[0])
         for path in sorted(glob.glob('*.csv')) if path != 'output.csv']

# Only the first data row of each file is needed: nrows=1 stops parsing after
# that row instead of loading the whole file and slicing it afterwards.
# Each one-row frame is tagged with the name of the file it came from.
frames = [pd.read_csv(path, nrows=1).assign(filename=name)
          for path, name in files]

# pd.concat raises ValueError on an empty list, so fall back to an empty frame
# with the expected columns when no input file matched.
df = pd.concat(frames) if frames else pd.DataFrame(columns=['filename', 'time'])

# Output: one line per input file, holding its name and first 'time' value
df[['filename', 'time']].to_csv('output.csv', index=False)

Returns:

filename,time
test1,1499990400
test2,1499990400

Answer 2

Hi, I don't know pandas well enough to give you an answer with it, but I can give you one using the csv module.

I'm not sure whether the random data I generate matches your data, though:

import os.path
import random
import datetime
import csv

import glob

output_directory = "/Users/Files/Daily"

def create_files_with_random_values(nb_files, nb_rows_in_output_file):
    """Create sample CSV files filled with random values.

    Writes ``1.csv`` .. ``<nb_files>.csv`` into ``output_directory``. Each
    file gets the header row used in the question followed by
    ``nb_rows_in_output_file`` data rows. Files are opened in ``'w'`` mode,
    so existing files with those names are overwritten.

    Args:
        nb_files: number of CSV files to create.
        nb_rows_in_output_file: number of data rows written to each file.
    """
    header = ('', 'close', 'high', 'low',
              'open', 'time', 'volumefrom',
              'volumeto', 'timestamp')

    for file_number_for_name in range(nb_files):
        random_content_filename = os.path.join(
            output_directory, "{}.csv".format(file_number_for_name + 1))

        # Choose a random start date on or after July 14th 2017
        # (randrange(23) can return 0, i.e. July 14th itself)
        start_date = (datetime.datetime(2017, 7, 14, 2, 0, 0)
                      + datetime.timedelta(random.randrange(23)))

        with open(random_content_filename, 'w', newline='') as random_content_file:
            random_writer = csv.writer(random_content_file)

            # Write the first row
            random_writer.writerow(header)
            # Write the rest of the rows using a generator expression.
            # Every row must carry exactly one value per header column,
            # otherwise the values end up under the wrong column names.
            random_writer.writerows(
                (x,                                              # row index
                 round(random.uniform(0, 2), 2),                 # close
                 round(random.uniform(0, 2), 2),                 # high
                 round(random.uniform(0, 2), 2),                 # low
                 round(random.uniform(0, 2), 2),                 # open
                 # time: 10 random digits, not derived from the timestamp
                 "".join(random.choices("0123456789", k=10)),
                 round(random.uniform(0, 100), 2),               # volumefrom
                 round(random.uniform(0, 100), 2),               # volumeto
                 (start_date + datetime.timedelta(x)).isoformat(' '),  # timestamp
                 )
                for x in range(nb_rows_in_output_file)
            )


def _numeric_names_first(file_info):
    """Sort key: numeric file names in numeric order, all other names after them."""
    name = file_info[1]
    try:
        return (0, int(name), name)
    except ValueError:
        # A non-numeric name (e.g. "notes") must not abort the whole run
        return (1, 0, name)


# NOTE: this generates demo data and overwrites 1.csv .. 30.csv in
# output_directory; remove the call when running against real files.
create_files_with_random_values(30, 25)

output_filename = os.path.join(output_directory, "output.csv")
file_finder_pattern = os.path.join(output_directory, "*.csv")

with open(output_filename, "w", newline='') as output_file:
    output_writer = csv.writer(output_file)
    output_writer.writerow(('Filename', 'time'))

    # Build (full path, file name without directory and extension) pairs for
    # every CSV except the summary file itself, ordered by file name so that
    # 2.csv comes before 10.csv.
    files_wanted = sorted(
        ((x, os.path.splitext(os.path.basename(x))[0])
         for x in glob.iglob(file_finder_pattern)
         if x != output_filename),
        key=_numeric_names_first)

    for (input_filename, first_part_filename) in files_wanted:
        with open(input_filename, "r", newline='') as input_file:
            # DictReader consumes the header row and maps each value to its
            # column name, so 'time' is found by name rather than by a
            # hard-coded position. The None default covers empty files and
            # files that contain only a header.
            first_data_row = next(csv.DictReader(input_file), None)

        # Skip files that have no data row or no 'time' value in it
        if first_data_row is None or first_data_row.get('time') is None:
            continue

        # Write the first part of the file name (without its path) and
        # the time component of the first row of this file
        output_writer.writerow((first_part_filename, first_data_row['time']))

I'm way past my bedtime, so if this isn't the right answer, you will have to give more details about your input data and what you want as output.

Python: How to take the first value of a column of multiple .csv files + its name and make a new file with them

Question

2 answers

solution1
0 ACCEPTED 2018-01-02 18:24:15

solution2
0 2018-01-04 05:52:38

Python: How to take the first value of a column of multiple .csv files + its name and make a new file with them

Question

2 answers

solution1 0 ACCEPTED 2018-01-02 18:24:15

solution2 0 2018-01-04 05:52:38

solution1
0 ACCEPTED 2018-01-02 18:24:15

solution2
0 2018-01-04 05:52:38