This is the code for processing one file.
# Clean a single CSV: load it, drop the 'gph' column, write the result.
import pandas as pd
import numpy as np

# NOTE(review): the original opened 'dataset1.csv' here without ever reading
# or closing it — a leaked, unused file handle — so that line was removed.
# Read the headerless CSV and assign column names explicitly
# (original had `names[...]`, a syntax error; it must be `names=[...]`).
df = pd.read_csv("cleaning/semicleaned1.csv", sep=",",
                 names=["press", "gph", "temp", "wspd"])
# Drop the geopotential-height column; it is not needed downstream.
df = df.drop(['gph'], axis=1)
df.to_csv("cleaning/FINAL.csv", sep=',', index=False)
To process all CSV files in your cleaning
folder, you could use glob
to get a list of them. I would suggest creating an output filename based on your input filename, e.g. prepending cleaned_
to the filename:
import pandas as pd
import glob
import os
# Clean every CSV under cleaning/, writing each result next to its input
# with a 'cleaned_' prefix. (The loop body's indentation was lost in the
# original paste, which made it a syntax error; restored here.)
for csv_filename in glob.glob('cleaning/*.csv'):
    # Build the output name: same directory, 'cleaned_' prepended to the
    # base filename, extension preserved.
    directory, basename = os.path.split(csv_filename)
    name, ext = os.path.splitext(basename)
    cleaned_filename = os.path.join(directory, 'cleaned_{}{}'.format(name, ext))
    print('{} -> {}'.format(csv_filename, cleaned_filename))

    # Process CSV: assign column names (input is headerless), drop 'gph',
    # and write without the pandas index column.
    df = pd.read_csv(csv_filename, names=["press", "gph", "temp", "wspd"])
    df = df.drop(['gph'], axis=1)
    df.to_csv(cleaned_filename, sep=',', index=False)
Note, care will be needed if your files have headers.
You could use a multiprocessing.Pool()
to run this in parallel:
from multiprocessing import Pool
import pandas as pd
import glob
import os
def clean_csv(csv_filename):
    """Drop the 'gph' column from *csv_filename* and write the result.

    The output is written next to the input as ``cleaned_<name><ext>``.
    The input is assumed to be a headerless CSV with exactly four columns:
    press, gph, temp, wspd — TODO confirm no header row is present, or the
    header will be consumed as a data row.
    """
    # Build the output name: same directory, 'cleaned_' prepended to the
    # base filename, extension preserved.
    # (The function body's indentation was lost in the original paste,
    # which made it a syntax error; restored here.)
    directory, basename = os.path.split(csv_filename)
    name, ext = os.path.splitext(basename)
    cleaned_filename = os.path.join(directory, 'cleaned_{}{}'.format(name, ext))
    print('{} -> {}'.format(csv_filename, cleaned_filename))

    # Process CSV: assign column names, drop 'gph', write without the index.
    df = pd.read_csv(csv_filename, names=["press", "gph", "temp", "wspd"])
    df = df.drop(['gph'], axis=1)
    df.to_csv(cleaned_filename, sep=',', index=False)
if __name__ == '__main__':
    # Fan the per-file cleaning out over 10 worker processes. clean_csv
    # returns None, so the printed list simply confirms one entry per file.
    # (The guard's indentation was lost in the original paste; restored.)
    with Pool(10) as pool:
        print(pool.map(clean_csv, glob.glob('cleaning/*.csv')))
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.