I have to read a CSV file that's generated by a third party and contains a mixture of strings, integers and prices (sometimes with $ signs), and convert it into an XLSX file. This is the sample data stored in the CSV file, a_test_f.csv:
ColA,ColB
1,$11.00
2,22
3,"$1,000.56"
4,44
And here is the code that I've written. My question is: is this the most efficient way of performing this conversion? Is there an alternative method that would use less processing power or memory? This is especially important because the real CSV file will contain thousands of records and hundreds of columns, and the conversion will have to be performed tens of thousands of times per day.
import csv
import openpyxl
#
# Convert the data in csv file format that contains a mix of
# strings, integers and dollar amounts into xlsx file format
#
# Path of the CSV file that is read as input.
csvfile = 'a_test_f.csv'
# Path the resulting XLSX workbook is saved to.
xlsxfile = 'new_xlsx_f.xlsx'
# New in-memory workbook; the converted cells are written to its active sheet.
wb = openpyxl.Workbook()
ws = wb.active
# remove $ and , from numbers
class Clean:
    """Chainable wrapper that strips currency formatting from one CSV cell.

    Each stripping method returns a *new* ``Clean`` so calls can be chained,
    e.g. ``Clean(data="$1,000.56").dollar().comma().digit()``.
    """

    def __init__(self, data=''):
        """Wrap ``data`` (normally the raw text of one CSV cell)."""
        self.__obj = data

    def __repr__(self):
        """Return the wrapped value formatted as text."""
        return f"{self.__obj}"

    def getData(self):
        """Return the wrapped value unchanged."""
        return self.__obj

    def _without(self, char):
        """Return a new ``Clean`` with every ``char`` removed from the data.

        Non-string data is passed through untouched instead of printing an
        error and returning ``None``, which used to break the next call in
        a chain.
        """
        if isinstance(self.__obj, str):
            return Clean(data=self.__obj.replace(char, ''))
        return Clean(data=self.__obj)

    def dollar(self):
        """Return a new ``Clean`` with all ``$`` signs removed."""
        return self._without('$')

    def comma(self):
        """Return a new ``Clean`` with all ``,`` separators removed."""
        return self._without(',')

    def digit(self):
        """Return True if the wrapped value can be converted by ``float()``.

        NOTE: ``float()`` also accepts spellings such as ``"nan"`` and
        ``"inf"``, so those texts count as numbers here as well.
        """
        try:
            float(self.__obj)
        except (TypeError, ValueError):
            # ValueError: text that is not a number.
            # TypeError: data float() cannot handle at all (e.g. None).
            return False
        return True
# Convert the CSV one row at a time and append each converted row to the sheet.
# newline='' is what the csv module asks for, so that line breaks embedded in
# quoted fields are handled by the reader itself.
with open(csvfile, newline='') as f:
    reader = csv.reader(f, delimiter=',', quotechar='"')
    for row in reader:
        converted = []
        for cell in row:
            # Strip "$" and "," once and reuse the result for both the
            # numeric test and the conversion.
            cleaned = Clean(data=cell).dollar().comma()
            if cleaned.digit():
                converted.append(float(cleaned.getData()))
            else:
                # Not a number: keep the original text, commas included.
                converted.append(cell)
        # append() writes the whole row below the previously written one.
        ws.append(converted)
wb.save(xlsxfile)
print('Finished!')
Following Charlie's suggestion, I rewrote the conversion using functions instead of a class, and then tried processing a million items in a CSV file with both the class-based and the function-based versions. Results:
Functions win. Thank you Charlie!
The Function method is below:
import csv
import openpyxl
#
# Convert the data in csv file format that contains a mix of
# strings, integers and dollar amounts into xlsx file format
#
# Path of the CSV file that is read as input.
csvfile = 'large_test_export.csv'
# Path the resulting XLSX workbook is saved to.
xlsxfile = 'new_xlsx_f.xlsx'
# New in-memory workbook; the converted cells are written to its active sheet.
wb = openpyxl.Workbook()
ws = wb.active
# remove $ and , from numbers
def strip_stuff(a_string):
    """Remove ``,`` and ``$`` from ``a_string`` and convert it to a number.

    Returns a ``float`` when the stripped text is accepted by ``float()``;
    otherwise returns the stripped text (commas and dollar signs removed).
    Input that is not a ``str`` is returned unchanged rather than failing
    part-way through with an unbound local variable.

    NOTE: ``float()`` also accepts spellings such as ``"nan"`` and ``"inf"``,
    so those texts are returned as floats too.
    """
    if not isinstance(a_string, str):
        return a_string
    stripped = a_string.replace(',', '').replace('$', '')
    try:
        return float(stripped)
    except ValueError:
        return stripped
def is_number(b_string):
    """Return True if ``b_string`` is text that ``strip_stuff`` turns into a float.

    ``strip_stuff`` returns a ``float`` exactly when the stripped text is
    numeric, so checking the result type avoids parsing the value a second
    time. Anything that is not a ``str`` is reported as not a number.
    """
    if not isinstance(b_string, str):
        return False
    return isinstance(strip_stuff(b_string), float)
# Convert the CSV one row at a time and append each converted row to the sheet.
# newline='' is what the csv module asks for, so that line breaks embedded in
# quoted fields are handled by the reader itself.
with open(csvfile, newline='') as f:
    reader = csv.reader(f, delimiter=',', quotechar='"')
    for row in reader:
        converted = []
        for cell in row:
            # strip_stuff returns a float only for numeric cells, so a single
            # call both tests and converts the value.
            cleaned = strip_stuff(cell)
            if isinstance(cleaned, float):
                converted.append(cleaned)
            else:
                # Not a number: keep the original text, commas included.
                converted.append(cell)
        # append() writes the whole row below the previously written one.
        ws.append(converted)
wb.save(xlsxfile)
print('Finished!')
The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please cite the site URL or the original address. For any questions, please contact: yoyou2525@163.com.