[英]How to convert these for loops to list comprehensions and map functions
我想提高代码的性能,但一直没能成功。如何把下面的 for 循环改写成列表推导式(list comprehension)?
def normalise_data(data):  # returns unique rows
    """Return the rows of ``data`` with duplicate rows removed.

    The first occurrence of each row is kept and the original order of the
    rows is preserved.

    Args:
        data: Iterable of rows (a list of lists). Every cell must be
            hashable, because each row is turned into a tuple so it can be
            looked up in a set.

    Returns:
        A new list containing the original row objects, one per distinct row.

    Raises:
        TypeError: If a row contains an unhashable cell.
    """
    seen = set()
    dim_data = []
    for row in data:
        # Lists cannot be stored in a set, so a tuple copy of the row is the
        # lookup key. The set lookup is O(1); scanning ``dim_data`` for every
        # row would make the whole function quadratic.
        key = tuple(row)
        if key not in seen:
            seen.add(key)
            dim_data.append(row)
    return dim_data
def convert_dates(data):
    """Parse the date columns of every row of ``data`` in place.

    Cells longer than 10 characters are handled as timestamps: the last five
    characters are dropped (presumably a ``.000Z``-style suffix -- confirm
    against the real input), ``T`` is replaced by a space and the result is
    parsed with ``%Y-%m-%d %H:%M:%S``. All other cells are parsed with
    ``%Y-%m-%d`` and stored as ``datetime.date`` objects.

    NOTE(review): ``self`` is not a parameter of this function. The snippet
    looks like it was lifted out of a class, so ``self.date_col`` must be
    resolvable from the enclosing scope -- confirm against the real class.

    Args:
        data: List of rows; each row is a mutable sequence of strings.

    Returns:
        The same ``data`` object with its rows updated in place. It is
        returned untouched when ``self.date_col`` is ``None`` or empty.

    Raises:
        ValueError: If a cell does not match the expected format.
    """
    # date_col comes from the constructor and may be None: nothing to convert.
    if not self.date_col:
        return data

    for row in data:
        for index in self.date_col:
            # Work on a local copy so that a failed parse leaves the original
            # cell intact instead of a half-trimmed string.
            text = row[index]
            if len(text) > 10:
                text = text[:-5].replace('T', ' ')
                row[index] = datetime.datetime.strptime(text, "%Y-%m-%d %H:%M:%S")
            else:
                row[index] = datetime.datetime.strptime(text, "%Y-%m-%d").date()
    return data
# Load every CSV file, split the header row from the data rows, then
# de-duplicate the rows and parse their date columns.
for file in files:
    # read_file opens, reads and converts the csv data to a nested list
    csv_file = read_file(file)
    # The first row holds the column names.
    csv_headers = ', '.join(csv_file[0])
    # Everything after the header is data: drop duplicates, then parse dates.
    csv_data = convert_dates(normalise_data(csv_file[1:]))
我听说列表推导式和 map 函数比 for 循环更快,但我没能成功地把自己的 for 循环改写成这两种形式。
normalise_data 函数只是删除重复项。您可以使用基于 set 的常见 unique_everseen 算法。
def normalise_data(data):
    """Return the distinct rows of ``data``, keeping first occurrences in order.

    This is the "unique everseen" pattern: a set remembers every row already
    emitted so each membership test is O(1).

    Args:
        data: Iterable of rows (a list of lists) whose cells are hashable.

    Returns:
        A new list containing the original row objects, one per distinct row.

    Raises:
        TypeError: If a row contains an unhashable cell.
    """
    seen = set()
    seen_add = seen.add
    # Pair every row with a hashable tuple key. Testing the list itself
    # against the set would raise "TypeError: unhashable type: 'list'".
    keyed_rows = ((tuple(row), row) for row in data)
    # seen_add() returns None, so the ``or`` records a new key and still lets
    # its row through; keys that were already seen short-circuit and are dropped.
    return [row for key, row in keyed_rows if not (key in seen or seen_add(key))]
至于另一个循环,我不确定您在迭代什么,但使用 itertools.product 可能比这里的嵌套循环更好。
def convert_dates(data):
    """Parse the date columns of every row of ``data`` in place.

    Cells longer than 10 characters are handled as timestamps: the last five
    characters are dropped, ``T`` is replaced by a space and the result is
    parsed with ``%Y-%m-%d %H:%M:%S``. All other cells are parsed with
    ``%Y-%m-%d`` and stored as ``datetime.date`` objects.

    NOTE(review): ``self`` is not a parameter of this function; the snippet
    appears to come from a class, so ``self.date_col`` must be resolvable
    from the enclosing scope -- confirm against the real class.

    Args:
        data: List of rows; each row is a mutable sequence of strings.

    Returns:
        The same ``data`` object with its rows updated in place.

    Raises:
        ValueError: If a cell does not match the expected format.
    """
    if self.date_col:  # date_col is a constructor argument and can be None
        # product() yields every (row, column index) pair in the same order as
        # the nested loops it replaces.
        for row, index in itertools.product(data, self.date_col):
            # The converted value belongs in the row, not in ``data``:
            # ``data[index] = ...`` would overwrite a whole row.
            value = row[index]
            if len(value) > 10:
                value = value[:-5].replace('T', ' ')
                row[index] = datetime.datetime.strptime(value, "%Y-%m-%d %H:%M:%S")
            else:
                row[index] = datetime.datetime.strptime(value, "%Y-%m-%d").date()
    return data
重构后的代码
from numba import jit
def normalise_data(data):
    """Return the distinct rows of ``data`` in first-seen order.

    Rows are lists and therefore unhashable, so each row is mapped to a tuple
    for de-duplication and mapped back to a list afterwards.

    Args:
        data: Iterable of rows (a list of lists) whose cells are hashable.

    Returns:
        A new list of new list objects, one per distinct row.

    Raises:
        TypeError: If a row contains an unhashable cell.
    """
    # dict keys are unique and keep insertion order (guaranteed since Python
    # 3.7); a plain set would return the rows in arbitrary order.
    return list(map(list, dict.fromkeys(map(tuple, data))))
def convert_dates(data):
    """Parse the date columns of every row of ``data`` in place.

    Cells longer than 10 characters are handled as timestamps: the last five
    characters are dropped, ``T`` is replaced by a space and the result is
    parsed with ``%Y-%m-%d %H:%M:%S``. All other cells are parsed with
    ``%Y-%m-%d`` and stored as ``datetime.date`` objects.

    The function is deliberately not decorated with numba's ``@jit``: numba
    cannot compile ``datetime.datetime.strptime`` or the free ``self`` object,
    so the decorator either fails at call time (nopython mode) or falls back
    to object mode, which brings no speed-up.

    NOTE(review): ``self`` is not a parameter of this function; the snippet
    appears to come from a class, so ``self.date_col`` must be resolvable
    from the enclosing scope -- confirm against the real class.

    Args:
        data: List of rows; each row is a mutable sequence of strings.

    Returns:
        The same ``data`` object with its rows updated in place. It is
        returned untouched when ``self.date_col`` is ``None`` or empty.

    Raises:
        ValueError: If a cell does not match the expected format.
    """
    date_columns = self.date_col  # constructor argument; may be None
    if not date_columns:
        return data

    for row in data:
        for index in date_columns:
            # Parse from a local so a failed parse never leaves a half-trimmed
            # string behind in the row.
            cell = row[index]
            if len(cell) > 10:
                cell = cell[:-5].replace('T', ' ')
                row[index] = datetime.datetime.strptime(cell, "%Y-%m-%d %H:%M:%S")
            else:
                row[index] = datetime.datetime.strptime(cell, "%Y-%m-%d").date()
    return data
# Process each input file: read it, separate the header from the data rows,
# then de-duplicate the rows and convert their date columns.
for file in files:
    # read_file opens, reads and converts the csv data to a nested list
    csv_file = read_file(file)
    # Row 0 is the header; join its column names into one string.
    csv_headers = ', '.join(csv_file[0])
    # The remaining rows are the data to clean.
    csv_data = convert_dates(normalise_data(csv_file[1:]))
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.