[英]Python trying to Refactor (DRY out) a long Control Flow
我通過一個SQL查詢獲取了大量數據,而這個查詢需要很長時間才能運行。 由於SQL查詢需要很長時間才能運行,因此我以最精細的形式從數據庫中獲取數據。 然后,我循環遍歷這些數據一次,並以對我有用的形式對其進行匯總。
我的問題是我一遍又一遍地重復自己。 但是,我不確定重構此控制流的最佳方法。 提前致謝!
def processClickOutData(cls, raw_data):
    """Aggregate granular click-out rows into per-gap and per-position totals.

    Each row is read positionally as
    ``(gap, count, tid, prefered, channel, position)``; extra trailing
    columns are ignored.  Every row contributes to the overall aggregates,
    and rows whose ``prefered`` equals 0 additionally contribute to the
    parallel ``*_true`` aggregates.

    Args:
        cls: Not used by the body; kept so existing callers keep working.
        raw_data: Iterable of indexable rows as described above.

    Returns:
        A dict with the keys ``singles``, ``singles_true``, ``total``,
        ``total_true``, ``absolute_total``, ``absolute_total_true``,
        ``channel_totals``, ``channel_totals_true``, ``list_channels``,
        ``list_tids``, ``total_position``, ``total_position_true``,
        ``tid_position``, ``tid_position_true``, ``channel_position`` and
        ``channel_position_true``.  The flat tables map key -> count, the
        nested ones map key -> {tid or channel -> count}, and the two
        ``list_*`` entries are sets.
    """

    def _accumulate(table, key, count):
        # ``key in table`` replaces dict.has_key(), which no longer exists
        # in Python 3.  The first value is stored as-is (not added to 0) so
        # the stored type is whatever the row supplied.
        if key in table:
            table[key] += count
        else:
            table[key] = count

    def _new_group():
        # One flat table plus the two nested breakdowns sharing its key.
        return {"total": {}, "by_tid": {}, "by_channel": {}}

    def _record(group, key, tid, channel, count):
        # Update the flat total and both nested breakdowns for one row.
        _accumulate(group["total"], key, count)
        _accumulate(group["by_tid"].setdefault(key, {}), tid, count)
        _accumulate(group["by_channel"].setdefault(key, {}), channel, count)

    gap_all, gap_true = _new_group(), _new_group()
    pos_all, pos_true = _new_group(), _new_group()
    absolute_total = 0
    absolute_total_true = 0
    list_channels = set()
    list_tids = set()

    for row in raw_data:
        gap, count, tid = row[0], row[1], row[2]
        prefered, channel, position = row[3], row[4], row[5]

        list_channels.add(channel)
        list_tids.add(tid)

        # NOTE: only this grand total coerces with int(); every other
        # aggregate (including absolute_total_true) adds the raw count.
        # Kept as in the original so results do not change.
        absolute_total += int(count)
        _record(gap_all, gap, tid, channel, count)
        _record(pos_all, position, tid, channel, count)

        if prefered == 0:
            absolute_total_true += count
            _record(gap_true, gap, tid, channel, count)
            _record(pos_true, position, tid, channel, count)

    return {
        "singles": gap_all["by_tid"],
        "singles_true": gap_true["by_tid"],
        "total": gap_all["total"],
        "total_true": gap_true["total"],
        "absolute_total": absolute_total,
        "absolute_total_true": absolute_total_true,
        "channel_totals": gap_all["by_channel"],
        "list_channels": list_channels,
        "list_tids": list_tids,
        "channel_totals_true": gap_true["by_channel"],
        "total_position": pos_all["total"],
        "total_position_true": pos_true["total"],
        "tid_position": pos_all["by_tid"],
        "channel_position": pos_all["by_channel"],
        "tid_position_true": pos_true["by_tid"],
        "channel_position_true": pos_true["by_channel"],
    }
您用來存儲數據的整個結構可能是錯誤的,但是由於我不知道您如何使用它,因此我無法為您提供幫助。
您可以使用 collections.defaultdict 來擺脫所有的 has_key() 調用。 另外請注意,thedict.has_key(key) 本來就已經被棄用,您應該改用 key in thedict。
另外,請注意我是如何修改 for 循環的——您可以直接在 for 語句中為各個字段綁定名稱,而無需逐個單獨賦值。
from collections import defaultdict
def processClickOutData(cls, raw_data):
    """Aggregate click-out rows using ``defaultdict`` accumulators.

    Each row must unpack to exactly
    ``(gap, count, tid, prefered, channel, position)``.  Every row is
    added to the overall aggregates; rows whose ``prefered`` equals 0 are
    also added to the parallel ``*_true`` aggregates.

    Args:
        cls: Not used by the body; kept for the existing call signature.
        raw_data: Iterable of six-item rows as described above.

    Returns:
        A dict of aggregates.  Flat tables are ``defaultdict(int)`` keyed
        by gap or position; nested tables are ``defaultdict`` of
        ``defaultdict(int)`` keyed first by gap or position and then by
        tid or channel; ``list_channels`` and ``list_tids`` are sets.
    """
    absolute_total = 0
    absolute_total_true = 0
    list_channels = set()
    list_tids = set()

    # Flat counters: a missing key starts at int() == 0.
    total = defaultdict(int)
    total_true = defaultdict(int)
    total_position = defaultdict(int)
    total_position_true = defaultdict(int)

    def defaultdict_int():
        # Factory for the inner level of the two-level counters.
        return defaultdict(int)

    # Two-level counters: outer key -> (tid or channel) -> count.
    singles = defaultdict(defaultdict_int)
    singles_true = defaultdict(defaultdict_int)
    channels = defaultdict(defaultdict_int)
    channels_true = defaultdict(defaultdict_int)
    tid_position = defaultdict(defaultdict_int)
    tid_position_true = defaultdict(defaultdict_int)
    channel_position = defaultdict(defaultdict_int)
    channel_position_true = defaultdict(defaultdict_int)

    # ``tid`` must be unpacked here: it is the third column of each row and
    # is used by every per-tid table below.
    for gap, count, tid, prefered, channel, position in raw_data:
        list_channels.add(channel)
        list_tids.add(tid)

        # int() matches the index-based version of this function, which
        # coerces only the grand total.
        absolute_total += int(count)
        total[gap] += count
        singles[gap][tid] += count
        channels[gap][channel] += count
        total_position[position] += count
        tid_position[position][tid] += count
        channel_position[position][channel] += count

        if prefered == 0:
            absolute_total_true += count
            total_true[gap] += count
            singles_true[gap][tid] += count
            channels_true[gap][channel] += count
            total_position_true[position] += count
            tid_position_true[position][tid] += count
            channel_position_true[position][channel] += count

    final_values = {
        "singles": singles,
        "singles_true": singles_true,
        "total": total,
        "total_true": total_true,
        "absolute_total": absolute_total,
        "absolute_total_true": absolute_total_true,
        "channel_totals": channels,
        "list_channels": list_channels,
        "list_tids": list_tids,
        "channel_totals_true": channels_true,
        "total_position": total_position,
        "total_position_true": total_position_true,
        "tid_position": tid_position,
        "channel_position": channel_position,
        "tid_position_true": tid_position_true,
        "channel_position_true": channel_position_true,
    }
    return final_values
如果這些鍵不存在,它將自動填入正確的默認值。 這里有兩種情況。 在累加 int 的地方,如果鍵不存在,您希望從 0 開始——這正是 int() 返回的值,因此使用 defaultdict(int)。 在累加「值為 int 的字典」的地方,需要使用一個返回 defaultdict(int) 的函數,這就是 defaultdict_int 所做的事情。
編輯:建議的備用字典結構:
# Alternative layout: two nested tables instead of separate flat dicts.
# Each outer key (a position or a gap) maps to a table holding
#   'total'   -> running count for that key
#   'tid'     -> {tid: count}
#   'channel' -> {channel: count}
# The tables are named by_position / by_gap so the loop variables
# ``position`` and ``gap`` do not rebind them on the first iteration.
# Relies on ``defaultdict``, ``defaultdict_int`` and ``raw_data`` being in
# scope where this snippet is placed.
by_position = defaultdict(lambda: defaultdict(defaultdict_int))
by_gap = defaultdict(lambda: defaultdict(defaultdict_int))
absolute_total = 0

# ``tid`` is unpacked as the third column, matching the row layout
# (gap, count, tid, prefered, channel, position).
for gap, count, tid, prefered, channel, position in raw_data:
    absolute_total += count

    posd = by_position[position]
    # setdefault is required: a bare posd['total'] would create a nested
    # defaultdict rather than an int, and ``+= count`` would then fail.
    posd.setdefault('total', 0)
    posd['total'] += count
    posd['tid'][tid] += count
    posd['channel'][channel] += count

    gapd = by_gap[gap]
    gapd.setdefault('total', 0)
    gapd['total'] += count
    gapd['tid'][tid] += count
    gapd['channel'][channel] += count
對 _true 版本也做同樣的處理,這樣您就從 12 個 dict 減少到 4 個。
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.