[英]Group consecutive dates together with Python
鑒於:
# Sample input: unsorted datetimes that include duplicates
# (Oct 5 and Oct 6 each appear twice).
dates = [
datetime(2014, 10, 11),
datetime(2014, 10, 1),
datetime(2014, 10, 2),
datetime(2014, 10, 3),
datetime(2014, 10, 5),
datetime(2014, 10, 5),
datetime(2014, 10, 6),
datetime(2014, 10, 22),
datetime(2014, 10, 20),
datetime(2014, 10, 21),
datetime(2014, 10, 9),
datetime(2014, 10, 7),
datetime(2014, 10, 6)
]
預期輸出:
# Desired result, in ascending order: a [first, last] list for each run of
# consecutive days and a bare datetime for each isolated day.
expect = [
[datetime(2014, 10, 1), datetime(2014, 10, 3)],
[datetime(2014, 10, 5), datetime(2014, 10, 7)],
datetime(2014, 10, 9),
datetime(2014, 10, 11),
[datetime(2014, 10, 20), datetime(2014, 10, 22)]
]
在Python中實現:
from datetime import datetime, timedelta
def parse_date_ranges(dates):
    """Collapse a collection of dates into runs of consecutive days.

    Args:
        dates: A sized collection (list, tuple, set, ...) of ``datetime``
            objects in any order; duplicates are allowed. The collection
            is not modified.

    Returns:
        ``False`` when ``dates`` is empty or ``None``. Otherwise a list in
        ascending order in which every run of consecutive days is
        represented as ``[first, last]`` and every isolated date as the
        bare ``datetime``.
    """
    if not dates:
        return False

    # Work on a sorted copy so the caller's collection keeps its order.
    ordered = sorted(dates)
    one_day = timedelta(days=1)

    result = []
    start = end = ordered[0]
    for date in ordered[1:]:
        if date == end:
            # Duplicate of the date that currently closes the run.
            continue
        if date - one_day == end:
            # Exactly one day after the current end: extend the run.
            end = date
        else:
            # Gap found: store the finished run and start a new one.
            result.append([start, end] if start != end else start)
            start = end = date

    # The run still open after the loop has not been stored yet.
    result.append([start, end] if start != end else start)
    return result
有關更多測試,請參閱https://gist.github.com/mattes/8987332 。
題
這個實現可以正常工作,但我是Python的新手,所以我想知道是否有更好的方法來解決這個問題?還是說這樣寫就已經可以了?
我喜歡itertools :
from itertools import tee, zip_longest
from datetime import datetime, timedelta
one_day = timedelta(days=1)
def pairwise(iterable):
    """Pair every element with its successor.

    Yields ``(s0, s1), (s1, s2), ...`` and finally ``(s_last, None)``, so
    the last element is paired with ``None`` rather than being dropped.
    """
    leading, trailing = tee(iterable, 2)
    # Shift the second copy forward by one element (no-op when empty).
    next(trailing, None)
    # zip_longest pads the shorter (trailing) side with None by default.
    return zip_longest(leading, trailing)
def collapse_ranges(sorted_iterable, inc):
    """Yield runs of successive values from an ascending iterable.

    Args:
        sorted_iterable: Values in ascending order.
        inc: Callable returning the successor of a value; two neighbours
            belong to the same run when ``inc(a) == b``.

    Yields:
        ``(start, end)`` for every run of two or more successive values and
        the bare value for every isolated one.
    """
    neighbours = pairwise(sorted_iterable)
    for first, following in neighbours:
        if inc(first) != following:
            # No successor directly after this value: it stands alone.
            yield first
            continue
        # Keep consuming the SAME iterator until the run is broken; the
        # outer loop then resumes right after the end of the run.
        for last, following in neighbours:
            if inc(last) != following:
                break
        yield first, last
# dates = [...]  (the list of datetimes from the question is expected here)
numbers = [11, 1, 2, 3, 5, 5, 6, 22, 20, 21, 9, 7, 6]

if __name__ == '__main__':
    import pprint

    # Each demo pairs a collection with the function producing a value's
    # successor: "plus one day" for datetimes, "plus one" for integers.
    demo_runs = (
        (dates, lambda d: d + one_day),
        (numbers, (1).__add__),
    )
    for values, step in demo_runs:
        # De-duplicate and sort first; collapse_ranges needs ascending input.
        for each in collapse_ranges(sorted(set(values)), step):
            pprint.pprint(each)
結果:
(datetime.datetime(2014, 10, 1, 0, 0), datetime.datetime(2014, 10, 3, 0, 0))
(datetime.datetime(2014, 10, 5, 0, 0), datetime.datetime(2014, 10, 7, 0, 0))
datetime.datetime(2014, 10, 9, 0, 0)
datetime.datetime(2014, 10, 11, 0, 0)
(datetime.datetime(2014, 10, 20, 0, 0), datetime.datetime(2014, 10, 22, 0, 0))
(1, 3)
(5, 7)
9
11
(20, 22)
你可以改編標題類似的問題「將連續日期分組在一起」下的這個答案,使其適用於datetime對象,從而避免重新發明輪子:
def parse_date_ranges(dates):
    """Group dates into runs of consecutive calendar days.

    Returns ``False`` for empty input. Otherwise returns a list in ascending
    order holding ``[start, end]`` for every run of consecutive days and the
    bare date for every isolated day. Consecutiveness is judged by
    ``toordinal()``, i.e. by calendar day.
    """
    def group_consecutive(values):
        ordered = iter(sorted(set(values)))  # de-dup and sort
        start = end = next(ordered)
        for current in ordered:
            if current.toordinal() - end.toordinal() != 1:
                # Run is over. After de-duplication `start is end` holds
                # exactly when the run contains a single date.
                yield start if start is end else [start, end]
                start = current
            end = current
        # Emit the run that was still open when the input ran out.
        yield start if start is end else [start, end]

    if not dates:
        return False
    return list(group_consecutive(dates))
我為您編寫了另一個解決方案,並寫了幾條注釋來嘗試解釋代碼。
from datetime import datetime, timedelta
# Sample input: unsorted datetimes, some of them duplicated.
dates = [
    datetime(2014, 10, 11),
    datetime(2014, 10, 1),
    datetime(2014, 10, 2),
    datetime(2014, 10, 3),
    datetime(2014, 10, 5),
    datetime(2014, 10, 5),
    datetime(2014, 10, 6),
    datetime(2014, 10, 22),
    datetime(2014, 10, 20),
    datetime(2014, 10, 21),
    datetime(2014, 10, 9),
    datetime(2014, 10, 7),
    datetime(2014, 10, 6),
]

# Remove duplicates, and sort the dates ascending
sorted_dates = sorted(set(dates))

# Set initial first and last element as the current element
first, last = sorted_dates[0], sorted_dates[0]
date_ranges = []

# Loop over the sorted list from the second value
for d in sorted_dates[1:]:
    # Check if the current date is exactly one day later than the current
    # "last" date
    if d - last != timedelta(days=1):
        # Gap found: store the finished run as a 1-tuple (single day) or a
        # (first, last) pair, then start a new run at the current date.
        if first == last:
            date_ranges.append((first,))
        else:
            date_ranges.append((first, last))
        first, last = d, d
    else:
        last = d

# Handle last element: the run still open after the loop was never stored
if first == last:
    date_ranges.append((first,))
else:
    date_ranges.append((first, last))

for dt_pair in date_ranges:
    # Call form of print works on both Python 2 and Python 3; the bare
    # `print dt_pair` statement is a SyntaxError on Python 3.
    print(dt_pair)
輸出:
(datetime.datetime(2014, 10, 1, 0, 0), datetime.datetime(2014, 10, 3, 0, 0))
(datetime.datetime(2014, 10, 5, 0, 0), datetime.datetime(2014, 10, 7, 0, 0))
(datetime.datetime(2014, 10, 9, 0, 0),)
(datetime.datetime(2014, 10, 11, 0, 0),)
(datetime.datetime(2014, 10, 20, 0, 0), datetime.datetime(2014, 10, 22, 0, 0))
改編此答案以使用datetime對象。 它同樣能處理含重複項和未排序的輸入,並且兼容python3:
import itertools
from datetime import datetime, timedelta
def datetimes_to_ranges(iterable):
    """Yield runs of consecutive days found in ``iterable``.

    The input may be unsorted and contain duplicates. For every run of two
    or more datetimes spaced exactly one day apart a ``(first, last)`` tuple
    is yielded; an isolated datetime is yielded as-is. Results come out in
    ascending order.
    """
    def _anchor(indexed):
        # Subtracting the position (in days) maps every member of a run of
        # consecutive days onto the same value, which groupby can key on.
        position, moment = indexed
        return moment - timedelta(days=position)

    ordered = sorted(set(iterable))
    for _, members in itertools.groupby(enumerate(ordered), _anchor):
        run = [moment for _, moment in members]
        if len(run) > 1:
            yield run[0], run[-1]
        else:
            yield run[0]
例:
>>> for i in datetimes_to_ranges(dates): i
...
(datetime.datetime(2014, 10, 1, 0, 0), datetime.datetime(2014, 10, 3, 0, 0))
(datetime.datetime(2014, 10, 5, 0, 0), datetime.datetime(2014, 10, 7, 0, 0))
datetime.datetime(2014, 10, 9, 0, 0)
datetime.datetime(2014, 10, 11, 0, 0)
(datetime.datetime(2014, 10, 20, 0, 0), datetime.datetime(2014, 10, 22, 0, 0))
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.