简体   繁体   中英

Group consecutive dates together with Python

Given:

# Sample input: deliberately unsorted and containing duplicates
# (2014-10-05 and 2014-10-06 each appear twice).
dates = [
  datetime(2014, 10, 11), 

  datetime(2014, 10, 1), 
  datetime(2014, 10, 2),
  datetime(2014, 10, 3),

  datetime(2014, 10, 5), 
  datetime(2014, 10, 5), 
  datetime(2014, 10, 6), 

  datetime(2014, 10, 22), 
  datetime(2014, 10, 20),
  datetime(2014, 10, 21),

  datetime(2014, 10, 9), 

  datetime(2014, 10, 7),
  datetime(2014, 10, 6)
]

Expected output:

# Expected result, in ascending order: each run of consecutive days is a
# [first, last] pair; a date with no neighbouring day stays a bare datetime.
expect = [
  [datetime(2014, 10, 1), datetime(2014, 10, 3)],
  [datetime(2014, 10, 5), datetime(2014, 10, 7)],
   datetime(2014, 10, 9),
   datetime(2014, 10, 11),
  [datetime(2014, 10, 20), datetime(2014, 10, 22)]
]

Implementation in Python:

from datetime import datetime, timedelta

def parse_date_ranges(dates):
    """Collapse a collection of dates into runs of consecutive days.

    The input may be unsorted and may contain duplicates. It is never
    modified: the work is done on a sorted copy.

    Args:
        dates: Iterable of ``datetime`` objects (any mutually comparable
            values that support ``value - timedelta`` will work).

    Returns:
        ``False`` when ``dates`` is ``None`` or empty. Otherwise a list in
        ascending order in which every run of consecutive days is a
        two-item list ``[first, last]`` and every isolated date appears as
        the bare value.
    """
    if not dates:
        return False

    # Sort a copy so the caller's list keeps its original order.
    ordered = sorted(dates)
    if not ordered:
        # Truthy-but-empty inputs, e.g. an exhausted iterator.
        return False

    one_day = timedelta(days=1)
    result = []
    start = end = ordered[0]

    for date in ordered[1:]:
        if date == end:
            # Duplicate of the current run's end: nothing to do.
            continue
        if date - one_day == end:
            # Exactly one day after the current end: extend the run.
            end = date
            continue
        # Gap: close the current run and open a new one at this date.
        result.append([start, end] if start != end else start)
        start = end = date

    # Close the run that was still open when the input ran out.
    result.append([start, end] if start != end else start)
    return result

See https://gist.github.com/mattes/8987332 for some more tests, too.

Question

The implementation works, but I am new to Python, so I was wondering whether there are better ways to solve this problem. Or is it fine as it is?

I like itertools:

from itertools import tee, zip_longest
from datetime import datetime, timedelta

# Step between two consecutive dates; the demo at the bottom adds it to a
# date to obtain the expected successor.
one_day = timedelta(days=1)

def pairwise(iterable):
    """Pair every item with its successor.

    Yields ``(item, next_item)`` for each item of ``iterable``; the final
    item is paired with ``None`` because it has no successor. An empty
    iterable produces no pairs.
    """
    current, following = tee(iterable, 2)
    # Advance the second copy by one so it always runs one item ahead.
    next(following, None)
    # zip_longest pads the shorter (advanced) copy with None by default.
    return zip_longest(current, following)

def collapse_ranges(sorted_iterable, inc):
    """Collapse runs of consecutive values into ``(start, end)`` tuples.

    Args:
        sorted_iterable: Values in ascending order without duplicates.
        inc: Callable returning the successor of a value; two neighbouring
            values belong to the same run when ``inc(a) == b``.

    Yields:
        ``(start, end)`` for each run of two or more consecutive values,
        and the bare value for each value that is not part of a run.
    """
    pairs = pairwise(sorted_iterable)
    for start, following in pairs:
        if inc(start) != following:
            # No successor directly after this value: it stands alone.
            yield start
            continue
        # Keep consuming from the same iterator until the run is broken;
        # the outer loop then resumes right after the end of this run.
        for end, following in pairs:
            if inc(end) != following:
                break
        yield start, end

# dates = [...]

# Integer sample with the same shape as the dates: unsorted, with duplicates.
numbers = [
    11,
    1, 2, 3,
    5, 5, 6,
    22, 20, 21,
    9,
    7, 6,
]

if __name__ == '__main__':
    import pprint

    # Dates: the successor of a date is the date one day later.
    date_groups = collapse_ranges(sorted(set(dates)), lambda d: d + one_day)
    for each in date_groups:
        pprint.pprint(each)

    # Integers: the successor of n is n + 1.
    number_groups = collapse_ranges(sorted(set(numbers)), lambda n: 1 + n)
    for each in number_groups:
        pprint.pprint(each)

Result:

(datetime.datetime(2014, 10, 1, 0, 0), datetime.datetime(2014, 10, 3, 0, 0))
(datetime.datetime(2014, 10, 5, 0, 0), datetime.datetime(2014, 10, 7, 0, 0))
datetime.datetime(2014, 10, 9, 0, 0)
datetime.datetime(2014, 10, 11, 0, 0)
(datetime.datetime(2014, 10, 20, 0, 0), datetime.datetime(2014, 10, 22, 0, 0))
(1, 3)
(5, 7)
9
11
(20, 22)

You can avoid reinventing the wheel by adapting this answer to the similarly titled
"Grouping consecutive dates together" question so that it works with datetime objects:

def parse_date_ranges(dates):
    """Group dates into runs of consecutive calendar days.

    Args:
        dates: Iterable of hashable objects that provide ``toordinal()``
            (for example ``datetime`` or ``date``). It may be unsorted and
            may contain duplicates.

    Returns:
        ``False`` when there are no dates (``None``, an empty container or
        an exhausted iterator). Otherwise a list in ascending order in
        which each run of consecutive days is ``[first, last]`` and each
        isolated date appears as the bare value.
    """
    if not dates:
        return False

    # De-duplicate and sort up front so emptiness is checked on the
    # materialized list; an iterator is always truthy, even when empty.
    ordered = sorted(set(dates))
    if not ordered:
        return False

    groups = []
    start = last = ordered[0]
    for current in ordered[1:]:
        if current.toordinal() - last.toordinal() == 1:
            # Next calendar day: extend the current run.
            last = current
            continue
        # Gap: emit [start, end] for a real run, else the single date.
        groups.append([start, last] if start != last else start)
        start = last = current

    # Emit the run that was still open when the input ran out.
    groups.append([start, last] if start != last else start)
    return groups

I wrote another solution for you and added a couple of comments to explain the code.

from datetime import datetime, timedelta

# Sample input: unsorted, with duplicate entries.
dates = [
    datetime(2014, 10, 11),
    datetime(2014, 10, 1),
    datetime(2014, 10, 2),
    datetime(2014, 10, 3),
    datetime(2014, 10, 5),
    datetime(2014, 10, 5),
    datetime(2014, 10, 6),
    datetime(2014, 10, 22),
    datetime(2014, 10, 20),
    datetime(2014, 10, 21),
    datetime(2014, 10, 9),
    datetime(2014, 10, 7),
    datetime(2014, 10, 6)
]
# Remove duplicates, and sort the dates ascending
sorted_dates = sorted(set(dates))
# Start the first run at the earliest date
first, last = sorted_dates[0], sorted_dates[0]
# Each entry is a 1-tuple (isolated date) or a (first, last) 2-tuple (run)
date_ranges = []

# Loop over the sorted list from the second value
for d in sorted_dates[1:]:
    if d - last == timedelta(days=1):
        # Exactly one day later than the current "last" date: extend the run
        last = d
    else:
        # Gap: close the current run, then start a new one at this date
        date_ranges.append((first,) if first == last else (first, last))
        first, last = d, d

# Close the run that is still open after the loop
date_ranges.append((first,) if first == last else (first, last))

for dt_pair in date_ranges:
    # Function-call form: valid on Python 3, and prints the same tuple on
    # Python 2 because the parentheses merely group a single argument.
    print(dt_pair)

Output:

(datetime.datetime(2014, 10, 1, 0, 0), datetime.datetime(2014, 10, 3, 0, 0))
(datetime.datetime(2014, 10, 5, 0, 0), datetime.datetime(2014, 10, 7, 0, 0))
(datetime.datetime(2014, 10, 9, 0, 0),)
(datetime.datetime(2014, 10, 11, 0, 0),)
(datetime.datetime(2014, 10, 20, 0, 0), datetime.datetime(2014, 10, 22, 0, 0))

This adapts this answer to use datetime objects. It handles non-unique and unsorted input and is also Python 3 compatible:

import itertools
from datetime import datetime, timedelta

def datetimes_to_ranges(iterable):
    """Yield runs of consecutive days found in ``iterable``.

    The input is de-duplicated and sorted first, so it may arrive in any
    order and contain repeats.

    Yields:
        ``(first, last)`` for each run of two or more consecutive days and
        the bare datetime for each isolated date, in ascending order.
    """
    unique_sorted = sorted(set(iterable))

    def run_anchor(indexed):
        # Subtracting the position (in days) from the value gives the same
        # result for every member of a run of consecutive days, so it can
        # serve as the grouping key.
        position, moment = indexed
        return moment - timedelta(days=position)

    for _, members in itertools.groupby(enumerate(unique_sorted), run_anchor):
        run = [moment for _, moment in members]
        if len(run) > 1:
            yield run[0], run[-1]
        else:
            yield run[0]

Example:

>>> for i in datetimes_to_ranges(dates): i
... 
(datetime.datetime(2014, 10, 1, 0, 0), datetime.datetime(2014, 10, 3, 0, 0))
(datetime.datetime(2014, 10, 5, 0, 0), datetime.datetime(2014, 10, 7, 0, 0))
datetime.datetime(2014, 10, 9, 0, 0)
datetime.datetime(2014, 10, 11, 0, 0)
(datetime.datetime(2014, 10, 20, 0, 0), datetime.datetime(2014, 10, 22, 0, 0))

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM