[英]Finding consecutive months in a list
我想從一個無序的月份列表中找出最長的一組連續月份,並把它作為一個不含重複項、按月份先後排序的列表返回。
例如:
consecutive_months(["December", "January", "February", "April"])
output:
"December", "January", "February"
和:
consecutive_months(["February", "December", "January"])
output:
"December", "January", "February"
以下有效,但我很好奇是否有人對更優雅的方式有想法:
MONTHS = [
    "January", "February", "March",
    "April", "May", "June",
    "July", "August", "September",
    "October", "November", "December",
]


def consecutive_months(lst_of_months):
    """Return the longest run of consecutive months found in *lst_of_months*.

    The input may be unordered and may contain duplicates.  A run is allowed
    to wrap around the end of the year (December -> January).  When several
    runs share the maximum length, the one met first while scanning the
    calendar from January is returned.

    Args:
        lst_of_months: iterable of full English month names.

    Returns:
        A list of month names in calendar order, at most 12 long.  An empty
        input yields an empty list.

    Raises:
        ValueError: if a name is not one of ``MONTHS``.
    """
    # Lay every distinct month out on a two-year timeline (index and
    # index + 12) so that a run crossing December -> January shows up as a
    # plain ascending sequence of positions.
    timeline = []
    for name in set(lst_of_months):
        month_index = MONTHS.index(name)
        timeline.append((name, month_index))
        timeline.append((name, month_index + 12))
    timeline.sort(key=lambda entry: entry[1])

    # Single pass: extend the current run while positions are adjacent,
    # otherwise start a new run; remember the longest run seen so far.
    this_series = []
    longest_series = []
    for entry in timeline:
        if this_series and this_series[-1][1] + 1 == entry[1]:
            this_series.append(entry)
        else:
            this_series = [entry]
        if len(this_series) > len(longest_series):
            longest_series = list(this_series)

    # When all twelve months are present the two-year timeline forms one run
    # of 24 entries; cap the answer at a single year so no month is repeated.
    return [name for name, _ in longest_series[:len(MONTHS)]]
這是一個帶有示例輸入和預期輸出的 pastebin。
我注意到您的代碼存在一個問題:當輸入中出現所有 12 個月時,output 將所有月份列出兩次。 這很容易解決,只需執行以下操作:
return longest_series[:12]
我會採用這樣的解決方案:把輸入轉換為一種 12 位的“位圖”,其中 1 表示相應的月份出現在輸入中,0 表示沒有出現。
如果表示為 12 個字符的字符串,則可以使用正則表達式輕松識別“1”的序列。
我還會對月份列表進行一些預處理:同時準備它的列表版本和字典版本,並把列表複製一份接在自身後面,這樣就可以跨越 12 月到 1 月的邊界進行切片。
這是建議的代碼:
import re
months = [
    "January", "February", "March",
    "April", "May", "June",
    "July", "August", "September",
    "October", "November", "December",
]
# Lookup table: month name -> zero-based position in the year.
month_nums = dict(zip(months, range(len(months))))
# Append a second copy of the year so a slice can run past December.
months = months + months


def consecutive_months(given_months):
    """Return the longest run of consecutive months in *given_months*.

    The months present are marked in a 12-character string of "0"/"1"
    flags.  That string is repeated twice so that a run wrapping from
    December into January appears as one unbroken stretch of "1"s, and a
    regular expression picks out every stretch.  Among stretches of equal
    length the one starting latest in the doubled string is chosen.

    Args:
        given_months: collection of full English month names (any order,
            duplicates allowed).

    Returns:
        List of month names in calendar order, at most 12 long; ``[]`` for
        an empty input.

    Raises:
        KeyError: if a name is not a known month.
    """
    if not given_months:
        return []

    # One flag per calendar month.
    flags = ["0"] * 12
    for name in given_months:
        flags[month_nums[name]] = "1"
    two_year_bits = "".join(flags) * 2

    # Keep the greatest (length, start, end) triple over all runs of "1".
    best = None
    for match in re.finditer("1+", two_year_bits):
        first, stop = match.span(0)
        triple = (stop - first, first, stop)
        if best is None or triple > best:
            best = triple

    _, start, end = best
    # A full year matches as one 24-long run; trim it back to 12 names.
    return months[start:end][:12]
月份字符串只是一個符號,它的本質還是它后面對應的數字,從1到12,月復一月。
兩個月份的字符串無法直接比較。如果把它們轉換為數字,就可以通過加 1 得到下一個月的數字(12 月之後是 1 月,屬於例外),而且直接比較數字肯定優於比較字符串。
我的優化代碼如下:
MONTHS = [
    "January", "February", "March",
    "April", "May", "June",
    "July", "August", "September",
    "October", "November", "December",
]
# Month name -> calendar number, 1 (January) through 12 (December).
month_num_dict = {month: num for num, month in enumerate(MONTHS, start=1)}


def consecutive_months(month_list: list) -> list:
    """Return the longest run of consecutive months contained in *month_list*.

    The result does not depend on the order of the input: the months are
    first reduced to the set of calendar positions that occur, and runs are
    then searched on the calendar itself, allowing a run to wrap from
    December into January.  Duplicates in the input are ignored.

    When several runs share the maximum length, the run containing the
    month that appears earliest in *month_list* is returned.

    Args:
        month_list: full English month names, in any order.

    Returns:
        A new list of month names in calendar order (at most 12 long).
        An empty input yields an empty list; an input covering the whole
        year yields January through December.

    Raises:
        KeyError: if a name is not a key of ``month_num_dict``.
    """
    if not month_list:
        return []

    # Zero-based calendar index -> position of its first occurrence in the
    # input (used only to break ties between equally long runs).
    first_seen = {}
    for position, name in enumerate(month_list):
        first_seen.setdefault(month_num_dict[name] - 1, position)

    if len(first_seen) == len(MONTHS):
        # Return a copy so callers cannot mutate the shared constant.
        return list(MONTHS)

    # Start scanning right after a missing month: that guarantees no run is
    # split by the scan origin, and the last step (offset 12) lands on the
    # missing month again, which closes any run still open.
    gap = next(i for i in range(12) if i not in first_seen)
    best_run = []
    best_rank = None
    run = []
    for offset in range(1, 13):
        index = (gap + offset) % 12
        if index in first_seen:
            run.append(index)
            continue
        if run:
            # Longer runs win; among equals, the earliest-mentioned one.
            rank = (len(run), -min(first_seen[i] for i in run))
            if best_rank is None or rank > best_rank:
                best_run, best_rank = run, rank
            run = []

    return [MONTHS[i] for i in best_run]
如果擔心手寫的 MONTHS 列表可能有誤,也可以通過 time.strftime 來生成:
import time
import locale
# Switch every locale category to US English so that "%B" below is rendered
# with English month names.
# NOTE(review): this changes process-wide locale state and nothing here
# restores it; setlocale raises locale.Error if "en_US.UTF-8" is not
# available on the host.
locale.setlocale(locale.LC_ALL, "en_US.UTF-8")
# Map each full month name to its number 1..12: the number is parsed with
# "%m" and formatted back as a full month name with "%B".
month_num_dict = {
time.strftime("%B", time.strptime(str(num), "%m")): num
for num in range(1, 13)
}
# Month names in the dict's insertion order, i.e. month 1 through month 12
# (relies on dicts preserving insertion order, Python 3.7+).
MONTHS = list(month_num_dict.keys())
當然,為了設置回原來的locale,保證線程安全,可以加一個線程互斥鎖,代碼可以參考這個答案,我的完整代碼包含了所有的測試數據,可以看這里。
以下是一位朋友提出的兩種工作方法,他們也研究了這個問題。 第一個是高性能的並使用模運算符,因此列表不需要復制到自身上。
month_names = [
    'January', 'February',
    'March', 'April', 'May',
    'June', 'July', 'August',
    'September', 'October',
    'November', 'December'
]

# Reverse lookup, e.g. {'January': 0, 'February': 1, ...}
month_name_to_index = {
    name: position
    for position, name in enumerate(month_names)
}


def consecutive_months(list_of_months_by_name):
    """Describe the longest run of consecutive months in the input.

    The year is treated as circular through modulo arithmetic, so a run may
    wrap from December into January without duplicating the month list.

    Args:
        list_of_months_by_name: collection of full English month names, in
            any order; duplicates are harmless.

    Returns:
        ``None`` for an empty input.  Otherwise a dict with the keys
        ``"from"`` (first month of the run), ``"to"`` (last month of the
        run) and ``"length"`` (number of months in the run).  When every
        month is present the run is reported as January..December.  Among
        runs of equal length, the first one met when walking the year from
        the month after the first missing month is reported.

    Raises:
        KeyError: if a name is not a known month.
    """
    if not list_of_months_by_name:
        return None

    # Presence flag for each of the twelve calendar slots.
    present = [False] * 12
    for name in list_of_months_by_name:
        present[month_name_to_index[name]] = True

    if all(present):
        return {"from": "January", "to": "December", "length": 12}

    # Walk one full circle starting just after the first missing month.  The
    # final step lands on that missing month again, so every run gets closed.
    first_gap = present.index(False)
    best_start, best_length = None, 0
    run_start, run_length = None, 0
    for step in range(1, 13):
        slot = (first_gap + step) % 12
        if present[slot]:
            if run_length == 0:
                run_start = slot
            run_length += 1
        else:
            # A missing month ends the current run, if there is one.
            if run_length > best_length:
                best_start, best_length = run_start, run_length
            run_length = 0

    return {
        "from": month_names[best_start],
        "to": month_names[(best_start + best_length - 1) % 12],
        "length": best_length
    }
第二個是巧妙的單行寫法(one-liner):
MONTH_NAMES = [
    'January', 'February',
    'March', 'April', 'May',
    'June', 'July', 'August',
    'September', 'October',
    'November', 'December'
]


def consecutive_months(list_of_months_by_name):
    """Return the longest run of consecutive months in the input.

    Two copies of the year are laid end to end.  Every month that is absent
    from the input acts as a separator, and the stretch of present months
    that follows each separator is a candidate run.  Candidates are compared
    as ``(length, names)`` tuples, so the longest wins and equal lengths are
    settled by ordinary list comparison of the month names.

    Args:
        list_of_months_by_name: collection of month names in any order.
            Names that are not real months are simply never matched.

    Returns:
        A list of month names in calendar order.  If every month is present
        the ``MONTH_NAMES`` list itself is returned; an empty input yields
        an empty list.
    """
    # Nothing missing: the whole year is the answer.
    if not set(MONTH_NAMES) - set(list_of_months_by_name):
        return MONTH_NAMES

    two_years = MONTH_NAMES * 2
    candidates = []
    for gap_position, gap_name in enumerate(two_years):
        if gap_name in list_of_months_by_name:
            continue
        # Count the present months that directly follow this missing one.
        run_length = 0
        for following in two_years[gap_position + 1:]:
            if following not in list_of_months_by_name:
                break
            run_length += 1
        run_start = gap_position + 1
        candidates.append(
            (run_length, two_years[run_start:run_start + run_length])
        )

    return max(candidates)[1]
兩種算法都返回測試數據的預期結果。 謝謝AV!
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.