[英]Subtracting two lists in Python
在 Python 中,如何減去兩個非唯一的無序列表? 假設我們有a = [0,1,2,1,0]
和b = [0, 1, 1]
我想做類似 c = a - b 的操作,並且讓 c 成為 [2, 0] 或 [0, 2]。
順序對我來說無關緊要。 如果 a 不包含 b 中的所有元素,這應該拋出異常。
請注意,這與集合不同! 我對找出 a 和 b 中元素集的差異不感興趣,我對 a 和 b 中元素的實際集合之間的差異感興趣。
我可以用 for 循環來做到這一點,在 a 中查找 b 的第一個元素,然后從 b 和 a 中刪除元素,等等。但這對我沒有吸引力,它會非常低效( O(n^2)
時間),而在O(n log n)
時間內執行此操作應該沒有問題。
我知道“for”不是你想要的,但它簡單明了:
# Remove one occurrence of each element of b from a, in place.
# list.remove raises ValueError if an element of b is not present in a.
for x in b:
    a.remove(x)
或者,如果 b 的成員可能不在 a 中,則使用:
# Remove one occurrence of each element of b from a, in place, skipping
# elements that a does not (or no longer) contain instead of raising.
for x in b:
    if x in a:
        a.remove(x)
Python 2.7 和 3.1 添加了 collections.Counter 類,它是一個字典子類,將元素映射到元素的出現次數。 這可以用作多重集。 你可以這樣做:
from collections import Counter
# Each Counter maps an element to its number of occurrences (a multiset).
a = Counter([0, 1, 2, 1, 0])
b = Counter([0, 1, 1])
c = a - b # ignores items in b missing in a
print(list(c.elements())) # -> [0, 2]
同樣,如果您想檢查 b 中的每個元素是否都在 a 中:
# a[key] returns 0 if key not in a, instead of raising an exception
assert all(a[key] >= b[key] for key in b)
但是由於您只能使用 2.5,您可以先嘗試導入它,如果導入失敗,再定義您自己的版本。 這樣,您將確保獲得最新版本(如果可用),如果沒有,則回退到可工作的版本。 如果將來它被轉換為 C 實現,您還將受益於速度改進。
# Prefer the standard-library Counter; define a local fallback only on
# interpreters where the import fails.
try:
    from collections import Counter
except ImportError:
    class Counter(dict):
        ...  # placeholder in the original text: the fallback class body goes here
您可以在此處找到當前的 Python 源代碼。
我會以更簡單的方式做到這一點:
a_b = [e for e in a if not e in b ]
..正如所寫,這是錯誤的 - 只有當項目在列表中是唯一的時才有效。 如果是,最好使用
a_b = list(set(a) - set(b))
我不確定對 for 循環的反對意見是什么:Python 中沒有多重集,因此您不能使用內置容器來幫助您。
在我看來,任何單行寫法(即使可行)都可能很難理解。 請追求可讀性並遵循 KISS(Keep It Simple, Stupid)原則。 Python 不是 C :)
Python 2.7+ 和 3.1+ 有 collections.Counter (又名多重集)。 文檔鏈接到配方 576611: 適用於 Python 2.5 的 Counter 類:
from operator import itemgetter
from heapq import nlargest
from itertools import repeat, ifilter
class Counter(dict):
    '''Dict subclass for counting hashable objects.  Sometimes called a bag
    or multiset.  Elements are stored as dictionary keys and their counts
    are stored as dictionary values.

    Only ``dict.items()`` and plain iteration are used internally, so the
    class body runs unchanged on Python 2.5+ and on Python 3.

    >>> Counter('zyzygy')
    Counter({'y': 3, 'z': 2, 'g': 1})

    '''

    def __init__(self, iterable=None, **kwds):
        '''Create a new, empty Counter object.  And if given, count elements
        from an input iterable.  Or, initialize the count from another mapping
        of elements to their counts.

        >>> c = Counter()                           # a new, empty counter
        >>> c = Counter('gallahad')                 # a new counter from an iterable
        >>> c = Counter({'a': 4, 'b': 2})           # a new counter from a mapping
        >>> c = Counter(a=4, b=2)                   # a new counter from keyword args

        '''
        self.update(iterable, **kwds)

    def __missing__(self, key):
        'Return a count of zero for keys that are not stored in the counter.'
        return 0

    def most_common(self, n=None):
        '''List the n most common elements and their counts from the most
        common to the least.  If n is None, then list all element counts.

        The relative order of elements with equal counts follows the
        dictionary's iteration order.

        >>> Counter('abracadabra').most_common(1)
        [('a', 5)]

        '''
        if n is None:
            return sorted(self.items(), key=itemgetter(1), reverse=True)
        return nlargest(n, self.items(), key=itemgetter(1))

    def elements(self):
        '''Iterator over elements repeating each as many times as its count.

        >>> c = Counter('ABCABC')
        >>> sorted(c.elements())
        ['A', 'A', 'B', 'B', 'C', 'C']

        If an element's count has been set to zero or is a negative number,
        elements() will ignore it.

        '''
        for elem, count in self.items():
            # repeat() yields nothing for zero or negative counts.
            for _ in repeat(None, count):
                yield elem

    # Override dict methods where the meaning changes for Counter objects.

    @classmethod
    def fromkeys(cls, iterable, v=None):
        'Always raise NotImplementedError; use Counter(iterable) instead.'
        raise NotImplementedError(
            'Counter.fromkeys() is undefined.  Use Counter(iterable) instead.')

    def update(self, iterable=None, **kwds):
        '''Like dict.update() but add counts instead of replacing them.

        Source can be an iterable, a dictionary, or another Counter instance.

        >>> c = Counter('which')
        >>> c.update('witch')           # add elements from another iterable
        >>> d = Counter('watch')
        >>> c.update(d)                 # add elements from another counter
        >>> c['h']                      # four 'h' in which, witch, and watch
        4

        '''
        if iterable is not None:
            # Mappings are recognised by duck typing on ``items`` (rather than
            # the Python-2-only ``iteritems``) so dicts and Counters are
            # handled as mappings on Python 2 and Python 3 alike.
            if hasattr(iterable, 'items'):
                if self:
                    self_get = self.get
                    for elem, count in iterable.items():
                        self[elem] = self_get(elem, 0) + count
                else:
                    dict.update(self, iterable)  # fast path when counter is empty
            else:
                self_get = self.get
                for elem in iterable:
                    self[elem] = self_get(elem, 0) + 1
        if kwds:
            self.update(kwds)

    def copy(self):
        'Like dict.copy() but returns an instance of the same class instead of a dict.'
        # Use the runtime class so that subclasses copy to their own type.
        return self.__class__(self)

    def __delitem__(self, elem):
        'Like dict.__delitem__() but does not raise KeyError for missing values.'
        if elem in self:
            dict.__delitem__(self, elem)

    def __repr__(self):
        if not self:
            return '%s()' % self.__class__.__name__
        items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
        return '%s({%s})' % (self.__class__.__name__, items)

    # Multiset-style mathematical operations discussed in:
    #       Knuth TAOCP Volume II section 4.6.3 exercise 19
    #       and at http://en.wikipedia.org/wiki/Multiset
    #
    # Outputs guaranteed to only include positive counts.
    #
    # To strip negative and zero counts, add-in an empty counter:
    #       c += Counter()

    def __add__(self, other):
        '''Add counts from two counters.

        >>> Counter('abbb') + Counter('bcc')
        Counter({'b': 4, 'c': 2, 'a': 1})

        '''
        if not isinstance(other, Counter):
            return NotImplemented
        result = Counter()
        for elem in set(self) | set(other):
            newcount = self[elem] + other[elem]
            if newcount > 0:
                result[elem] = newcount
        return result

    def __sub__(self, other):
        ''' Subtract count, but keep only results with positive counts.

        >>> Counter('abbbc') - Counter('bccd')
        Counter({'b': 2, 'a': 1})

        '''
        if not isinstance(other, Counter):
            return NotImplemented
        result = Counter()
        for elem in set(self) | set(other):
            newcount = self[elem] - other[elem]
            if newcount > 0:
                result[elem] = newcount
        return result

    def __or__(self, other):
        '''Union is the maximum of value in either of the input counters.

        >>> Counter('abbb') | Counter('bcc')
        Counter({'b': 3, 'c': 2, 'a': 1})

        '''
        if not isinstance(other, Counter):
            return NotImplemented
        _max = max
        result = Counter()
        for elem in set(self) | set(other):
            newcount = _max(self[elem], other[elem])
            if newcount > 0:
                result[elem] = newcount
        return result

    def __and__(self, other):
        ''' Intersection is the minimum of corresponding counts.

        >>> Counter('abbb') & Counter('bcc')
        Counter({'b': 1})

        '''
        if not isinstance(other, Counter):
            return NotImplemented
        _min = min
        result = Counter()
        # Walk the smaller counter and probe the larger one.
        if len(self) < len(other):
            small, large = self, other
        else:
            small, large = other, self
        for elem in small:
            if elem in large:
                newcount = _min(small[elem], large[elem])
                if newcount > 0:
                    result[elem] = newcount
        return result
if __name__ == '__main__':
    # Run the doctests embedded in this module and print the summary.
    # The call form of print works on both Python 2 and Python 3.
    import doctest
    print(doctest.testmod())
然后你可以寫
a = Counter([0, 1, 2, 1, 0])
b = Counter([0, 1, 1])
c = a - b  # multiset difference; only positive counts are kept
print(list(c.elements()))  # [0, 2]
使用列表理解:
[i for i in a if not i in b or b.remove(i)]
就能達到目的,不過它會在這個過程中改變 b 。 但我同意 jkp 和 Dyno Fu 的觀點,即使用 for 循環會更好。
也許有人可以創建一個使用列表理解的更好的例子,但仍然是 KISS?
為了證明 jkp 的觀點,即“一行中的任何內容都可能非常難以理解”,我創建了一個單行寫法。 請不要給我投反對票,因為我知道這不是您應該實際使用的解決方案。 它僅用於演示目的。
這個想法是將 a 中的值一一相加,只要您添加該值的總次數小於該值在 a 中的總次數減去它在 b 中的次數:
[ value for counter,value in enumerate(a) if a.count(value) >= b.count(value) + a[counter:].count(value) ]
恐怖! 但也許有人可以改進它? 它甚至沒有錯誤嗎?
編輯:看到 Devin Jeanpierre 關於使用字典數據結構的評論,我想出了這個 oneliner:
sum([ [value]*count for value,count in {value:a.count(value)-b.count(value) for value in set(a)}.items() ], [])
更好,但仍然無法閱讀。
你可以嘗試這樣的事情:
class mylist(list):
    """List subclass whose ``-`` operator performs multiset subtraction."""

    def __sub__(self, b):
        """Return a new mylist: a copy of self with one occurrence removed
        for every element of ``b``.

        ``b`` may be any iterable and is not modified. Its elements are
        processed from last to first, and each one removes the first
        matching occurrence from the result.

        Raises:
            ValueError: if an element of ``b`` has no remaining match.
        """
        # Build a mylist (not a plain list) so results can be chained:
        # (a - b) - c.
        result = mylist(self)
        for item in reversed(list(b)):
            try:
                result.remove(item)
            except ValueError:
                raise ValueError("Not all elements found during subtraction")
        return result
a = mylist([0, 1, 2, 1, 0] )
b = mylist([0, 1, 1])
>>> a - b
[2, 0]
你必須定義 [1, 2, 3] - [5, 6] 應該輸出什么,我想你想要 [1, 2, 3] 這就是我忽略 ValueError 的原因。
編輯:現在我看到,如果 a 不包含 b 的所有元素,您希望拋出異常,所以我改為拋出異常,而不是忽略 ValueError。
我試圖找到一個更優雅的解決方案,但我能做的最好的事情基本上和 Dyno Fu 說的一樣:
from copy import copy
def subtract_lists(a, b):
    """Return a shallow copy of ``a`` with one occurrence removed for each
    element of ``b``.

    Elements of ``b`` that are not (or no longer) present are skipped
    silently. Neither input is modified, and the surviving elements keep
    their original relative order.

    >>> a = [0, 1, 2, 1, 0]
    >>> b = [0, 1, 1]
    >>> subtract_lists(a, b)
    [2, 0]

    >>> import random
    >>> size = 10000
    >>> a = [random.randrange(100) for _ in range(size)]
    >>> b = [random.randrange(100) for _ in range(size)]
    >>> c = subtract_lists(a, b)
    >>> assert all((x in a) for x in c)
    """
    remaining = copy(a)
    for x in b:
        # EAFP: a single scan per element instead of ``x in a`` followed by
        # ``a.remove(x)``.
        try:
            remaining.remove(x)
        except ValueError:
            # x is absent or already used up; skipping it is the documented
            # behaviour, not an error.
            pass
    return remaining
這是一個相對較長但高效且可讀的解決方案。 時間複雜度是 O(n)。
def list_diff(list1, list2):
    """Return the multiset difference ``list1 - list2`` as a list.

    Every element of ``list2`` cancels one equal element of ``list1``.
    Elements must be hashable. The order of the result follows the
    iteration order of the internal count dictionary.

    Raises:
        ValueError: if ``list2`` contains an element more often than
            ``list1`` does (including elements absent from ``list1``).
    """
    counts = {}
    for x in list1:
        counts[x] = counts.get(x, 0) + 1
    for x in list2:
        remaining = counts.get(x, 0)
        if remaining <= 0:
            # Either x never occurred in list1 or all its occurrences have
            # already been cancelled.
            raise ValueError('All elements of list2 not in list1')
        counts[x] = remaining - 1
    result = []
    for k, v in counts.items():
        result += v * [k]
    return result
a = [0, 1, 1, 2, 0]
b = [0, 1, 1]
%timeit list_diff(a, b)
%timeit list_diff(1000*a, 1000*b)
%timeit list_diff(1000000*a, 1000000*b)
100000 loops, best of 3: 4.8 µs per loop
1000 loops, best of 3: 1.18 ms per loop
1 loops, best of 3: 1.21 s per loop
您可以使用 map 構造來執行此操作。 它看起來很不錯,但要注意 map 那一行本身會返回一個由 None 組成的列表。 (在 Python 3 中 map 是惰性的,必須先消耗其結果,例如 list(map(...)),才會真正執行刪除。)
a = [1, 2, 3]
b = [2, 3]
map(lambda x:a.remove(x), b)
a
c = [i for i in b if i not in a]
list(set([x for x in a if x not in b]))
a
和b
不變。
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.