簡體   English   中英

在 Python 數據類上使用 hash 標識

[英]Using hash for identity on Python dataclasses

我有以下Filer實體(在域驅動設計意義上)。

from dataclasses import dataclass, field

@dataclass
class Address:
    """Postal address record.

    Declared as a plain dataclass, so two instances compare equal when all
    four fields are equal.
    """

    street: str
    city: str
    state: str
    zipcode: str

@dataclass
class Filer:
    """Filer entity whose ``cik`` is the only field left at default hash settings.

    Every other field is declared with ``hash=False, compare=True``: it takes
    part in ``==`` but is excluded from a generated ``__hash__``.
    """

    # NOTE: with a bare @dataclass (eq=True, not frozen, no unsafe_hash) the
    # class gets __hash__ = None, so the hash= flags below only matter once
    # a hash is actually generated (e.g. unsafe_hash=True).
    cik: int
    name: str = field(compare=True, hash=False)
    state: str = field(compare=True, hash=False)
    yearend: str = field(compare=True, hash=False)
    businessaddress: Address = field(compare=True, hash=False)
    mailingaddress: Address = field(compare=True, hash=False)
    sic: int = field(compare=True, hash=False)
    # Only field with a default; it must therefore stay last.
    ein: str = field(default=None, compare=True, hash=False)

對於任何 Filer,cik 本身就決定其身份。但是,我想使用相等比較來查看有關 Filer 的任何其他詳細信息是否已更改(例如,與同一 Filer 的先前版本相比)。基於此,我在除 cik 之外的所有字段上設置了 hash=False, compare=True(cik 默認為 hash=True)。

以下測試用例快速概述了預期行為:

  • 身份:完全由 cik 確定,並通過 assertIs 和 assertIsNot 測試
  • 相等:由所有字段確定,並通過 assertEqual 和 assertNotEqual 測試
import unittest

class TestFiler(unittest.TestCase):
    """Identity (``is``) and equality (``==``) expectations for Filer.

    Each test builds two Filers from scratch and compares them with either
    an equality assertion or an identity assertion.
    """

    @staticmethod
    def _make_filer(cik=1234, name="Some company"):
        """Return a new Filer; everything except cik and name is fixed."""
        # Two separate Address objects, one per address field.
        business, mailing = (
            Address("Some address", "Some city", "AB", "12345")
            for _ in range(2)
        )
        return Filer(cik, name, "Some state", "0930",
                     business, mailing, 1000, 1234567)

    def test_equality_same_filer(self):
        first = self._make_filer()
        second = self._make_filer()
        self.assertEqual(first, second)

    def test_identity_same_filer(self):
        first = self._make_filer()
        second = self._make_filer()
        # assertIs checks object identity of two separately built instances.
        self.assertIs(first, second)

    def test_equality_same_filer_new_name(self):
        first = self._make_filer()
        renamed = self._make_filer(name="A new name for the company")
        self.assertNotEqual(first, renamed)

    def test_identity_same_filer_new_name(self):
        first = self._make_filer()
        renamed = self._make_filer(name="A new name for the company")
        # Same cik, different name; again an object-identity check.
        self.assertIs(first, renamed)

    def test_equality_different_filer_same_details(self):
        other = self._make_filer(cik=4321)
        base = self._make_filer(cik=1234)
        self.assertNotEqual(other, base)

    def test_identity_different_filer_same_details(self):
        other = self._make_filer(cik=4321)
        base = self._make_filer(cik=1234)
        self.assertIsNot(other, base)

# Run the test cases only when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()

結果不符合預期。

(base) randm@pearljam /home/randm/Projects/secfilings $ /home/randm/Libraries/anaconda3/bin/python /home/randm/Projects/scrap/filer.py
....FF
======================================================================
FAIL: test_identity_same_filer (__main__.TestFiler)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/home/randm/Projects/scrap/filer.py", line 51, in test_identity_same_filer
    self.assertIs(a, b)
AssertionError: Filer(cik=1234, name='Some company', state='Some state', yearend='0930', businessaddress=Address(street='Some address', city='Some city', state='AB', zipcode='12345'), mailingaddress=Address(street='Some address', city='Some city', state='AB', zipcode='12345'), sic=1000, ein=1234567) is not Filer(cik=1234, name='Some company', state='Some state', yearend='0930', businessaddress=Address(street='Some address', city='Some city', state='AB', zipcode='12345'), mailingaddress=Address(street='Some address', city='Some city', state='AB', zipcode='12345'), sic=1000, ein=1234567)

======================================================================
FAIL: test_identity_same_filer_new_name (__main__.TestFiler)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/home/randm/Projects/scrap/filer.py", line 77, in test_identity_same_filer_new_name
    self.assertIs(a, b)
AssertionError: Filer(cik=1234, name='Some company', state='Some state', yearend='0930', businessaddress=Address(street='Some address', city='Some city', state='AB', zipcode='12345'), mailingaddress=Address(street='Some address', city='Some city', state='AB', zipcode='12345'), sic=1000, ein=1234567) is not Filer(cik=1234, name='A new name for the company', state='Some state', yearend='0930', businessaddress=Address(street='Some address', city='Some city', state='AB', zipcode='12345'), mailingaddress=Address(street='Some address', city='Some city', state='AB', zipcode='12345'), sic=1000, ein=1234567)

----------------------------------------------------------------------
Ran 6 tests in 0.001s

FAILED (failures=2)

有沒有辦法讓我使用 is 測試(而不使用數據類方法 is_ 或類似的方法,那會破壞我在客戶端代碼中想要的簡潔語法)?還是我只是在濫用身份(我相信它在 CPython 中基於指針值),而應該在我的客戶端代碼中明確使用 hash 相等性?

正如 @LhasaDad 指出的那樣,沒有辦法覆蓋 Python 的 is 身份檢查。它總是指實際上相同的對象。(對字符串有時看似可行,但對整數的表現會“出乎意料”。)

您可以在數據類定義中使用 unsafe_hash=True(連同 eq=True),這樣就可以使用 hash(a) == hash(b)。但是如果你想讓它感覺更自然,也可以創建一個方法 is_ 並執行 a.is_(b)。請注意,如果還有其他用戶/編碼人員使用您的類,您需要說清楚何時 is_ 可以為 True 而 == 為 False,以及所有其他組合。

@dataclass(unsafe_hash=True)
class Filer:
    # unsafe_hash=True makes dataclass generate __hash__ from the fields
    # whose hash flag is not False, even though the class is not frozen.
    ...  # everything else the same

那麼你的身份測試將基於 hash()。

此外,您應該使用 setUp 來設置 a 和 b,而不是在每個測試中復制粘貼它們。閱讀您代碼的人(如我們)否則必須在每個測試中檢查兩者的完整定義,才能看出有什麼不同。一個月後,你自己也會如此。對於只與原對象略有不同的測試對象,請使用 dataclasses.replace()。

這是單元測試的可讀性更強的版本,其中添加了基於哈希的檢查:

import dataclasses
import unittest

class TestFiler(unittest.TestCase):
    """Equality, identity and hash checks for Filer.

    The hash tests call ``hash()`` on Filer instances, so they require a
    hashable Filer (for example one declared with
    ``@dataclass(unsafe_hash=True)``).
    """

    def setUp(self):
        """Build two separately constructed Filers with identical fields."""
        self.a = Filer(1234, "Some company", "Some state", "0930",
                       Address("Some address", "Some city", "AB", "12345"),
                       Address("Some address", "Some city", "AB", "12345"),
                       1000, 1234567)
        self.b = Filer(1234, "Some company", "Some state", "0930",
                       Address("Some address", "Some city", "AB", "12345"),
                       Address("Some address", "Some city", "AB", "12345"),
                       1000, 1234567)

    def test_equality_same_filer(self):
        self.assertEqual(self.a, self.b)

    def test_identity_same_filer(self):  # will still fail
        # `is` compares object identity; a and b are distinct objects.
        self.assertIs(self.a, self.b)

    def test_equality_same_filer_new_name(self):
        # make it clear that `a` and `c` only differ by name:
        c = dataclasses.replace(self.a, name="A new name for the company")
        self.assertNotEqual(self.a, c)

    def test_identity_same_filer_new_name(self):  # will still fail
        # or put c also in `setUp`
        c = dataclasses.replace(self.a, name="A new name for the company")
        self.assertIs(self.a, c)

    def test_equality_different_filer_same_details(self):
        new_a = dataclasses.replace(self.a, cik=4321)
        # Equality test: compare values, not object identity.
        self.assertNotEqual(new_a, self.a)

    def test_identity_different_filer_same_details(self):
        new_a = dataclasses.replace(self.a, cik=4321)
        self.assertIsNot(new_a, self.a)

    def test_hash_same_filer(self):  # NEW
        self.assertEqual(hash(self.a), hash(self.b))

    def test_hash_same_filer_new_name(self):  # NEW
        c = dataclasses.replace(self.a, name="A new name for the company")
        self.assertEqual(hash(c), hash(self.a))

    # Named test_hash_* so it does not replace the identity test of the
    # same scenario defined above.
    def test_hash_different_filer_same_details(self):  # NEW
        diff_a = dataclasses.replace(self.a, cik=4321)
        self.assertNotEqual(hash(diff_a), hash(self.a))


# Run the test cases only when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()

你誤用了 assertIs。它使用 Python 的 is 行為,也就是說,兩者必須引用同一個對象。由於您構建了 2 個不同的對象,它們之間的 is 測試將始終為假。相等比較(==)才是檢驗值是否相等的正確方法。

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM