»Core Development>Code coverage>Lib/test/test_hash.py

Python code coverage for Lib/test/test_hash.py

#countcontent
1n/a# test the invariant that
2n/a# iff a==b then hash(a)==hash(b)
3n/a#
4n/a# Also test that hash implementations are inherited as expected
5n/a
6n/aimport datetime
7n/aimport os
8n/aimport sys
9n/aimport unittest
10n/afrom test.support.script_helper import assert_python_ok
11n/afrom collections import Hashable
12n/a
13n/aIS_64BIT = sys.maxsize > 2**32
14n/a
15n/adef lcg(x, length=16):
16n/a """Linear congruential generator"""
17n/a if x == 0:
18n/a return bytes(length)
19n/a out = bytearray(length)
20n/a for i in range(length):
21n/a x = (214013 * x + 2531011) & 0x7fffffff
22n/a out[i] = (x >> 16) & 0xff
23n/a return bytes(out)
24n/a
25n/adef pysiphash(uint64):
26n/a """Convert SipHash24 output to Py_hash_t
27n/a """
28n/a assert 0 <= uint64 < (1 << 64)
29n/a # simple unsigned to signed int64
30n/a if uint64 > (1 << 63) - 1:
31n/a int64 = uint64 - (1 << 64)
32n/a else:
33n/a int64 = uint64
34n/a # mangle uint64 to uint32
35n/a uint32 = (uint64 ^ uint64 >> 32) & 0xffffffff
36n/a # simple unsigned to signed int32
37n/a if uint32 > (1 << 31) - 1:
38n/a int32 = uint32 - (1 << 32)
39n/a else:
40n/a int32 = uint32
41n/a return int32, int64
42n/a
43n/adef skip_unless_internalhash(test):
44n/a """Skip decorator for tests that depend on SipHash24 or FNV"""
45n/a ok = sys.hash_info.algorithm in {"fnv", "siphash24"}
46n/a msg = "Requires SipHash24 or FNV"
47n/a return test if ok else unittest.skip(msg)(test)
48n/a
49n/a
50n/aclass HashEqualityTestCase(unittest.TestCase):
51n/a
52n/a def same_hash(self, *objlist):
53n/a # Hash each object given and fail if
54n/a # the hash values are not all the same.
55n/a hashed = list(map(hash, objlist))
56n/a for h in hashed[1:]:
57n/a if h != hashed[0]:
58n/a self.fail("hashed values differ: %r" % (objlist,))
59n/a
60n/a def test_numeric_literals(self):
61n/a self.same_hash(1, 1, 1.0, 1.0+0.0j)
62n/a self.same_hash(0, 0.0, 0.0+0.0j)
63n/a self.same_hash(-1, -1.0, -1.0+0.0j)
64n/a self.same_hash(-2, -2.0, -2.0+0.0j)
65n/a
66n/a def test_coerced_integers(self):
67n/a self.same_hash(int(1), int(1), float(1), complex(1),
68n/a int('1'), float('1.0'))
69n/a self.same_hash(int(-2**31), float(-2**31))
70n/a self.same_hash(int(1-2**31), float(1-2**31))
71n/a self.same_hash(int(2**31-1), float(2**31-1))
72n/a # for 64-bit platforms
73n/a self.same_hash(int(2**31), float(2**31))
74n/a self.same_hash(int(-2**63), float(-2**63))
75n/a self.same_hash(int(2**63), float(2**63))
76n/a
77n/a def test_coerced_floats(self):
78n/a self.same_hash(int(1.23e300), float(1.23e300))
79n/a self.same_hash(float(0.5), complex(0.5, 0.0))
80n/a
81n/a def test_unaligned_buffers(self):
82n/a # The hash function for bytes-like objects shouldn't have
83n/a # alignment-dependent results (example in issue #16427).
84n/a b = b"123456789abcdefghijklmnopqrstuvwxyz" * 128
85n/a for i in range(16):
86n/a for j in range(16):
87n/a aligned = b[i:128+j]
88n/a unaligned = memoryview(b)[i:128+j]
89n/a self.assertEqual(hash(aligned), hash(unaligned))
90n/a
91n/a
92n/a_default_hash = object.__hash__
93n/aclass DefaultHash(object): pass
94n/a
95n/a_FIXED_HASH_VALUE = 42
96n/aclass FixedHash(object):
97n/a def __hash__(self):
98n/a return _FIXED_HASH_VALUE
99n/a
100n/aclass OnlyEquality(object):
101n/a def __eq__(self, other):
102n/a return self is other
103n/a
104n/aclass OnlyInequality(object):
105n/a def __ne__(self, other):
106n/a return self is not other
107n/a
108n/aclass InheritedHashWithEquality(FixedHash, OnlyEquality): pass
109n/aclass InheritedHashWithInequality(FixedHash, OnlyInequality): pass
110n/a
111n/aclass NoHash(object):
112n/a __hash__ = None
113n/a
114n/aclass HashInheritanceTestCase(unittest.TestCase):
115n/a default_expected = [object(),
116n/a DefaultHash(),
117n/a OnlyInequality(),
118n/a ]
119n/a fixed_expected = [FixedHash(),
120n/a InheritedHashWithEquality(),
121n/a InheritedHashWithInequality(),
122n/a ]
123n/a error_expected = [NoHash(),
124n/a OnlyEquality(),
125n/a ]
126n/a
127n/a def test_default_hash(self):
128n/a for obj in self.default_expected:
129n/a self.assertEqual(hash(obj), _default_hash(obj))
130n/a
131n/a def test_fixed_hash(self):
132n/a for obj in self.fixed_expected:
133n/a self.assertEqual(hash(obj), _FIXED_HASH_VALUE)
134n/a
135n/a def test_error_hash(self):
136n/a for obj in self.error_expected:
137n/a self.assertRaises(TypeError, hash, obj)
138n/a
139n/a def test_hashable(self):
140n/a objects = (self.default_expected +
141n/a self.fixed_expected)
142n/a for obj in objects:
143n/a self.assertIsInstance(obj, Hashable)
144n/a
145n/a def test_not_hashable(self):
146n/a for obj in self.error_expected:
147n/a self.assertNotIsInstance(obj, Hashable)
148n/a
149n/a
150n/a# Issue #4701: Check that some builtin types are correctly hashable
151n/aclass DefaultIterSeq(object):
152n/a seq = range(10)
153n/a def __len__(self):
154n/a return len(self.seq)
155n/a def __getitem__(self, index):
156n/a return self.seq[index]
157n/a
158n/aclass HashBuiltinsTestCase(unittest.TestCase):
159n/a hashes_to_check = [enumerate(range(10)),
160n/a iter(DefaultIterSeq()),
161n/a iter(lambda: 0, 0),
162n/a ]
163n/a
164n/a def test_hashes(self):
165n/a _default_hash = object.__hash__
166n/a for obj in self.hashes_to_check:
167n/a self.assertEqual(hash(obj), _default_hash(obj))
168n/a
169n/aclass HashRandomizationTests:
170n/a
171n/a # Each subclass should define a field "repr_", containing the repr() of
172n/a # an object to be tested
173n/a
174n/a def get_hash_command(self, repr_):
175n/a return 'print(hash(eval(%a)))' % repr_
176n/a
177n/a def get_hash(self, repr_, seed=None):
178n/a env = os.environ.copy()
179n/a env['__cleanenv'] = True # signal to assert_python not to do a copy
180n/a # of os.environ on its own
181n/a if seed is not None:
182n/a env['PYTHONHASHSEED'] = str(seed)
183n/a else:
184n/a env.pop('PYTHONHASHSEED', None)
185n/a out = assert_python_ok(
186n/a '-c', self.get_hash_command(repr_),
187n/a **env)
188n/a stdout = out[1].strip()
189n/a return int(stdout)
190n/a
191n/a def test_randomized_hash(self):
192n/a # two runs should return different hashes
193n/a run1 = self.get_hash(self.repr_, seed='random')
194n/a run2 = self.get_hash(self.repr_, seed='random')
195n/a self.assertNotEqual(run1, run2)
196n/a
197n/aclass StringlikeHashRandomizationTests(HashRandomizationTests):
198n/a repr_ = None
199n/a repr_long = None
200n/a
201n/a # 32bit little, 64bit little, 32bit big, 64bit big
202n/a known_hashes = {
203n/a 'djba33x': [ # only used for small strings
204n/a # seed 0, 'abc'
205n/a [193485960, 193485960, 193485960, 193485960],
206n/a # seed 42, 'abc'
207n/a [-678966196, 573763426263223372, -820489388, -4282905804826039665],
208n/a ],
209n/a 'siphash24': [
210n/a # NOTE: PyUCS2 layout depends on endianess
211n/a # seed 0, 'abc'
212n/a [1198583518, 4596069200710135518, 1198583518, 4596069200710135518],
213n/a # seed 42, 'abc'
214n/a [273876886, -4501618152524544106, 273876886, -4501618152524544106],
215n/a # seed 42, 'abcdefghijk'
216n/a [-1745215313, 4436719588892876975, -1745215313, 4436719588892876975],
217n/a # seed 0, 'äú∑ℇ'
218n/a [493570806, 5749986484189612790, -1006381564, -5915111450199468540],
219n/a # seed 42, 'äú∑ℇ'
220n/a [-1677110816, -2947981342227738144, -1860207793, -4296699217652516017],
221n/a ],
222n/a 'fnv': [
223n/a # seed 0, 'abc'
224n/a [-1600925533, 1453079729188098211, -1600925533,
225n/a 1453079729188098211],
226n/a # seed 42, 'abc'
227n/a [-206076799, -4410911502303878509, -1024014457,
228n/a -3570150969479994130],
229n/a # seed 42, 'abcdefghijk'
230n/a [811136751, -5046230049376118746, -77208053 ,
231n/a -4779029615281019666],
232n/a # seed 0, 'äú∑ℇ'
233n/a [44402817, 8998297579845987431, -1956240331,
234n/a -782697888614047887],
235n/a # seed 42, 'äú∑ℇ'
236n/a [-283066365, -4576729883824601543, -271871407,
237n/a -3927695501187247084],
238n/a ]
239n/a }
240n/a
241n/a def get_expected_hash(self, position, length):
242n/a if length < sys.hash_info.cutoff:
243n/a algorithm = "djba33x"
244n/a else:
245n/a algorithm = sys.hash_info.algorithm
246n/a if sys.byteorder == 'little':
247n/a platform = 1 if IS_64BIT else 0
248n/a else:
249n/a assert(sys.byteorder == 'big')
250n/a platform = 3 if IS_64BIT else 2
251n/a return self.known_hashes[algorithm][position][platform]
252n/a
253n/a def test_null_hash(self):
254n/a # PYTHONHASHSEED=0 disables the randomized hash
255n/a known_hash_of_obj = self.get_expected_hash(0, 3)
256n/a
257n/a # Randomization is enabled by default:
258n/a self.assertNotEqual(self.get_hash(self.repr_), known_hash_of_obj)
259n/a
260n/a # It can also be disabled by setting the seed to 0:
261n/a self.assertEqual(self.get_hash(self.repr_, seed=0), known_hash_of_obj)
262n/a
263n/a @skip_unless_internalhash
264n/a def test_fixed_hash(self):
265n/a # test a fixed seed for the randomized hash
266n/a # Note that all types share the same values:
267n/a h = self.get_expected_hash(1, 3)
268n/a self.assertEqual(self.get_hash(self.repr_, seed=42), h)
269n/a
270n/a @skip_unless_internalhash
271n/a def test_long_fixed_hash(self):
272n/a if self.repr_long is None:
273n/a return
274n/a h = self.get_expected_hash(2, 11)
275n/a self.assertEqual(self.get_hash(self.repr_long, seed=42), h)
276n/a
277n/a
278n/aclass StrHashRandomizationTests(StringlikeHashRandomizationTests,
279n/a unittest.TestCase):
280n/a repr_ = repr('abc')
281n/a repr_long = repr('abcdefghijk')
282n/a repr_ucs2 = repr('äú∑ℇ')
283n/a
284n/a @skip_unless_internalhash
285n/a def test_empty_string(self):
286n/a self.assertEqual(hash(""), 0)
287n/a
288n/a @skip_unless_internalhash
289n/a def test_ucs2_string(self):
290n/a h = self.get_expected_hash(3, 6)
291n/a self.assertEqual(self.get_hash(self.repr_ucs2, seed=0), h)
292n/a h = self.get_expected_hash(4, 6)
293n/a self.assertEqual(self.get_hash(self.repr_ucs2, seed=42), h)
294n/a
295n/aclass BytesHashRandomizationTests(StringlikeHashRandomizationTests,
296n/a unittest.TestCase):
297n/a repr_ = repr(b'abc')
298n/a repr_long = repr(b'abcdefghijk')
299n/a
300n/a @skip_unless_internalhash
301n/a def test_empty_string(self):
302n/a self.assertEqual(hash(b""), 0)
303n/a
304n/aclass MemoryviewHashRandomizationTests(StringlikeHashRandomizationTests,
305n/a unittest.TestCase):
306n/a repr_ = "memoryview(b'abc')"
307n/a repr_long = "memoryview(b'abcdefghijk')"
308n/a
309n/a @skip_unless_internalhash
310n/a def test_empty_string(self):
311n/a self.assertEqual(hash(memoryview(b"")), 0)
312n/a
313n/aclass DatetimeTests(HashRandomizationTests):
314n/a def get_hash_command(self, repr_):
315n/a return 'import datetime; print(hash(%s))' % repr_
316n/a
317n/aclass DatetimeDateTests(DatetimeTests, unittest.TestCase):
318n/a repr_ = repr(datetime.date(1066, 10, 14))
319n/a
320n/aclass DatetimeDatetimeTests(DatetimeTests, unittest.TestCase):
321n/a repr_ = repr(datetime.datetime(1, 2, 3, 4, 5, 6, 7))
322n/a
323n/aclass DatetimeTimeTests(DatetimeTests, unittest.TestCase):
324n/a repr_ = repr(datetime.time(0))
325n/a
326n/a
327n/aclass HashDistributionTestCase(unittest.TestCase):
328n/a
329n/a def test_hash_distribution(self):
330n/a # check for hash collision
331n/a base = "abcdefghabcdefg"
332n/a for i in range(1, len(base)):
333n/a prefix = base[:i]
334n/a with self.subTest(prefix=prefix):
335n/a s15 = set()
336n/a s255 = set()
337n/a for c in range(256):
338n/a h = hash(prefix + chr(c))
339n/a s15.add(h & 0xf)
340n/a s255.add(h & 0xff)
341n/a # SipHash24 distribution depends on key, usually > 60%
342n/a self.assertGreater(len(s15), 8, prefix)
343n/a self.assertGreater(len(s255), 128, prefix)
344n/a
345n/aif __name__ == "__main__":
346n/a unittest.main()