| 1 | n/a | # test the invariant that |
|---|
| 2 | n/a | # iff a==b then hash(a)==hash(b) |
|---|
| 3 | n/a | # |
|---|
| 4 | n/a | # Also test that hash implementations are inherited as expected |
|---|
| 5 | n/a | |
|---|
| 6 | n/a | import datetime |
|---|
| 7 | n/a | import os |
|---|
| 8 | n/a | import sys |
|---|
| 9 | n/a | import unittest |
|---|
| 10 | n/a | from test.support.script_helper import assert_python_ok |
|---|
| 11 | n/a | from collections import Hashable |
|---|
| 12 | n/a | |
|---|
| 13 | n/a | IS_64BIT = sys.maxsize > 2**32 |
|---|
| 14 | n/a | |
|---|
| 15 | n/a | def lcg(x, length=16): |
|---|
| 16 | n/a | """Linear congruential generator""" |
|---|
| 17 | n/a | if x == 0: |
|---|
| 18 | n/a | return bytes(length) |
|---|
| 19 | n/a | out = bytearray(length) |
|---|
| 20 | n/a | for i in range(length): |
|---|
| 21 | n/a | x = (214013 * x + 2531011) & 0x7fffffff |
|---|
| 22 | n/a | out[i] = (x >> 16) & 0xff |
|---|
| 23 | n/a | return bytes(out) |
|---|
| 24 | n/a | |
|---|
| 25 | n/a | def pysiphash(uint64): |
|---|
| 26 | n/a | """Convert SipHash24 output to Py_hash_t |
|---|
| 27 | n/a | """ |
|---|
| 28 | n/a | assert 0 <= uint64 < (1 << 64) |
|---|
| 29 | n/a | # simple unsigned to signed int64 |
|---|
| 30 | n/a | if uint64 > (1 << 63) - 1: |
|---|
| 31 | n/a | int64 = uint64 - (1 << 64) |
|---|
| 32 | n/a | else: |
|---|
| 33 | n/a | int64 = uint64 |
|---|
| 34 | n/a | # mangle uint64 to uint32 |
|---|
| 35 | n/a | uint32 = (uint64 ^ uint64 >> 32) & 0xffffffff |
|---|
| 36 | n/a | # simple unsigned to signed int32 |
|---|
| 37 | n/a | if uint32 > (1 << 31) - 1: |
|---|
| 38 | n/a | int32 = uint32 - (1 << 32) |
|---|
| 39 | n/a | else: |
|---|
| 40 | n/a | int32 = uint32 |
|---|
| 41 | n/a | return int32, int64 |
|---|
| 42 | n/a | |
|---|
| 43 | n/a | def skip_unless_internalhash(test): |
|---|
| 44 | n/a | """Skip decorator for tests that depend on SipHash24 or FNV""" |
|---|
| 45 | n/a | ok = sys.hash_info.algorithm in {"fnv", "siphash24"} |
|---|
| 46 | n/a | msg = "Requires SipHash24 or FNV" |
|---|
| 47 | n/a | return test if ok else unittest.skip(msg)(test) |
|---|
| 48 | n/a | |
|---|
| 49 | n/a | |
|---|
| 50 | n/a | class HashEqualityTestCase(unittest.TestCase): |
|---|
| 51 | n/a | |
|---|
| 52 | n/a | def same_hash(self, *objlist): |
|---|
| 53 | n/a | # Hash each object given and fail if |
|---|
| 54 | n/a | # the hash values are not all the same. |
|---|
| 55 | n/a | hashed = list(map(hash, objlist)) |
|---|
| 56 | n/a | for h in hashed[1:]: |
|---|
| 57 | n/a | if h != hashed[0]: |
|---|
| 58 | n/a | self.fail("hashed values differ: %r" % (objlist,)) |
|---|
| 59 | n/a | |
|---|
| 60 | n/a | def test_numeric_literals(self): |
|---|
| 61 | n/a | self.same_hash(1, 1, 1.0, 1.0+0.0j) |
|---|
| 62 | n/a | self.same_hash(0, 0.0, 0.0+0.0j) |
|---|
| 63 | n/a | self.same_hash(-1, -1.0, -1.0+0.0j) |
|---|
| 64 | n/a | self.same_hash(-2, -2.0, -2.0+0.0j) |
|---|
| 65 | n/a | |
|---|
| 66 | n/a | def test_coerced_integers(self): |
|---|
| 67 | n/a | self.same_hash(int(1), int(1), float(1), complex(1), |
|---|
| 68 | n/a | int('1'), float('1.0')) |
|---|
| 69 | n/a | self.same_hash(int(-2**31), float(-2**31)) |
|---|
| 70 | n/a | self.same_hash(int(1-2**31), float(1-2**31)) |
|---|
| 71 | n/a | self.same_hash(int(2**31-1), float(2**31-1)) |
|---|
| 72 | n/a | # for 64-bit platforms |
|---|
| 73 | n/a | self.same_hash(int(2**31), float(2**31)) |
|---|
| 74 | n/a | self.same_hash(int(-2**63), float(-2**63)) |
|---|
| 75 | n/a | self.same_hash(int(2**63), float(2**63)) |
|---|
| 76 | n/a | |
|---|
| 77 | n/a | def test_coerced_floats(self): |
|---|
| 78 | n/a | self.same_hash(int(1.23e300), float(1.23e300)) |
|---|
| 79 | n/a | self.same_hash(float(0.5), complex(0.5, 0.0)) |
|---|
| 80 | n/a | |
|---|
| 81 | n/a | def test_unaligned_buffers(self): |
|---|
| 82 | n/a | # The hash function for bytes-like objects shouldn't have |
|---|
| 83 | n/a | # alignment-dependent results (example in issue #16427). |
|---|
| 84 | n/a | b = b"123456789abcdefghijklmnopqrstuvwxyz" * 128 |
|---|
| 85 | n/a | for i in range(16): |
|---|
| 86 | n/a | for j in range(16): |
|---|
| 87 | n/a | aligned = b[i:128+j] |
|---|
| 88 | n/a | unaligned = memoryview(b)[i:128+j] |
|---|
| 89 | n/a | self.assertEqual(hash(aligned), hash(unaligned)) |
|---|
| 90 | n/a | |
|---|
| 91 | n/a | |
|---|
| 92 | n/a | _default_hash = object.__hash__ |
|---|
| 93 | n/a | class DefaultHash(object): pass |
|---|
| 94 | n/a | |
|---|
| 95 | n/a | _FIXED_HASH_VALUE = 42 |
|---|
| 96 | n/a | class FixedHash(object): |
|---|
| 97 | n/a | def __hash__(self): |
|---|
| 98 | n/a | return _FIXED_HASH_VALUE |
|---|
| 99 | n/a | |
|---|
| 100 | n/a | class OnlyEquality(object): |
|---|
| 101 | n/a | def __eq__(self, other): |
|---|
| 102 | n/a | return self is other |
|---|
| 103 | n/a | |
|---|
| 104 | n/a | class OnlyInequality(object): |
|---|
| 105 | n/a | def __ne__(self, other): |
|---|
| 106 | n/a | return self is not other |
|---|
| 107 | n/a | |
|---|
| 108 | n/a | class InheritedHashWithEquality(FixedHash, OnlyEquality): pass |
|---|
| 109 | n/a | class InheritedHashWithInequality(FixedHash, OnlyInequality): pass |
|---|
| 110 | n/a | |
|---|
| 111 | n/a | class NoHash(object): |
|---|
| 112 | n/a | __hash__ = None |
|---|
| 113 | n/a | |
|---|
| 114 | n/a | class HashInheritanceTestCase(unittest.TestCase): |
|---|
| 115 | n/a | default_expected = [object(), |
|---|
| 116 | n/a | DefaultHash(), |
|---|
| 117 | n/a | OnlyInequality(), |
|---|
| 118 | n/a | ] |
|---|
| 119 | n/a | fixed_expected = [FixedHash(), |
|---|
| 120 | n/a | InheritedHashWithEquality(), |
|---|
| 121 | n/a | InheritedHashWithInequality(), |
|---|
| 122 | n/a | ] |
|---|
| 123 | n/a | error_expected = [NoHash(), |
|---|
| 124 | n/a | OnlyEquality(), |
|---|
| 125 | n/a | ] |
|---|
| 126 | n/a | |
|---|
| 127 | n/a | def test_default_hash(self): |
|---|
| 128 | n/a | for obj in self.default_expected: |
|---|
| 129 | n/a | self.assertEqual(hash(obj), _default_hash(obj)) |
|---|
| 130 | n/a | |
|---|
| 131 | n/a | def test_fixed_hash(self): |
|---|
| 132 | n/a | for obj in self.fixed_expected: |
|---|
| 133 | n/a | self.assertEqual(hash(obj), _FIXED_HASH_VALUE) |
|---|
| 134 | n/a | |
|---|
| 135 | n/a | def test_error_hash(self): |
|---|
| 136 | n/a | for obj in self.error_expected: |
|---|
| 137 | n/a | self.assertRaises(TypeError, hash, obj) |
|---|
| 138 | n/a | |
|---|
| 139 | n/a | def test_hashable(self): |
|---|
| 140 | n/a | objects = (self.default_expected + |
|---|
| 141 | n/a | self.fixed_expected) |
|---|
| 142 | n/a | for obj in objects: |
|---|
| 143 | n/a | self.assertIsInstance(obj, Hashable) |
|---|
| 144 | n/a | |
|---|
| 145 | n/a | def test_not_hashable(self): |
|---|
| 146 | n/a | for obj in self.error_expected: |
|---|
| 147 | n/a | self.assertNotIsInstance(obj, Hashable) |
|---|
| 148 | n/a | |
|---|
| 149 | n/a | |
|---|
| 150 | n/a | # Issue #4701: Check that some builtin types are correctly hashable |
|---|
| 151 | n/a | class DefaultIterSeq(object): |
|---|
| 152 | n/a | seq = range(10) |
|---|
| 153 | n/a | def __len__(self): |
|---|
| 154 | n/a | return len(self.seq) |
|---|
| 155 | n/a | def __getitem__(self, index): |
|---|
| 156 | n/a | return self.seq[index] |
|---|
| 157 | n/a | |
|---|
| 158 | n/a | class HashBuiltinsTestCase(unittest.TestCase): |
|---|
| 159 | n/a | hashes_to_check = [enumerate(range(10)), |
|---|
| 160 | n/a | iter(DefaultIterSeq()), |
|---|
| 161 | n/a | iter(lambda: 0, 0), |
|---|
| 162 | n/a | ] |
|---|
| 163 | n/a | |
|---|
| 164 | n/a | def test_hashes(self): |
|---|
| 165 | n/a | _default_hash = object.__hash__ |
|---|
| 166 | n/a | for obj in self.hashes_to_check: |
|---|
| 167 | n/a | self.assertEqual(hash(obj), _default_hash(obj)) |
|---|
| 168 | n/a | |
|---|
| 169 | n/a | class HashRandomizationTests: |
|---|
| 170 | n/a | |
|---|
| 171 | n/a | # Each subclass should define a field "repr_", containing the repr() of |
|---|
| 172 | n/a | # an object to be tested |
|---|
| 173 | n/a | |
|---|
| 174 | n/a | def get_hash_command(self, repr_): |
|---|
| 175 | n/a | return 'print(hash(eval(%a)))' % repr_ |
|---|
| 176 | n/a | |
|---|
| 177 | n/a | def get_hash(self, repr_, seed=None): |
|---|
| 178 | n/a | env = os.environ.copy() |
|---|
| 179 | n/a | env['__cleanenv'] = True # signal to assert_python not to do a copy |
|---|
| 180 | n/a | # of os.environ on its own |
|---|
| 181 | n/a | if seed is not None: |
|---|
| 182 | n/a | env['PYTHONHASHSEED'] = str(seed) |
|---|
| 183 | n/a | else: |
|---|
| 184 | n/a | env.pop('PYTHONHASHSEED', None) |
|---|
| 185 | n/a | out = assert_python_ok( |
|---|
| 186 | n/a | '-c', self.get_hash_command(repr_), |
|---|
| 187 | n/a | **env) |
|---|
| 188 | n/a | stdout = out[1].strip() |
|---|
| 189 | n/a | return int(stdout) |
|---|
| 190 | n/a | |
|---|
| 191 | n/a | def test_randomized_hash(self): |
|---|
| 192 | n/a | # two runs should return different hashes |
|---|
| 193 | n/a | run1 = self.get_hash(self.repr_, seed='random') |
|---|
| 194 | n/a | run2 = self.get_hash(self.repr_, seed='random') |
|---|
| 195 | n/a | self.assertNotEqual(run1, run2) |
|---|
| 196 | n/a | |
|---|
| 197 | n/a | class StringlikeHashRandomizationTests(HashRandomizationTests): |
|---|
| 198 | n/a | repr_ = None |
|---|
| 199 | n/a | repr_long = None |
|---|
| 200 | n/a | |
|---|
| 201 | n/a | # 32bit little, 64bit little, 32bit big, 64bit big |
|---|
| 202 | n/a | known_hashes = { |
|---|
| 203 | n/a | 'djba33x': [ # only used for small strings |
|---|
| 204 | n/a | # seed 0, 'abc' |
|---|
| 205 | n/a | [193485960, 193485960, 193485960, 193485960], |
|---|
| 206 | n/a | # seed 42, 'abc' |
|---|
| 207 | n/a | [-678966196, 573763426263223372, -820489388, -4282905804826039665], |
|---|
| 208 | n/a | ], |
|---|
| 209 | n/a | 'siphash24': [ |
|---|
| 210 | n/a | # NOTE: PyUCS2 layout depends on endianess |
|---|
| 211 | n/a | # seed 0, 'abc' |
|---|
| 212 | n/a | [1198583518, 4596069200710135518, 1198583518, 4596069200710135518], |
|---|
| 213 | n/a | # seed 42, 'abc' |
|---|
| 214 | n/a | [273876886, -4501618152524544106, 273876886, -4501618152524544106], |
|---|
| 215 | n/a | # seed 42, 'abcdefghijk' |
|---|
| 216 | n/a | [-1745215313, 4436719588892876975, -1745215313, 4436719588892876975], |
|---|
| 217 | n/a | # seed 0, 'äúââ' |
|---|
| 218 | n/a | [493570806, 5749986484189612790, -1006381564, -5915111450199468540], |
|---|
| 219 | n/a | # seed 42, 'äúââ' |
|---|
| 220 | n/a | [-1677110816, -2947981342227738144, -1860207793, -4296699217652516017], |
|---|
| 221 | n/a | ], |
|---|
| 222 | n/a | 'fnv': [ |
|---|
| 223 | n/a | # seed 0, 'abc' |
|---|
| 224 | n/a | [-1600925533, 1453079729188098211, -1600925533, |
|---|
| 225 | n/a | 1453079729188098211], |
|---|
| 226 | n/a | # seed 42, 'abc' |
|---|
| 227 | n/a | [-206076799, -4410911502303878509, -1024014457, |
|---|
| 228 | n/a | -3570150969479994130], |
|---|
| 229 | n/a | # seed 42, 'abcdefghijk' |
|---|
| 230 | n/a | [811136751, -5046230049376118746, -77208053 , |
|---|
| 231 | n/a | -4779029615281019666], |
|---|
| 232 | n/a | # seed 0, 'äúââ' |
|---|
| 233 | n/a | [44402817, 8998297579845987431, -1956240331, |
|---|
| 234 | n/a | -782697888614047887], |
|---|
| 235 | n/a | # seed 42, 'äúââ' |
|---|
| 236 | n/a | [-283066365, -4576729883824601543, -271871407, |
|---|
| 237 | n/a | -3927695501187247084], |
|---|
| 238 | n/a | ] |
|---|
| 239 | n/a | } |
|---|
| 240 | n/a | |
|---|
| 241 | n/a | def get_expected_hash(self, position, length): |
|---|
| 242 | n/a | if length < sys.hash_info.cutoff: |
|---|
| 243 | n/a | algorithm = "djba33x" |
|---|
| 244 | n/a | else: |
|---|
| 245 | n/a | algorithm = sys.hash_info.algorithm |
|---|
| 246 | n/a | if sys.byteorder == 'little': |
|---|
| 247 | n/a | platform = 1 if IS_64BIT else 0 |
|---|
| 248 | n/a | else: |
|---|
| 249 | n/a | assert(sys.byteorder == 'big') |
|---|
| 250 | n/a | platform = 3 if IS_64BIT else 2 |
|---|
| 251 | n/a | return self.known_hashes[algorithm][position][platform] |
|---|
| 252 | n/a | |
|---|
| 253 | n/a | def test_null_hash(self): |
|---|
| 254 | n/a | # PYTHONHASHSEED=0 disables the randomized hash |
|---|
| 255 | n/a | known_hash_of_obj = self.get_expected_hash(0, 3) |
|---|
| 256 | n/a | |
|---|
| 257 | n/a | # Randomization is enabled by default: |
|---|
| 258 | n/a | self.assertNotEqual(self.get_hash(self.repr_), known_hash_of_obj) |
|---|
| 259 | n/a | |
|---|
| 260 | n/a | # It can also be disabled by setting the seed to 0: |
|---|
| 261 | n/a | self.assertEqual(self.get_hash(self.repr_, seed=0), known_hash_of_obj) |
|---|
| 262 | n/a | |
|---|
| 263 | n/a | @skip_unless_internalhash |
|---|
| 264 | n/a | def test_fixed_hash(self): |
|---|
| 265 | n/a | # test a fixed seed for the randomized hash |
|---|
| 266 | n/a | # Note that all types share the same values: |
|---|
| 267 | n/a | h = self.get_expected_hash(1, 3) |
|---|
| 268 | n/a | self.assertEqual(self.get_hash(self.repr_, seed=42), h) |
|---|
| 269 | n/a | |
|---|
| 270 | n/a | @skip_unless_internalhash |
|---|
| 271 | n/a | def test_long_fixed_hash(self): |
|---|
| 272 | n/a | if self.repr_long is None: |
|---|
| 273 | n/a | return |
|---|
| 274 | n/a | h = self.get_expected_hash(2, 11) |
|---|
| 275 | n/a | self.assertEqual(self.get_hash(self.repr_long, seed=42), h) |
|---|
| 276 | n/a | |
|---|
| 277 | n/a | |
|---|
| 278 | n/a | class StrHashRandomizationTests(StringlikeHashRandomizationTests, |
|---|
| 279 | n/a | unittest.TestCase): |
|---|
| 280 | n/a | repr_ = repr('abc') |
|---|
| 281 | n/a | repr_long = repr('abcdefghijk') |
|---|
| 282 | n/a | repr_ucs2 = repr('äúââ') |
|---|
| 283 | n/a | |
|---|
| 284 | n/a | @skip_unless_internalhash |
|---|
| 285 | n/a | def test_empty_string(self): |
|---|
| 286 | n/a | self.assertEqual(hash(""), 0) |
|---|
| 287 | n/a | |
|---|
| 288 | n/a | @skip_unless_internalhash |
|---|
| 289 | n/a | def test_ucs2_string(self): |
|---|
| 290 | n/a | h = self.get_expected_hash(3, 6) |
|---|
| 291 | n/a | self.assertEqual(self.get_hash(self.repr_ucs2, seed=0), h) |
|---|
| 292 | n/a | h = self.get_expected_hash(4, 6) |
|---|
| 293 | n/a | self.assertEqual(self.get_hash(self.repr_ucs2, seed=42), h) |
|---|
| 294 | n/a | |
|---|
| 295 | n/a | class BytesHashRandomizationTests(StringlikeHashRandomizationTests, |
|---|
| 296 | n/a | unittest.TestCase): |
|---|
| 297 | n/a | repr_ = repr(b'abc') |
|---|
| 298 | n/a | repr_long = repr(b'abcdefghijk') |
|---|
| 299 | n/a | |
|---|
| 300 | n/a | @skip_unless_internalhash |
|---|
| 301 | n/a | def test_empty_string(self): |
|---|
| 302 | n/a | self.assertEqual(hash(b""), 0) |
|---|
| 303 | n/a | |
|---|
| 304 | n/a | class MemoryviewHashRandomizationTests(StringlikeHashRandomizationTests, |
|---|
| 305 | n/a | unittest.TestCase): |
|---|
| 306 | n/a | repr_ = "memoryview(b'abc')" |
|---|
| 307 | n/a | repr_long = "memoryview(b'abcdefghijk')" |
|---|
| 308 | n/a | |
|---|
| 309 | n/a | @skip_unless_internalhash |
|---|
| 310 | n/a | def test_empty_string(self): |
|---|
| 311 | n/a | self.assertEqual(hash(memoryview(b"")), 0) |
|---|
| 312 | n/a | |
|---|
| 313 | n/a | class DatetimeTests(HashRandomizationTests): |
|---|
| 314 | n/a | def get_hash_command(self, repr_): |
|---|
| 315 | n/a | return 'import datetime; print(hash(%s))' % repr_ |
|---|
| 316 | n/a | |
|---|
| 317 | n/a | class DatetimeDateTests(DatetimeTests, unittest.TestCase): |
|---|
| 318 | n/a | repr_ = repr(datetime.date(1066, 10, 14)) |
|---|
| 319 | n/a | |
|---|
| 320 | n/a | class DatetimeDatetimeTests(DatetimeTests, unittest.TestCase): |
|---|
| 321 | n/a | repr_ = repr(datetime.datetime(1, 2, 3, 4, 5, 6, 7)) |
|---|
| 322 | n/a | |
|---|
| 323 | n/a | class DatetimeTimeTests(DatetimeTests, unittest.TestCase): |
|---|
| 324 | n/a | repr_ = repr(datetime.time(0)) |
|---|
| 325 | n/a | |
|---|
| 326 | n/a | |
|---|
| 327 | n/a | class HashDistributionTestCase(unittest.TestCase): |
|---|
| 328 | n/a | |
|---|
| 329 | n/a | def test_hash_distribution(self): |
|---|
| 330 | n/a | # check for hash collision |
|---|
| 331 | n/a | base = "abcdefghabcdefg" |
|---|
| 332 | n/a | for i in range(1, len(base)): |
|---|
| 333 | n/a | prefix = base[:i] |
|---|
| 334 | n/a | with self.subTest(prefix=prefix): |
|---|
| 335 | n/a | s15 = set() |
|---|
| 336 | n/a | s255 = set() |
|---|
| 337 | n/a | for c in range(256): |
|---|
| 338 | n/a | h = hash(prefix + chr(c)) |
|---|
| 339 | n/a | s15.add(h & 0xf) |
|---|
| 340 | n/a | s255.add(h & 0xff) |
|---|
| 341 | n/a | # SipHash24 distribution depends on key, usually > 60% |
|---|
| 342 | n/a | self.assertGreater(len(s15), 8, prefix) |
|---|
| 343 | n/a | self.assertGreater(len(s255), 128, prefix) |
|---|
| 344 | n/a | |
|---|
| 345 | n/a | if __name__ == "__main__": |
|---|
| 346 | n/a | unittest.main() |
|---|