
Python code coverage for Python/pyhash.c

/* Set of hash utility functions to help maintain the invariant that
      if a==b then hash(a)==hash(b)

   All the utility functions (_Py_Hash*()) return "-1" to signify an error.
*/
#include "Python.h"

#ifdef __APPLE__
# include <libkern/OSByteOrder.h>
#elif defined(HAVE_LE64TOH) && defined(HAVE_ENDIAN_H)
# include <endian.h>
#elif defined(HAVE_LE64TOH) && defined(HAVE_SYS_ENDIAN_H)
# include <sys/endian.h>
#endif

#ifdef __cplusplus
extern "C" {
#endif

_Py_HashSecret_t _Py_HashSecret;

#if Py_HASH_ALGORITHM == Py_HASH_EXTERNAL
extern PyHash_FuncDef PyHash_Func;
#else
static PyHash_FuncDef PyHash_Func;
#endif
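
/* Illustrative sketch, not part of pyhash.c: if Py_HASH_ALGORITHM is
 * defined as Py_HASH_EXTERNAL at build time, the extern declaration above
 * means the embedder must supply the PyHash_Func definition itself.  The
 * function and field values below are hypothetical; a real replacement has
 * to be collision-resistant, unlike this toy (relies on "Python.h", which
 * is already included above). */
static Py_hash_t
my_external_hash(const void *src, Py_ssize_t len)
{
    const unsigned char *p = (const unsigned char *)src;
    Py_uhash_t x = 0;
    Py_ssize_t i;

    for (i = 0; i < len; i++)
        x = x * 31 + p[i];          /* toy multiplicative mix */
    if (x == (Py_uhash_t)-1)
        x = (Py_uhash_t)-2;         /* -1 is reserved for error returns */
    return (Py_hash_t)x;
}
PyHash_FuncDef PyHash_Func = {my_external_hash, "my_external_hash",
                              8 * SIZEOF_PY_HASH_T, 0};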

/* Count _Py_HashBytes() calls */
#ifdef Py_HASH_STATS
#define Py_HASH_STATS_MAX 32
static Py_ssize_t hashstats[Py_HASH_STATS_MAX + 1] = {0};
#endif

/* For numeric types, the hash of a number x is based on the reduction
   of x modulo the prime P = 2**_PyHASH_BITS - 1. It's designed so that
   hash(x) == hash(y) whenever x and y are numerically equal, even if
   x and y have different types.

   A quick summary of the hashing strategy:

   (1) First define the 'reduction of x modulo P' for any rational
   number x; this is a standard extension of the usual notion of
   reduction modulo P for integers. If x == p/q (written in lowest
   terms), the reduction is interpreted as the reduction of p times
   the inverse of the reduction of q, all modulo P; if q is exactly
   divisible by P then define the reduction to be infinity. So we've
   got a well-defined map

      reduce : { rational numbers } -> { 0, 1, 2, ..., P-1, infinity }.

   (2) Now for a rational number x, define hash(x) by:

      reduce(x)   if x >= 0
      -reduce(-x) if x < 0

   If the result of the reduction is infinity (this is impossible for
   integers, floats and Decimals) then use the predefined hash value
   _PyHASH_INF for x >= 0, or -_PyHASH_INF for x < 0, instead.
   _PyHASH_INF, -_PyHASH_INF and _PyHASH_NAN are also used for the
   hashes of float and Decimal infinities and nans.

   A selling point for the above strategy is that it makes it possible
   to compute hashes of decimal and binary floating-point numbers
   efficiently, even if the exponent of the binary or decimal number
   is large. The key point is that

      reduce(x * y) == reduce(x) * reduce(y) (modulo _PyHASH_MODULUS)

   provided that {reduce(x), reduce(y)} != {0, infinity}. The reduction of a
   binary or decimal float is never infinity, since the denominator is a power
   of 2 (for binary) or a divisor of a power of 10 (for decimal). So we have,
   for nonnegative x,

      reduce(x * 2**e) == reduce(x) * reduce(2**e) % _PyHASH_MODULUS

      reduce(x * 10**e) == reduce(x) * reduce(10**e) % _PyHASH_MODULUS

   and reduce(10**e) can be computed efficiently by the usual modular
   exponentiation algorithm. For reduce(2**e) it's even better: since
   P is of the form 2**n-1, reduce(2**e) is 2**(e mod n), and multiplication
   by 2**(e mod n) modulo 2**n-1 just amounts to a rotation of bits.

 */

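/* Illustrative sketch, not part of pyhash.c: the rotation identity above
 * can be checked for a small case.  On a 64-bit build _PyHASH_MODULUS is
 * P = 2**61 - 1, so reduce(2**e) == 2**(e mod 61) and the reduction of
 * 1/2 (i.e. of 2**-1) is 2**60; this is why 64-bit CPython reports
 * hash(0.5) == 2**60.  Standalone C99 check: */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    const uint64_t P = ((uint64_t)1 << 61) - 1;   /* 2**61 - 1 */
    uint64_t half = (uint64_t)1 << 60;            /* candidate reduce(1/2) */

    /* 2 * 2**60 == 2**61 == 1 (mod P), so 2**60 is indeed the inverse of 2 */
    printf("2 * reduce(1/2) mod P = %llu (expect 1)\n",
           (unsigned long long)((2 * half) % P));
    return 0;
}
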
Py_hash_t
_Py_HashDouble(double v)
{
    int e, sign;
    double m;
    Py_uhash_t x, y;

    if (!Py_IS_FINITE(v)) {
        if (Py_IS_INFINITY(v))
            return v > 0 ? _PyHASH_INF : -_PyHASH_INF;
        else
            return _PyHASH_NAN;
    }

    m = frexp(v, &e);

    sign = 1;
    if (m < 0) {
        sign = -1;
        m = -m;
    }

    /* process 28 bits at a time; this should work well both for binary
       and hexadecimal floating point. */
    x = 0;
    while (m) {
        x = ((x << 28) & _PyHASH_MODULUS) | x >> (_PyHASH_BITS - 28);
        m *= 268435456.0;  /* 2**28 */
        e -= 28;
        y = (Py_uhash_t)m;  /* pull out integer part */
        m -= y;
        x += y;
        if (x >= _PyHASH_MODULUS)
            x -= _PyHASH_MODULUS;
    }

    /* adjust for the exponent; first reduce it modulo _PyHASH_BITS */
    e = e >= 0 ? e % _PyHASH_BITS : _PyHASH_BITS-1-((-1-e) % _PyHASH_BITS);
    x = ((x << e) & _PyHASH_MODULUS) | x >> (_PyHASH_BITS - e);

    x = x * sign;
    if (x == (Py_uhash_t)-1)
        x = (Py_uhash_t)-2;
    return (Py_hash_t)x;
}
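
/* Illustrative sketch, not part of pyhash.c: the invariant from the top of
 * the file in action.  Because _Py_HashDouble() reduces modulo
 * _PyHASH_MODULUS, integral floats hash like the equal int; the checks
 * below assume a 64-bit build and linking against CPython, and the helper
 * name is hypothetical. */
void
check_hash_double_example(void)
{
    assert(_Py_HashDouble(3.0) == 3);    /* same value as hash(3) */
    assert(_Py_HashDouble(-1.0) == -2);  /* -1 is remapped to -2 */
}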

Py_hash_t
_Py_HashPointer(void *p)
{
    Py_hash_t x;
    size_t y = (size_t)p;
    /* bottom 3 or 4 bits are likely to be 0; rotate y by 4 to avoid
       excessive hash collisions for dicts and sets */
    y = (y >> 4) | (y << (8 * SIZEOF_VOID_P - 4));
    x = (Py_hash_t)y;
    if (x == -1)
        x = -2;
    return x;
}
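
/* Illustrative sketch, not part of pyhash.c: why the 4-bit rotation above
 * helps.  Object addresses are typically 16-byte aligned, so without the
 * rotation every pointer hash would be a multiple of 16 and dict/set slot
 * indices would cluster.  The rotation maps e.g. the aligned addresses
 * 0x7f0000001000, 0x7f0000001010, 0x7f0000001020 to consecutive values.
 * Assumes 64-bit pointers; the helper name is hypothetical. */
static uint64_t
rotate_pointer_bits_example(uint64_t addr)
{
    /* same rotation as _Py_HashPointer(): low 4 bits move to the top */
    return (addr >> 4) | (addr << (64 - 4));
}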

Py_hash_t
_Py_HashBytes(const void *src, Py_ssize_t len)
{
    Py_hash_t x;
    /*
      We make the hash of the empty string be 0, rather than using
      (prefix ^ suffix), since this slightly obfuscates the hash secret
    */
    if (len == 0) {
        return 0;
    }

#ifdef Py_HASH_STATS
    hashstats[(len <= Py_HASH_STATS_MAX) ? len : 0]++;
#endif

#if Py_HASH_CUTOFF > 0
    if (len < Py_HASH_CUTOFF) {
        /* Optimize hashing of very small strings with inline DJBX33A. */
        Py_uhash_t hash;
        const unsigned char *p = src;
        hash = 5381; /* DJBX33A starts with 5381 */

        switch(len) {
            /* ((hash << 5) + hash) + *p == hash * 33 + *p */
            case 7: hash = ((hash << 5) + hash) + *p++; /* fallthrough */
            case 6: hash = ((hash << 5) + hash) + *p++; /* fallthrough */
            case 5: hash = ((hash << 5) + hash) + *p++; /* fallthrough */
            case 4: hash = ((hash << 5) + hash) + *p++; /* fallthrough */
            case 3: hash = ((hash << 5) + hash) + *p++; /* fallthrough */
            case 2: hash = ((hash << 5) + hash) + *p++; /* fallthrough */
            case 1: hash = ((hash << 5) + hash) + *p++; break;
            default:
                assert(0);
        }
        hash ^= len;
        hash ^= (Py_uhash_t) _Py_HashSecret.djbx33a.suffix;
        x = (Py_hash_t)hash;
    }
    else
#endif /* Py_HASH_CUTOFF */
        x = PyHash_Func.hash(src, len);

    if (x == -1)
        return -2;
    return x;
}
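
/* Illustrative sketch, not part of pyhash.c: the unrolled switch in
 * _Py_HashBytes() above is DJBX33A ("hash = hash*33 + byte") with the loop
 * unrolled for len < Py_HASH_CUTOFF.  A plain-loop equivalent using the
 * same types and secret (hypothetical reference, relies on "Python.h"): */
static Py_uhash_t
djbx33a_reference(const unsigned char *p, Py_ssize_t len)
{
    Py_uhash_t hash = 5381;                    /* same start value as above */
    Py_ssize_t i;

    for (i = 0; i < len; i++)
        hash = ((hash << 5) + hash) + p[i];    /* hash * 33 + p[i] */
    hash ^= (Py_uhash_t)len;
    hash ^= (Py_uhash_t)_Py_HashSecret.djbx33a.suffix;
    return hash;
}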

void
_PyHash_Fini(void)
{
#ifdef Py_HASH_STATS
    int i;
    Py_ssize_t total = 0;
    char *fmt = "%2i %8" PY_FORMAT_SIZE_T "d %8" PY_FORMAT_SIZE_T "d\n";

    fprintf(stderr, "len calls total\n");
    for (i = 1; i <= Py_HASH_STATS_MAX; i++) {
        total += hashstats[i];
        fprintf(stderr, fmt, i, hashstats[i], total);
    }
    total += hashstats[0];
    fprintf(stderr, "> %8" PY_FORMAT_SIZE_T "d %8" PY_FORMAT_SIZE_T "d\n",
            hashstats[0], total);
#endif
}

PyHash_FuncDef *
PyHash_GetFuncDef(void)
{
    return &PyHash_Func;
}
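
/* Illustrative sketch, not part of pyhash.c: an embedding application can
 * query the selected hash function through PyHash_GetFuncDef(); the same
 * metadata is exposed to Python code as sys.hash_info.  The helper name is
 * hypothetical. */
void
print_hash_algorithm_example(void)
{
    PyHash_FuncDef *def = PyHash_GetFuncDef();
    printf("algorithm=%s hash_bits=%d seed_bits=%d\n",
           def->name, def->hash_bits, def->seed_bits);
}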

/* Optimized memcpy() for Windows */
#ifdef _MSC_VER
# if SIZEOF_PY_UHASH_T == 4
# define PY_UHASH_CPY(dst, src) do { \
    dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; \
    } while(0)
# elif SIZEOF_PY_UHASH_T == 8
# define PY_UHASH_CPY(dst, src) do { \
    dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; \
    dst[4] = src[4]; dst[5] = src[5]; dst[6] = src[6]; dst[7] = src[7]; \
    } while(0)
# else
# error SIZEOF_PY_UHASH_T must be 4 or 8
# endif /* SIZEOF_PY_UHASH_T */
#else /* not Windows */
# define PY_UHASH_CPY(dst, src) memcpy(dst, src, SIZEOF_PY_UHASH_T)
#endif /* _MSC_VER */


#if Py_HASH_ALGORITHM == Py_HASH_FNV
/* **************************************************************************
 * Modified Fowler-Noll-Vo (FNV) hash function
 */
static Py_hash_t
fnv(const void *src, Py_ssize_t len)
{
    const unsigned char *p = src;
    Py_uhash_t x;
    Py_ssize_t remainder, blocks;
    union {
        Py_uhash_t value;
        unsigned char bytes[SIZEOF_PY_UHASH_T];
    } block;

#ifdef Py_DEBUG
    assert(_Py_HashSecret_Initialized);
#endif
    remainder = len % SIZEOF_PY_UHASH_T;
    if (remainder == 0) {
        /* Process at least one block byte by byte to reduce hash collisions
         * for strings with common prefixes. */
        remainder = SIZEOF_PY_UHASH_T;
    }
    blocks = (len - remainder) / SIZEOF_PY_UHASH_T;

    x = (Py_uhash_t) _Py_HashSecret.fnv.prefix;
    x ^= (Py_uhash_t) *p << 7;
    while (blocks--) {
        PY_UHASH_CPY(block.bytes, p);
        x = (_PyHASH_MULTIPLIER * x) ^ block.value;
        p += SIZEOF_PY_UHASH_T;
    }
    /* add remainder */
    for (; remainder > 0; remainder--)
        x = (_PyHASH_MULTIPLIER * x) ^ (Py_uhash_t) *p++;
    x ^= (Py_uhash_t) len;
    x ^= (Py_uhash_t) _Py_HashSecret.fnv.suffix;
    if (x == -1) {
        x = -2;
    }
    return x;
}

static PyHash_FuncDef PyHash_Func = {fnv, "fnv", 8 * SIZEOF_PY_HASH_T,
                                     16 * SIZEOF_PY_HASH_T};

#endif /* Py_HASH_ALGORITHM == Py_HASH_FNV */
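
/* Illustrative sketch, not part of pyhash.c: for comparison, textbook
 * 64-bit FNV-1 mixes one byte per step with fixed public constants
 * (offset basis 0xcbf29ce484222325, prime 0x100000001b3).  The "modified"
 * FNV above differs by consuming whole Py_uhash_t blocks, using
 * _PyHASH_MULTIPLIER, and folding in the randomized _Py_HashSecret
 * prefix/suffix and the input length. */
static uint64_t
fnv1_64_reference(const unsigned char *p, size_t len)
{
    uint64_t h = 0xcbf29ce484222325ULL;    /* FNV-1 64-bit offset basis */
    size_t i;

    for (i = 0; i < len; i++) {
        h *= 0x100000001b3ULL;             /* FNV-1 64-bit prime */
        h ^= p[i];
    }
    return h;
}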


#if Py_HASH_ALGORITHM == Py_HASH_SIPHASH24
/* **************************************************************************
 <MIT License>
 Copyright (c) 2013 Marek Majkowski <marek@popcount.org>

 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:

 The above copyright notice and this permission notice shall be included in
 all copies or substantial portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
 </MIT License>

 Original location:
    https://github.com/majek/csiphash/

 Solution inspired by code from:
    Samuel Neves (supercop/crypto_auth/siphash24/little)
    djb (supercop/crypto_auth/siphash24/little2)
    Jean-Philippe Aumasson (https://131002.net/siphash/siphash24.c)

 Modified for Python by Christian Heimes:
    - C89 / MSVC compatibility
    - _rotl64() on Windows
    - letoh64() fallback
*/

/* byte swap little endian to host endian
 * Endian conversion not only ensures that the hash function returns the same
 * value on all platforms. It is also required for a good dispersion of
 * the hash values' least significant bits.
 */
#if PY_LITTLE_ENDIAN
# define _le64toh(x) ((uint64_t)(x))
#elif defined(__APPLE__)
# define _le64toh(x) OSSwapLittleToHostInt64(x)
#elif defined(HAVE_LETOH64)
# define _le64toh(x) le64toh(x)
#else
# define _le64toh(x) (((uint64_t)(x) << 56) | \
                      (((uint64_t)(x) << 40) & 0xff000000000000ULL) | \
                      (((uint64_t)(x) << 24) & 0xff0000000000ULL) | \
                      (((uint64_t)(x) << 8) & 0xff00000000ULL) | \
                      (((uint64_t)(x) >> 8) & 0xff000000ULL) | \
                      (((uint64_t)(x) >> 24) & 0xff0000ULL) | \
                      (((uint64_t)(x) >> 40) & 0xff00ULL) | \
                      ((uint64_t)(x) >> 56))
#endif


#ifdef _MSC_VER
# define ROTATE(x, b) _rotl64(x, b)
#else
# define ROTATE(x, b) (uint64_t)( ((x) << (b)) | ( (x) >> (64 - (b))) )
#endif

#define HALF_ROUND(a,b,c,d,s,t) \
    a += b; c += d;             \
    b = ROTATE(b, s) ^ a;       \
    d = ROTATE(d, t) ^ c;       \
    a = ROTATE(a, 32);

#define DOUBLE_ROUND(v0,v1,v2,v3)  \
    HALF_ROUND(v0,v1,v2,v3,13,16); \
    HALF_ROUND(v2,v1,v0,v3,17,21); \
    HALF_ROUND(v0,v1,v2,v3,13,16); \
    HALF_ROUND(v2,v1,v0,v3,17,21);


static Py_hash_t
siphash24(const void *src, Py_ssize_t src_sz) {
    uint64_t k0 = _le64toh(_Py_HashSecret.siphash.k0);
    uint64_t k1 = _le64toh(_Py_HashSecret.siphash.k1);
    uint64_t b = (uint64_t)src_sz << 56;
    const uint64_t *in = (uint64_t*)src;

    uint64_t v0 = k0 ^ 0x736f6d6570736575ULL;
    uint64_t v1 = k1 ^ 0x646f72616e646f6dULL;
    uint64_t v2 = k0 ^ 0x6c7967656e657261ULL;
    uint64_t v3 = k1 ^ 0x7465646279746573ULL;

    uint64_t t;
    uint8_t *pt;
    uint8_t *m;

    while (src_sz >= 8) {
        uint64_t mi = _le64toh(*in);
        in += 1;
        src_sz -= 8;
        v3 ^= mi;
        DOUBLE_ROUND(v0,v1,v2,v3);
        v0 ^= mi;
    }

    t = 0;
    pt = (uint8_t *)&t;
    m = (uint8_t *)in;
    switch (src_sz) {
        case 7: pt[6] = m[6];
        case 6: pt[5] = m[5];
        case 5: pt[4] = m[4];
        case 4: memcpy(pt, m, sizeof(uint32_t)); break;
        case 3: pt[2] = m[2];
        case 2: pt[1] = m[1];
        case 1: pt[0] = m[0];
    }
    b |= _le64toh(t);

    v3 ^= b;
    DOUBLE_ROUND(v0,v1,v2,v3);
    v0 ^= b;
    v2 ^= 0xff;
    DOUBLE_ROUND(v0,v1,v2,v3);
    DOUBLE_ROUND(v0,v1,v2,v3);

    /* modified */
    t = (v0 ^ v1) ^ (v2 ^ v3);
    return (Py_hash_t)t;
}

static PyHash_FuncDef PyHash_Func = {siphash24, "siphash24", 64, 128};

#endif /* Py_HASH_ALGORITHM == Py_HASH_SIPHASH24 */

#ifdef __cplusplus
}
#endif