ยปCore Development>Code coverage>Python/pyhash.c

Python code coverage for Python/pyhash.c

#countcontent
1n/a/* Set of hash utility functions to help maintaining the invariant that
2n/a if a==b then hash(a)==hash(b)
3n/a
4n/a All the utility functions (_Py_Hash*()) return "-1" to signify an error.
5n/a*/
6n/a#include "Python.h"
7n/a
8n/a#ifdef __APPLE__
9n/a# include <libkern/OSByteOrder.h>
10n/a#elif defined(HAVE_LE64TOH) && defined(HAVE_ENDIAN_H)
11n/a# include <endian.h>
12n/a#elif defined(HAVE_LE64TOH) && defined(HAVE_SYS_ENDIAN_H)
13n/a# include <sys/endian.h>
14n/a#endif
15n/a
16n/a#ifdef __cplusplus
17n/aextern "C" {
18n/a#endif
19n/a
20n/a_Py_HashSecret_t _Py_HashSecret;
21n/a
22n/a#if Py_HASH_ALGORITHM == Py_HASH_EXTERNAL
23n/aextern PyHash_FuncDef PyHash_Func;
24n/a#else
25n/astatic PyHash_FuncDef PyHash_Func;
26n/a#endif
27n/a
28n/a/* Count _Py_HashBytes() calls */
29n/a#ifdef Py_HASH_STATS
30n/a#define Py_HASH_STATS_MAX 32
31n/astatic Py_ssize_t hashstats[Py_HASH_STATS_MAX + 1] = {0};
32n/a#endif
33n/a
34n/a/* For numeric types, the hash of a number x is based on the reduction
35n/a of x modulo the prime P = 2**_PyHASH_BITS - 1. It's designed so that
36n/a hash(x) == hash(y) whenever x and y are numerically equal, even if
37n/a x and y have different types.
38n/a
39n/a A quick summary of the hashing strategy:
40n/a
41n/a (1) First define the 'reduction of x modulo P' for any rational
42n/a number x; this is a standard extension of the usual notion of
43n/a reduction modulo P for integers. If x == p/q (written in lowest
44n/a terms), the reduction is interpreted as the reduction of p times
45n/a the inverse of the reduction of q, all modulo P; if q is exactly
46n/a divisible by P then define the reduction to be infinity. So we've
47n/a got a well-defined map
48n/a
49n/a reduce : { rational numbers } -> { 0, 1, 2, ..., P-1, infinity }.
50n/a
51n/a (2) Now for a rational number x, define hash(x) by:
52n/a
53n/a reduce(x) if x >= 0
54n/a -reduce(-x) if x < 0
55n/a
56n/a If the result of the reduction is infinity (this is impossible for
57n/a integers, floats and Decimals) then use the predefined hash value
58n/a _PyHASH_INF for x >= 0, or -_PyHASH_INF for x < 0, instead.
59n/a _PyHASH_INF, -_PyHASH_INF and _PyHASH_NAN are also used for the
60n/a hashes of float and Decimal infinities and nans.
61n/a
62n/a A selling point for the above strategy is that it makes it possible
63n/a to compute hashes of decimal and binary floating-point numbers
64n/a efficiently, even if the exponent of the binary or decimal number
65n/a is large. The key point is that
66n/a
67n/a reduce(x * y) == reduce(x) * reduce(y) (modulo _PyHASH_MODULUS)
68n/a
69n/a provided that {reduce(x), reduce(y)} != {0, infinity}. The reduction of a
70n/a binary or decimal float is never infinity, since the denominator is a power
71n/a of 2 (for binary) or a divisor of a power of 10 (for decimal). So we have,
72n/a for nonnegative x,
73n/a
74n/a reduce(x * 2**e) == reduce(x) * reduce(2**e) % _PyHASH_MODULUS
75n/a
76n/a reduce(x * 10**e) == reduce(x) * reduce(10**e) % _PyHASH_MODULUS
77n/a
78n/a and reduce(10**e) can be computed efficiently by the usual modular
79n/a exponentiation algorithm. For reduce(2**e) it's even better: since
80n/a P is of the form 2**n-1, reduce(2**e) is 2**(e mod n), and multiplication
81n/a by 2**(e mod n) modulo 2**n-1 just amounts to a rotation of bits.
82n/a
83n/a */
84n/a
85n/aPy_hash_t
86n/a_Py_HashDouble(double v)
87n/a{
88n/a int e, sign;
89n/a double m;
90n/a Py_uhash_t x, y;
91n/a
92n/a if (!Py_IS_FINITE(v)) {
93n/a if (Py_IS_INFINITY(v))
94n/a return v > 0 ? _PyHASH_INF : -_PyHASH_INF;
95n/a else
96n/a return _PyHASH_NAN;
97n/a }
98n/a
99n/a m = frexp(v, &e);
100n/a
101n/a sign = 1;
102n/a if (m < 0) {
103n/a sign = -1;
104n/a m = -m;
105n/a }
106n/a
107n/a /* process 28 bits at a time; this should work well both for binary
108n/a and hexadecimal floating point. */
109n/a x = 0;
110n/a while (m) {
111n/a x = ((x << 28) & _PyHASH_MODULUS) | x >> (_PyHASH_BITS - 28);
112n/a m *= 268435456.0; /* 2**28 */
113n/a e -= 28;
114n/a y = (Py_uhash_t)m; /* pull out integer part */
115n/a m -= y;
116n/a x += y;
117n/a if (x >= _PyHASH_MODULUS)
118n/a x -= _PyHASH_MODULUS;
119n/a }
120n/a
121n/a /* adjust for the exponent; first reduce it modulo _PyHASH_BITS */
122n/a e = e >= 0 ? e % _PyHASH_BITS : _PyHASH_BITS-1-((-1-e) % _PyHASH_BITS);
123n/a x = ((x << e) & _PyHASH_MODULUS) | x >> (_PyHASH_BITS - e);
124n/a
125n/a x = x * sign;
126n/a if (x == (Py_uhash_t)-1)
127n/a x = (Py_uhash_t)-2;
128n/a return (Py_hash_t)x;
129n/a}
130n/a
131n/aPy_hash_t
132n/a_Py_HashPointer(void *p)
133n/a{
134n/a Py_hash_t x;
135n/a size_t y = (size_t)p;
136n/a /* bottom 3 or 4 bits are likely to be 0; rotate y by 4 to avoid
137n/a excessive hash collisions for dicts and sets */
138n/a y = (y >> 4) | (y << (8 * SIZEOF_VOID_P - 4));
139n/a x = (Py_hash_t)y;
140n/a if (x == -1)
141n/a x = -2;
142n/a return x;
143n/a}
144n/a
145n/aPy_hash_t
146n/a_Py_HashBytes(const void *src, Py_ssize_t len)
147n/a{
148n/a Py_hash_t x;
149n/a /*
150n/a We make the hash of the empty string be 0, rather than using
151n/a (prefix ^ suffix), since this slightly obfuscates the hash secret
152n/a */
153n/a if (len == 0) {
154n/a return 0;
155n/a }
156n/a
157n/a#ifdef Py_HASH_STATS
158n/a hashstats[(len <= Py_HASH_STATS_MAX) ? len : 0]++;
159n/a#endif
160n/a
161n/a#if Py_HASH_CUTOFF > 0
162n/a if (len < Py_HASH_CUTOFF) {
163n/a /* Optimize hashing of very small strings with inline DJBX33A. */
164n/a Py_uhash_t hash;
165n/a const unsigned char *p = src;
166n/a hash = 5381; /* DJBX33A starts with 5381 */
167n/a
168n/a switch(len) {
169n/a /* ((hash << 5) + hash) + *p == hash * 33 + *p */
170n/a case 7: hash = ((hash << 5) + hash) + *p++; /* fallthrough */
171n/a case 6: hash = ((hash << 5) + hash) + *p++; /* fallthrough */
172n/a case 5: hash = ((hash << 5) + hash) + *p++; /* fallthrough */
173n/a case 4: hash = ((hash << 5) + hash) + *p++; /* fallthrough */
174n/a case 3: hash = ((hash << 5) + hash) + *p++; /* fallthrough */
175n/a case 2: hash = ((hash << 5) + hash) + *p++; /* fallthrough */
176n/a case 1: hash = ((hash << 5) + hash) + *p++; break;
177n/a default:
178n/a assert(0);
179n/a }
180n/a hash ^= len;
181n/a hash ^= (Py_uhash_t) _Py_HashSecret.djbx33a.suffix;
182n/a x = (Py_hash_t)hash;
183n/a }
184n/a else
185n/a#endif /* Py_HASH_CUTOFF */
186n/a x = PyHash_Func.hash(src, len);
187n/a
188n/a if (x == -1)
189n/a return -2;
190n/a return x;
191n/a}
192n/a
193n/avoid
194n/a_PyHash_Fini(void)
195n/a{
196n/a#ifdef Py_HASH_STATS
197n/a int i;
198n/a Py_ssize_t total = 0;
199n/a char *fmt = "%2i %8" PY_FORMAT_SIZE_T "d %8" PY_FORMAT_SIZE_T "d\n";
200n/a
201n/a fprintf(stderr, "len calls total\n");
202n/a for (i = 1; i <= Py_HASH_STATS_MAX; i++) {
203n/a total += hashstats[i];
204n/a fprintf(stderr, fmt, i, hashstats[i], total);
205n/a }
206n/a total += hashstats[0];
207n/a fprintf(stderr, "> %8" PY_FORMAT_SIZE_T "d %8" PY_FORMAT_SIZE_T "d\n",
208n/a hashstats[0], total);
209n/a#endif
210n/a}
211n/a
212n/aPyHash_FuncDef *
213n/aPyHash_GetFuncDef(void)
214n/a{
215n/a return &PyHash_Func;
216n/a}
217n/a
218n/a/* Optimized memcpy() for Windows */
219n/a#ifdef _MSC_VER
220n/a# if SIZEOF_PY_UHASH_T == 4
221n/a# define PY_UHASH_CPY(dst, src) do { \
222n/a dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; \
223n/a } while(0)
224n/a# elif SIZEOF_PY_UHASH_T == 8
225n/a# define PY_UHASH_CPY(dst, src) do { \
226n/a dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; \
227n/a dst[4] = src[4]; dst[5] = src[5]; dst[6] = src[6]; dst[7] = src[7]; \
228n/a } while(0)
229n/a# else
230n/a# error SIZEOF_PY_UHASH_T must be 4 or 8
231n/a# endif /* SIZEOF_PY_UHASH_T */
232n/a#else /* not Windows */
233n/a# define PY_UHASH_CPY(dst, src) memcpy(dst, src, SIZEOF_PY_UHASH_T)
234n/a#endif /* _MSC_VER */
235n/a
236n/a
237n/a#if Py_HASH_ALGORITHM == Py_HASH_FNV
238n/a/* **************************************************************************
239n/a * Modified Fowler-Noll-Vo (FNV) hash function
240n/a */
241n/astatic Py_hash_t
242n/afnv(const void *src, Py_ssize_t len)
243n/a{
244n/a const unsigned char *p = src;
245n/a Py_uhash_t x;
246n/a Py_ssize_t remainder, blocks;
247n/a union {
248n/a Py_uhash_t value;
249n/a unsigned char bytes[SIZEOF_PY_UHASH_T];
250n/a } block;
251n/a
252n/a#ifdef Py_DEBUG
253n/a assert(_Py_HashSecret_Initialized);
254n/a#endif
255n/a remainder = len % SIZEOF_PY_UHASH_T;
256n/a if (remainder == 0) {
257n/a /* Process at least one block byte by byte to reduce hash collisions
258n/a * for strings with common prefixes. */
259n/a remainder = SIZEOF_PY_UHASH_T;
260n/a }
261n/a blocks = (len - remainder) / SIZEOF_PY_UHASH_T;
262n/a
263n/a x = (Py_uhash_t) _Py_HashSecret.fnv.prefix;
264n/a x ^= (Py_uhash_t) *p << 7;
265n/a while (blocks--) {
266n/a PY_UHASH_CPY(block.bytes, p);
267n/a x = (_PyHASH_MULTIPLIER * x) ^ block.value;
268n/a p += SIZEOF_PY_UHASH_T;
269n/a }
270n/a /* add remainder */
271n/a for (; remainder > 0; remainder--)
272n/a x = (_PyHASH_MULTIPLIER * x) ^ (Py_uhash_t) *p++;
273n/a x ^= (Py_uhash_t) len;
274n/a x ^= (Py_uhash_t) _Py_HashSecret.fnv.suffix;
275n/a if (x == -1) {
276n/a x = -2;
277n/a }
278n/a return x;
279n/a}
280n/a
281n/astatic PyHash_FuncDef PyHash_Func = {fnv, "fnv", 8 * SIZEOF_PY_HASH_T,
282n/a 16 * SIZEOF_PY_HASH_T};
283n/a
284n/a#endif /* Py_HASH_ALGORITHM == Py_HASH_FNV */
285n/a
286n/a
287n/a#if Py_HASH_ALGORITHM == Py_HASH_SIPHASH24
288n/a/* **************************************************************************
289n/a <MIT License>
290n/a Copyright (c) 2013 Marek Majkowski <marek@popcount.org>
291n/a
292n/a Permission is hereby granted, free of charge, to any person obtaining a copy
293n/a of this software and associated documentation files (the "Software"), to deal
294n/a in the Software without restriction, including without limitation the rights
295n/a to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
296n/a copies of the Software, and to permit persons to whom the Software is
297n/a furnished to do so, subject to the following conditions:
298n/a
299n/a The above copyright notice and this permission notice shall be included in
300n/a all copies or substantial portions of the Software.
301n/a
302n/a THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
303n/a IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
304n/a FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
305n/a AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
306n/a LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
307n/a OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
308n/a THE SOFTWARE.
309n/a </MIT License>
310n/a
311n/a Original location:
312n/a https://github.com/majek/csiphash/
313n/a
314n/a Solution inspired by code from:
315n/a Samuel Neves (supercop/crypto_auth/siphash24/little)
316n/a djb (supercop/crypto_auth/siphash24/little2)
317n/a Jean-Philippe Aumasson (https://131002.net/siphash/siphash24.c)
318n/a
319n/a Modified for Python by Christian Heimes:
320n/a - C89 / MSVC compatibility
321n/a - _rotl64() on Windows
322n/a - letoh64() fallback
323n/a*/
324n/a
325n/a/* byte swap little endian to host endian
326n/a * Endian conversion not only ensures that the hash function returns the same
327n/a * value on all platforms. It is also required to for a good dispersion of
328n/a * the hash values' least significant bits.
329n/a */
330n/a#if PY_LITTLE_ENDIAN
331n/a# define _le64toh(x) ((uint64_t)(x))
332n/a#elif defined(__APPLE__)
333n/a# define _le64toh(x) OSSwapLittleToHostInt64(x)
334n/a#elif defined(HAVE_LETOH64)
335n/a# define _le64toh(x) le64toh(x)
336n/a#else
337n/a# define _le64toh(x) (((uint64_t)(x) << 56) | \
338n/a (((uint64_t)(x) << 40) & 0xff000000000000ULL) | \
339n/a (((uint64_t)(x) << 24) & 0xff0000000000ULL) | \
340n/a (((uint64_t)(x) << 8) & 0xff00000000ULL) | \
341n/a (((uint64_t)(x) >> 8) & 0xff000000ULL) | \
342n/a (((uint64_t)(x) >> 24) & 0xff0000ULL) | \
343n/a (((uint64_t)(x) >> 40) & 0xff00ULL) | \
344n/a ((uint64_t)(x) >> 56))
345n/a#endif
346n/a
347n/a
348n/a#ifdef _MSC_VER
349n/a# define ROTATE(x, b) _rotl64(x, b)
350n/a#else
351n/a# define ROTATE(x, b) (uint64_t)( ((x) << (b)) | ( (x) >> (64 - (b))) )
352n/a#endif
353n/a
354n/a#define HALF_ROUND(a,b,c,d,s,t) \
355n/a a += b; c += d; \
356n/a b = ROTATE(b, s) ^ a; \
357n/a d = ROTATE(d, t) ^ c; \
358n/a a = ROTATE(a, 32);
359n/a
360n/a#define DOUBLE_ROUND(v0,v1,v2,v3) \
361n/a HALF_ROUND(v0,v1,v2,v3,13,16); \
362n/a HALF_ROUND(v2,v1,v0,v3,17,21); \
363n/a HALF_ROUND(v0,v1,v2,v3,13,16); \
364n/a HALF_ROUND(v2,v1,v0,v3,17,21);
365n/a
366n/a
367n/astatic Py_hash_t
368n/asiphash24(const void *src, Py_ssize_t src_sz) {
369n/a uint64_t k0 = _le64toh(_Py_HashSecret.siphash.k0);
370n/a uint64_t k1 = _le64toh(_Py_HashSecret.siphash.k1);
371n/a uint64_t b = (uint64_t)src_sz << 56;
372n/a const uint64_t *in = (uint64_t*)src;
373n/a
374n/a uint64_t v0 = k0 ^ 0x736f6d6570736575ULL;
375n/a uint64_t v1 = k1 ^ 0x646f72616e646f6dULL;
376n/a uint64_t v2 = k0 ^ 0x6c7967656e657261ULL;
377n/a uint64_t v3 = k1 ^ 0x7465646279746573ULL;
378n/a
379n/a uint64_t t;
380n/a uint8_t *pt;
381n/a uint8_t *m;
382n/a
383n/a while (src_sz >= 8) {
384n/a uint64_t mi = _le64toh(*in);
385n/a in += 1;
386n/a src_sz -= 8;
387n/a v3 ^= mi;
388n/a DOUBLE_ROUND(v0,v1,v2,v3);
389n/a v0 ^= mi;
390n/a }
391n/a
392n/a t = 0;
393n/a pt = (uint8_t *)&t;
394n/a m = (uint8_t *)in;
395n/a switch (src_sz) {
396n/a case 7: pt[6] = m[6];
397n/a case 6: pt[5] = m[5];
398n/a case 5: pt[4] = m[4];
399n/a case 4: memcpy(pt, m, sizeof(uint32_t)); break;
400n/a case 3: pt[2] = m[2];
401n/a case 2: pt[1] = m[1];
402n/a case 1: pt[0] = m[0];
403n/a }
404n/a b |= _le64toh(t);
405n/a
406n/a v3 ^= b;
407n/a DOUBLE_ROUND(v0,v1,v2,v3);
408n/a v0 ^= b;
409n/a v2 ^= 0xff;
410n/a DOUBLE_ROUND(v0,v1,v2,v3);
411n/a DOUBLE_ROUND(v0,v1,v2,v3);
412n/a
413n/a /* modified */
414n/a t = (v0 ^ v1) ^ (v2 ^ v3);
415n/a return (Py_hash_t)t;
416n/a}
417n/a
418n/astatic PyHash_FuncDef PyHash_Func = {siphash24, "siphash24", 64, 128};
419n/a
420n/a#endif /* Py_HASH_ALGORITHM == Py_HASH_SIPHASH24 */
421n/a
422n/a#ifdef __cplusplus
423n/a}
424n/a#endif