ยปCore Development>Code coverage>Modules/hashtable.c

Python code coverage for Modules/hashtable.c

#countcontent
1n/a/* The implementation of the hash table (_Py_hashtable_t) is based on the
2n/a cfuhash project:
3n/a http://sourceforge.net/projects/libcfu/
4n/a
5n/a Copyright of cfuhash:
6n/a ----------------------------------
7n/a Creation date: 2005-06-24 21:22:40
8n/a Authors: Don
9n/a Change log:
10n/a
11n/a Copyright (c) 2005 Don Owens
12n/a All rights reserved.
13n/a
14n/a This code is released under the BSD license:
15n/a
16n/a Redistribution and use in source and binary forms, with or without
17n/a modification, are permitted provided that the following conditions
18n/a are met:
19n/a
20n/a * Redistributions of source code must retain the above copyright
21n/a notice, this list of conditions and the following disclaimer.
22n/a
23n/a * Redistributions in binary form must reproduce the above
24n/a copyright notice, this list of conditions and the following
25n/a disclaimer in the documentation and/or other materials provided
26n/a with the distribution.
27n/a
28n/a * Neither the name of the author nor the names of its
29n/a contributors may be used to endorse or promote products derived
30n/a from this software without specific prior written permission.
31n/a
32n/a THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
33n/a "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
34n/a LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
35n/a FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36n/a COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
37n/a INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
38n/a (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
39n/a SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
40n/a HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
41n/a STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42n/a ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
43n/a OF THE POSSIBILITY OF SUCH DAMAGE.
44n/a ----------------------------------
45n/a*/
46n/a
47n/a#include "Python.h"
48n/a#include "hashtable.h"
49n/a
50n/a#define HASHTABLE_MIN_SIZE 16
51n/a#define HASHTABLE_HIGH 0.50
52n/a#define HASHTABLE_LOW 0.10
53n/a#define HASHTABLE_REHASH_FACTOR 2.0 / (HASHTABLE_LOW + HASHTABLE_HIGH)
54n/a
55n/a#define BUCKETS_HEAD(SLIST) \
56n/a ((_Py_hashtable_entry_t *)_Py_SLIST_HEAD(&(SLIST)))
57n/a#define TABLE_HEAD(HT, BUCKET) \
58n/a ((_Py_hashtable_entry_t *)_Py_SLIST_HEAD(&(HT)->buckets[BUCKET]))
59n/a#define ENTRY_NEXT(ENTRY) \
60n/a ((_Py_hashtable_entry_t *)_Py_SLIST_ITEM_NEXT(ENTRY))
61n/a#define HASHTABLE_ITEM_SIZE(HT) \
62n/a (sizeof(_Py_hashtable_entry_t) + (HT)->key_size + (HT)->data_size)
63n/a
64n/a#define ENTRY_READ_PDATA(TABLE, ENTRY, DATA_SIZE, PDATA) \
65n/a do { \
66n/a assert((DATA_SIZE) == (TABLE)->data_size); \
67n/a memcpy((PDATA), _Py_HASHTABLE_ENTRY_PDATA(TABLE, (ENTRY)), \
68n/a (DATA_SIZE)); \
69n/a } while (0)
70n/a
71n/a#define ENTRY_WRITE_PDATA(TABLE, ENTRY, DATA_SIZE, PDATA) \
72n/a do { \
73n/a assert((DATA_SIZE) == (TABLE)->data_size); \
74n/a memcpy((void *)_Py_HASHTABLE_ENTRY_PDATA((TABLE), (ENTRY)), \
75n/a (PDATA), (DATA_SIZE)); \
76n/a } while (0)
77n/a
78n/a/* Forward declaration */
79n/astatic void hashtable_rehash(_Py_hashtable_t *ht);
80n/a
81n/astatic void
82n/a_Py_slist_init(_Py_slist_t *list)
83n/a{
84n/a list->head = NULL;
85n/a}
86n/a
87n/a
88n/astatic void
89n/a_Py_slist_prepend(_Py_slist_t *list, _Py_slist_item_t *item)
90n/a{
91n/a item->next = list->head;
92n/a list->head = item;
93n/a}
94n/a
95n/a
96n/astatic void
97n/a_Py_slist_remove(_Py_slist_t *list, _Py_slist_item_t *previous,
98n/a _Py_slist_item_t *item)
99n/a{
100n/a if (previous != NULL)
101n/a previous->next = item->next;
102n/a else
103n/a list->head = item->next;
104n/a}
105n/a
106n/a
107n/aPy_uhash_t
108n/a_Py_hashtable_hash_ptr(struct _Py_hashtable_t *ht, const void *pkey)
109n/a{
110n/a void *key;
111n/a
112n/a _Py_HASHTABLE_READ_KEY(ht, pkey, key);
113n/a return (Py_uhash_t)_Py_HashPointer(key);
114n/a}
115n/a
116n/a
117n/aint
118n/a_Py_hashtable_compare_direct(_Py_hashtable_t *ht, const void *pkey,
119n/a const _Py_hashtable_entry_t *entry)
120n/a{
121n/a const void *pkey2 = _Py_HASHTABLE_ENTRY_PKEY(entry);
122n/a return (memcmp(pkey, pkey2, ht->key_size) == 0);
123n/a}
124n/a
125n/a
126n/a/* makes sure the real size of the buckets array is a power of 2 */
127n/astatic size_t
128n/around_size(size_t s)
129n/a{
130n/a size_t i;
131n/a if (s < HASHTABLE_MIN_SIZE)
132n/a return HASHTABLE_MIN_SIZE;
133n/a i = 1;
134n/a while (i < s)
135n/a i <<= 1;
136n/a return i;
137n/a}
138n/a
139n/a
140n/a_Py_hashtable_t *
141n/a_Py_hashtable_new_full(size_t key_size, size_t data_size,
142n/a size_t init_size,
143n/a _Py_hashtable_hash_func hash_func,
144n/a _Py_hashtable_compare_func compare_func,
145n/a _Py_hashtable_allocator_t *allocator)
146n/a{
147n/a _Py_hashtable_t *ht;
148n/a size_t buckets_size;
149n/a _Py_hashtable_allocator_t alloc;
150n/a
151n/a if (allocator == NULL) {
152n/a alloc.malloc = PyMem_RawMalloc;
153n/a alloc.free = PyMem_RawFree;
154n/a }
155n/a else
156n/a alloc = *allocator;
157n/a
158n/a ht = (_Py_hashtable_t *)alloc.malloc(sizeof(_Py_hashtable_t));
159n/a if (ht == NULL)
160n/a return ht;
161n/a
162n/a ht->num_buckets = round_size(init_size);
163n/a ht->entries = 0;
164n/a ht->key_size = key_size;
165n/a ht->data_size = data_size;
166n/a
167n/a buckets_size = ht->num_buckets * sizeof(ht->buckets[0]);
168n/a ht->buckets = alloc.malloc(buckets_size);
169n/a if (ht->buckets == NULL) {
170n/a alloc.free(ht);
171n/a return NULL;
172n/a }
173n/a memset(ht->buckets, 0, buckets_size);
174n/a
175n/a ht->hash_func = hash_func;
176n/a ht->compare_func = compare_func;
177n/a ht->alloc = alloc;
178n/a return ht;
179n/a}
180n/a
181n/a
182n/a_Py_hashtable_t *
183n/a_Py_hashtable_new(size_t key_size, size_t data_size,
184n/a _Py_hashtable_hash_func hash_func,
185n/a _Py_hashtable_compare_func compare_func)
186n/a{
187n/a return _Py_hashtable_new_full(key_size, data_size,
188n/a HASHTABLE_MIN_SIZE,
189n/a hash_func, compare_func,
190n/a NULL);
191n/a}
192n/a
193n/a
194n/asize_t
195n/a_Py_hashtable_size(_Py_hashtable_t *ht)
196n/a{
197n/a size_t size;
198n/a
199n/a size = sizeof(_Py_hashtable_t);
200n/a
201n/a /* buckets */
202n/a size += ht->num_buckets * sizeof(_Py_hashtable_entry_t *);
203n/a
204n/a /* entries */
205n/a size += ht->entries * HASHTABLE_ITEM_SIZE(ht);
206n/a
207n/a return size;
208n/a}
209n/a
210n/a
211n/a#ifdef Py_DEBUG
212n/avoid
213n/a_Py_hashtable_print_stats(_Py_hashtable_t *ht)
214n/a{
215n/a size_t size;
216n/a size_t chain_len, max_chain_len, total_chain_len, nchains;
217n/a _Py_hashtable_entry_t *entry;
218n/a size_t hv;
219n/a double load;
220n/a
221n/a size = _Py_hashtable_size(ht);
222n/a
223n/a load = (double)ht->entries / ht->num_buckets;
224n/a
225n/a max_chain_len = 0;
226n/a total_chain_len = 0;
227n/a nchains = 0;
228n/a for (hv = 0; hv < ht->num_buckets; hv++) {
229n/a entry = TABLE_HEAD(ht, hv);
230n/a if (entry != NULL) {
231n/a chain_len = 0;
232n/a for (; entry; entry = ENTRY_NEXT(entry)) {
233n/a chain_len++;
234n/a }
235n/a if (chain_len > max_chain_len)
236n/a max_chain_len = chain_len;
237n/a total_chain_len += chain_len;
238n/a nchains++;
239n/a }
240n/a }
241n/a printf("hash table %p: entries=%"
242n/a PY_FORMAT_SIZE_T "u/%" PY_FORMAT_SIZE_T "u (%.0f%%), ",
243n/a ht, ht->entries, ht->num_buckets, load * 100.0);
244n/a if (nchains)
245n/a printf("avg_chain_len=%.1f, ", (double)total_chain_len / nchains);
246n/a printf("max_chain_len=%" PY_FORMAT_SIZE_T "u, %" PY_FORMAT_SIZE_T "u kB\n",
247n/a max_chain_len, size / 1024);
248n/a}
249n/a#endif
250n/a
251n/a
252n/a_Py_hashtable_entry_t *
253n/a_Py_hashtable_get_entry(_Py_hashtable_t *ht,
254n/a size_t key_size, const void *pkey)
255n/a{
256n/a Py_uhash_t key_hash;
257n/a size_t index;
258n/a _Py_hashtable_entry_t *entry;
259n/a
260n/a assert(key_size == ht->key_size);
261n/a
262n/a key_hash = ht->hash_func(ht, pkey);
263n/a index = key_hash & (ht->num_buckets - 1);
264n/a
265n/a for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) {
266n/a if (entry->key_hash == key_hash && ht->compare_func(ht, pkey, entry))
267n/a break;
268n/a }
269n/a
270n/a return entry;
271n/a}
272n/a
273n/a
274n/astatic int
275n/a_Py_hashtable_pop_entry(_Py_hashtable_t *ht, size_t key_size, const void *pkey,
276n/a void *data, size_t data_size)
277n/a{
278n/a Py_uhash_t key_hash;
279n/a size_t index;
280n/a _Py_hashtable_entry_t *entry, *previous;
281n/a
282n/a assert(key_size == ht->key_size);
283n/a
284n/a key_hash = ht->hash_func(ht, pkey);
285n/a index = key_hash & (ht->num_buckets - 1);
286n/a
287n/a previous = NULL;
288n/a for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) {
289n/a if (entry->key_hash == key_hash && ht->compare_func(ht, pkey, entry))
290n/a break;
291n/a previous = entry;
292n/a }
293n/a
294n/a if (entry == NULL)
295n/a return 0;
296n/a
297n/a _Py_slist_remove(&ht->buckets[index], (_Py_slist_item_t *)previous,
298n/a (_Py_slist_item_t *)entry);
299n/a ht->entries--;
300n/a
301n/a if (data != NULL)
302n/a ENTRY_READ_PDATA(ht, entry, data_size, data);
303n/a ht->alloc.free(entry);
304n/a
305n/a if ((float)ht->entries / (float)ht->num_buckets < HASHTABLE_LOW)
306n/a hashtable_rehash(ht);
307n/a return 1;
308n/a}
309n/a
310n/a
311n/aint
312n/a_Py_hashtable_set(_Py_hashtable_t *ht, size_t key_size, const void *pkey,
313n/a size_t data_size, const void *data)
314n/a{
315n/a Py_uhash_t key_hash;
316n/a size_t index;
317n/a _Py_hashtable_entry_t *entry;
318n/a
319n/a assert(key_size == ht->key_size);
320n/a
321n/a assert(data != NULL || data_size == 0);
322n/a#ifndef NDEBUG
323n/a /* Don't write the assertion on a single line because it is interesting
324n/a to know the duplicated entry if the assertion failed. The entry can
325n/a be read using a debugger. */
326n/a entry = _Py_hashtable_get_entry(ht, key_size, pkey);
327n/a assert(entry == NULL);
328n/a#endif
329n/a
330n/a key_hash = ht->hash_func(ht, pkey);
331n/a index = key_hash & (ht->num_buckets - 1);
332n/a
333n/a entry = ht->alloc.malloc(HASHTABLE_ITEM_SIZE(ht));
334n/a if (entry == NULL) {
335n/a /* memory allocation failed */
336n/a return -1;
337n/a }
338n/a
339n/a entry->key_hash = key_hash;
340n/a memcpy((void *)_Py_HASHTABLE_ENTRY_PKEY(entry), pkey, ht->key_size);
341n/a if (data)
342n/a ENTRY_WRITE_PDATA(ht, entry, data_size, data);
343n/a
344n/a _Py_slist_prepend(&ht->buckets[index], (_Py_slist_item_t*)entry);
345n/a ht->entries++;
346n/a
347n/a if ((float)ht->entries / (float)ht->num_buckets > HASHTABLE_HIGH)
348n/a hashtable_rehash(ht);
349n/a return 0;
350n/a}
351n/a
352n/a
353n/aint
354n/a_Py_hashtable_get(_Py_hashtable_t *ht, size_t key_size,const void *pkey,
355n/a size_t data_size, void *data)
356n/a{
357n/a _Py_hashtable_entry_t *entry;
358n/a
359n/a assert(data != NULL);
360n/a
361n/a entry = _Py_hashtable_get_entry(ht, key_size, pkey);
362n/a if (entry == NULL)
363n/a return 0;
364n/a ENTRY_READ_PDATA(ht, entry, data_size, data);
365n/a return 1;
366n/a}
367n/a
368n/a
369n/aint
370n/a_Py_hashtable_pop(_Py_hashtable_t *ht, size_t key_size, const void *pkey,
371n/a size_t data_size, void *data)
372n/a{
373n/a assert(data != NULL);
374n/a return _Py_hashtable_pop_entry(ht, key_size, pkey, data, data_size);
375n/a}
376n/a
377n/a
378n/a/* Code commented since the function is not needed in Python */
379n/a#if 0
380n/avoid
381n/a_Py_hashtable_delete(_Py_hashtable_t *ht, size_t key_size, const void *pkey)
382n/a{
383n/a#ifndef NDEBUG
384n/a int found = _Py_hashtable_pop_entry(ht, key_size, pkey, NULL, 0);
385n/a assert(found);
386n/a#else
387n/a (void)_Py_hashtable_pop_entry(ht, key_size, pkey, NULL, 0);
388n/a#endif
389n/a}
390n/a#endif
391n/a
392n/a
393n/aint
394n/a_Py_hashtable_foreach(_Py_hashtable_t *ht,
395n/a _Py_hashtable_foreach_func func,
396n/a void *arg)
397n/a{
398n/a _Py_hashtable_entry_t *entry;
399n/a size_t hv;
400n/a
401n/a for (hv = 0; hv < ht->num_buckets; hv++) {
402n/a for (entry = TABLE_HEAD(ht, hv); entry; entry = ENTRY_NEXT(entry)) {
403n/a int res = func(ht, entry, arg);
404n/a if (res)
405n/a return res;
406n/a }
407n/a }
408n/a return 0;
409n/a}
410n/a
411n/a
412n/astatic void
413n/ahashtable_rehash(_Py_hashtable_t *ht)
414n/a{
415n/a size_t buckets_size, new_size, bucket;
416n/a _Py_slist_t *old_buckets = NULL;
417n/a size_t old_num_buckets;
418n/a
419n/a new_size = round_size((size_t)(ht->entries * HASHTABLE_REHASH_FACTOR));
420n/a if (new_size == ht->num_buckets)
421n/a return;
422n/a
423n/a old_num_buckets = ht->num_buckets;
424n/a
425n/a buckets_size = new_size * sizeof(ht->buckets[0]);
426n/a old_buckets = ht->buckets;
427n/a ht->buckets = ht->alloc.malloc(buckets_size);
428n/a if (ht->buckets == NULL) {
429n/a /* cancel rehash on memory allocation failure */
430n/a ht->buckets = old_buckets ;
431n/a /* memory allocation failed */
432n/a return;
433n/a }
434n/a memset(ht->buckets, 0, buckets_size);
435n/a
436n/a ht->num_buckets = new_size;
437n/a
438n/a for (bucket = 0; bucket < old_num_buckets; bucket++) {
439n/a _Py_hashtable_entry_t *entry, *next;
440n/a for (entry = BUCKETS_HEAD(old_buckets[bucket]); entry != NULL; entry = next) {
441n/a size_t entry_index;
442n/a
443n/a
444n/a assert(ht->hash_func(ht, _Py_HASHTABLE_ENTRY_PKEY(entry)) == entry->key_hash);
445n/a next = ENTRY_NEXT(entry);
446n/a entry_index = entry->key_hash & (new_size - 1);
447n/a
448n/a _Py_slist_prepend(&ht->buckets[entry_index], (_Py_slist_item_t*)entry);
449n/a }
450n/a }
451n/a
452n/a ht->alloc.free(old_buckets);
453n/a}
454n/a
455n/a
456n/avoid
457n/a_Py_hashtable_clear(_Py_hashtable_t *ht)
458n/a{
459n/a _Py_hashtable_entry_t *entry, *next;
460n/a size_t i;
461n/a
462n/a for (i=0; i < ht->num_buckets; i++) {
463n/a for (entry = TABLE_HEAD(ht, i); entry != NULL; entry = next) {
464n/a next = ENTRY_NEXT(entry);
465n/a ht->alloc.free(entry);
466n/a }
467n/a _Py_slist_init(&ht->buckets[i]);
468n/a }
469n/a ht->entries = 0;
470n/a hashtable_rehash(ht);
471n/a}
472n/a
473n/a
474n/avoid
475n/a_Py_hashtable_destroy(_Py_hashtable_t *ht)
476n/a{
477n/a size_t i;
478n/a
479n/a for (i = 0; i < ht->num_buckets; i++) {
480n/a _Py_slist_item_t *entry = ht->buckets[i].head;
481n/a while (entry) {
482n/a _Py_slist_item_t *entry_next = entry->next;
483n/a ht->alloc.free(entry);
484n/a entry = entry_next;
485n/a }
486n/a }
487n/a
488n/a ht->alloc.free(ht->buckets);
489n/a ht->alloc.free(ht);
490n/a}
491n/a
492n/a
493n/a_Py_hashtable_t *
494n/a_Py_hashtable_copy(_Py_hashtable_t *src)
495n/a{
496n/a const size_t key_size = src->key_size;
497n/a const size_t data_size = src->data_size;
498n/a _Py_hashtable_t *dst;
499n/a _Py_hashtable_entry_t *entry;
500n/a size_t bucket;
501n/a int err;
502n/a
503n/a dst = _Py_hashtable_new_full(key_size, data_size,
504n/a src->num_buckets,
505n/a src->hash_func,
506n/a src->compare_func,
507n/a &src->alloc);
508n/a if (dst == NULL)
509n/a return NULL;
510n/a
511n/a for (bucket=0; bucket < src->num_buckets; bucket++) {
512n/a entry = TABLE_HEAD(src, bucket);
513n/a for (; entry; entry = ENTRY_NEXT(entry)) {
514n/a const void *pkey = _Py_HASHTABLE_ENTRY_PKEY(entry);
515n/a const void *pdata = _Py_HASHTABLE_ENTRY_PDATA(src, entry);
516n/a err = _Py_hashtable_set(dst, key_size, pkey, data_size, pdata);
517n/a if (err) {
518n/a _Py_hashtable_destroy(dst);
519n/a return NULL;
520n/a }
521n/a }
522n/a }
523n/a return dst;
524n/a}