deps/jemalloc/src/ckh.c

   1 /*
   2  *******************************************************************************
   3  * Implementation of (2^1+,2) cuckoo hashing, where 2^1+ indicates that each
   4  * hash bucket contains 2^n cells, for n >= 1, and 2 indicates that two hash
   5  * functions are employed.  The original cuckoo hashing algorithm was described
   6  * in:
   7  *
   8  *   Pagh, R., F.F. Rodler (2004) Cuckoo Hashing.  Journal of Algorithms
   9  *     51(2):122-144.
  10  *
  11  * Generalization of cuckoo hashing was discussed in:
  12  *
  13  *   Erlingsson, U., M. Manasse, F. McSherry (2006) A cool and practical
  14  *     alternative to traditional hash tables.  In Proceedings of the 7th
  15  *     Workshop on Distributed Data and Structures (WDAS'06), Santa Clara, CA,
  16  *     January 2006.
  17  *
  18  * This implementation uses precisely two hash functions because that is the
  19  * fewest that can work, and supporting multiple hashes is an implementation
  20  * burden.  Here is a reproduction of Figure 1 from Erlingsson et al. (2006)
  21  * that shows approximate expected maximum load factors for various
  22  * configurations:
  23  *
  24  *           |         #cells/bucket         |
  25  *   #hashes |   1   |   2   |   4   |   8   |
  26  *   --------+-------+-------+-------+-------+
  27  *         1 | 0.006 | 0.006 | 0.03  | 0.12  |
  28  *         2 | 0.49  | 0.86  |>0.93< |>0.96< |
  29  *         3 | 0.91  | 0.97  | 0.98  | 0.999 |
  30  *         4 | 0.97  | 0.99  | 0.999 |       |
  31  *
  32  * The number of cells per bucket is chosen such that a bucket fits in one cache
  33  * line.  So, on 32- and 64-bit systems, we use (8,2) and (4,2) cuckoo hashing,
  34  * respectively.
  35  *
  36  ******************************************************************************/
  37 #define JEMALLOC_CKH_C_
  38 #include "jemalloc/internal/jemalloc_internal.h"
  39
  40 /******************************************************************************/
  41 /* Function prototypes for non-inline static functions. */
  42
  43 static bool     ckh_grow(ckh_t *ckh);
  44 static void     ckh_shrink(ckh_t *ckh);
  45
  46 /******************************************************************************/
  47
  48 /*
  49  * Search bucket for key and return the cell number if found; SIZE_T_MAX
  50  * otherwise.
  51  */
  52 JEMALLOC_INLINE size_t
  53 ckh_bucket_search(ckh_t *ckh, size_t bucket, const void *key)
  54 {
  55         ckhc_t *cell;
  56         unsigned i;
  57
  58         for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) {
  59                 cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + i];
  60                 if (cell->key != NULL && ckh->keycomp(key, cell->key))
  61                         return ((bucket << LG_CKH_BUCKET_CELLS) + i);
  62         }
  63
  64         return (SIZE_T_MAX);
  65 }
  66
  67 /*
  68  * Search table for key and return cell number if found; SIZE_T_MAX otherwise.
  69  */
  70 JEMALLOC_INLINE size_t
  71 ckh_isearch(ckh_t *ckh, const void *key)
  72 {
  73         size_t hash1, hash2, bucket, cell;
  74
  75         assert(ckh != NULL);
  76         dassert(ckh->magic == CKH_MAGIC);
  77
  78         ckh->hash(key, ckh->lg_curbuckets, &hash1, &hash2);
  79
  80         /* Search primary bucket. */
  81         bucket = hash1 & ((ZU(1) << ckh->lg_curbuckets) - 1);
  82         cell = ckh_bucket_search(ckh, bucket, key);
  83         if (cell != SIZE_T_MAX)
  84                 return (cell);
  85
  86         /* Search secondary bucket. */
  87         bucket = hash2 & ((ZU(1) << ckh->lg_curbuckets) - 1);
  88         cell = ckh_bucket_search(ckh, bucket, key);
  89         return (cell);
  90 }
  91
  92 JEMALLOC_INLINE bool
  93 ckh_try_bucket_insert(ckh_t *ckh, size_t bucket, const void *key,
  94     const void *data)
  95 {
  96         ckhc_t *cell;
  97         unsigned offset, i;
  98
  99         /*
 100          * Cycle through the cells in the bucket, starting at a random position.
 101          * The randomness avoids worst-case search overhead as buckets fill up.
 102          */
 103         prn32(offset, LG_CKH_BUCKET_CELLS, ckh->prn_state, CKH_A, CKH_C);
 104         for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) {
 105                 cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) +
 106                     ((i + offset) & ((ZU(1) << LG_CKH_BUCKET_CELLS) - 1))];
 107                 if (cell->key == NULL) {
 108                         cell->key = key;
 109                         cell->data = data;
 110                         ckh->count++;
 111                         return (false);
 112                 }
 113         }
 114
 115         return (true);
 116 }
 117
 118 /*
 119  * No space is available in bucket.  Randomly evict an item, then try to find an
 120  * alternate location for that item.  Iteratively repeat this
 121  * eviction/relocation procedure until either success or detection of an
 122  * eviction/relocation bucket cycle.
 123  */
 124 JEMALLOC_INLINE bool
 125 ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey,
 126     void const **argdata)
 127 {
 128         const void *key, *data, *tkey, *tdata;
 129         ckhc_t *cell;
 130         size_t hash1, hash2, bucket, tbucket;
 131         unsigned i;
 132
 133         bucket = argbucket;
 134         key = *argkey;
 135         data = *argdata;
 136         while (true) {
 137                 /*
 138                  * Choose a random item within the bucket to evict.  This is
 139                  * critical to correct function, because without (eventually)
 140                  * evicting all items within a bucket during iteration, it
 141                  * would be possible to get stuck in an infinite loop if there
 142                  * were an item for which both hashes indicated the same
 143                  * bucket.
 144                  */
 145                 prn32(i, LG_CKH_BUCKET_CELLS, ckh->prn_state, CKH_A, CKH_C);
 146                 cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + i];
 147                 assert(cell->key != NULL);
 148
 149                 /* Swap cell->{key,data} and {key,data} (evict). */
 150                 tkey = cell->key; tdata = cell->data;
 151                 cell->key = key; cell->data = data;
 152                 key = tkey; data = tdata;
 153
 154 #ifdef CKH_COUNT
 155                 ckh->nrelocs++;
 156 #endif
 157
 158                 /* Find the alternate bucket for the evicted item. */
 159                 ckh->hash(key, ckh->lg_curbuckets, &hash1, &hash2);
 160                 tbucket = hash2 & ((ZU(1) << ckh->lg_curbuckets) - 1);
 161                 if (tbucket == bucket) {
 162                         tbucket = hash1 & ((ZU(1) << ckh->lg_curbuckets) - 1);
 163                         /*
 164                          * It may be that (tbucket == bucket) still, if the
 165                          * item's hashes both indicate this bucket.  However,
 166                          * we are guaranteed to eventually escape this bucket
 167                          * during iteration, assuming pseudo-random item
 168                          * selection (true randomness would make infinite
 169                          * looping a remote possibility).  The reason we can
 170                          * never get trapped forever is that there are two
 171                          * cases:
 172                          *
 173                          * 1) This bucket == argbucket, so we will quickly
 174                          *    detect an eviction cycle and terminate.
 175                          * 2) An item was evicted to this bucket from another,
 176                          *    which means that at least one item in this bucket
 177                          *    has hashes that indicate distinct buckets.
 178                          */
 179                 }
 180                 /* Check for a cycle. */
 181                 if (tbucket == argbucket) {
 182                         *argkey = key;
 183                         *argdata = data;
 184                         return (true);
 185                 }
 186
 187                 bucket = tbucket;
 188                 if (ckh_try_bucket_insert(ckh, bucket, key, data) == false)
 189                         return (false);
 190         }
 191 }
 192
 193 JEMALLOC_INLINE bool
 194 ckh_try_insert(ckh_t *ckh, void const**argkey, void const**argdata)
 195 {
 196         size_t hash1, hash2, bucket;
 197         const void *key = *argkey;
 198         const void *data = *argdata;
 199
 200         ckh->hash(key, ckh->lg_curbuckets, &hash1, &hash2);
 201
 202         /* Try to insert in primary bucket. */
 203         bucket = hash1 & ((ZU(1) << ckh->lg_curbuckets) - 1);
 204         if (ckh_try_bucket_insert(ckh, bucket, key, data) == false)
 205                 return (false);
 206
 207         /* Try to insert in secondary bucket. */
 208         bucket = hash2 & ((ZU(1) << ckh->lg_curbuckets) - 1);
 209         if (ckh_try_bucket_insert(ckh, bucket, key, data) == false)
 210                 return (false);
 211
 212         /*
 213          * Try to find a place for this item via iterative eviction/relocation.
 214          */
 215         return (ckh_evict_reloc_insert(ckh, bucket, argkey, argdata));
 216 }
 217
 218 /*
 219  * Try to rebuild the hash table from scratch by inserting all items from the
 220  * old table into the new.
 221  */
 222 JEMALLOC_INLINE bool
 223 ckh_rebuild(ckh_t *ckh, ckhc_t *aTab)
 224 {
 225         size_t count, i, nins;
 226         const void *key, *data;
 227
 228         count = ckh->count;
 229         ckh->count = 0;
 230         for (i = nins = 0; nins < count; i++) {
 231                 if (aTab[i].key != NULL) {
 232                         key = aTab[i].key;
 233                         data = aTab[i].data;
 234                         if (ckh_try_insert(ckh, &key, &data)) {
 235                                 ckh->count = count;
 236                                 return (true);
 237                         }
 238                         nins++;
 239                 }
 240         }
 241
 242         return (false);
 243 }
 244
 245 static bool
 246 ckh_grow(ckh_t *ckh)
 247 {
 248         bool ret;
 249         ckhc_t *tab, *ttab;
 250         size_t lg_curcells;
 251         unsigned lg_prevbuckets;
 252
 253 #ifdef CKH_COUNT
 254         ckh->ngrows++;
 255 #endif
 256
 257         /*
 258          * It is possible (though unlikely, given well behaved hashes) that the
 259          * table will have to be doubled more than once in order to create a
 260          * usable table.
 261          */
 262         lg_prevbuckets = ckh->lg_curbuckets;
 263         lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS;
 264         while (true) {
 265                 size_t usize;
 266
 267                 lg_curcells++;
 268                 usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE, NULL);
 269                 if (usize == 0) {
 270                         ret = true;
 271                         goto RETURN;
 272                 }
 273                 tab = (ckhc_t *)ipalloc(usize, CACHELINE, true);
 274                 if (tab == NULL) {
 275                         ret = true;
 276                         goto RETURN;
 277                 }
 278                 /* Swap in new table. */
 279                 ttab = ckh->tab;
 280                 ckh->tab = tab;
 281                 tab = ttab;
 282                 ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS;
 283
 284                 if (ckh_rebuild(ckh, tab) == false) {
 285                         idalloc(tab);
 286                         break;
 287                 }
 288
 289                 /* Rebuilding failed, so back out partially rebuilt table. */
 290                 idalloc(ckh->tab);
 291                 ckh->tab = tab;
 292                 ckh->lg_curbuckets = lg_prevbuckets;
 293         }
 294
 295         ret = false;
 296 RETURN:
 297         return (ret);
 298 }
 299
 300 static void
 301 ckh_shrink(ckh_t *ckh)
 302 {
 303         ckhc_t *tab, *ttab;
 304         size_t lg_curcells, usize;
 305         unsigned lg_prevbuckets;
 306
 307         /*
 308          * It is possible (though unlikely, given well behaved hashes) that the
 309          * table rebuild will fail.
 310          */
 311         lg_prevbuckets = ckh->lg_curbuckets;
 312         lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS - 1;
 313         usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE, NULL);
 314         if (usize == 0)
 315                 return;
 316         tab = (ckhc_t *)ipalloc(usize, CACHELINE, true);
 317         if (tab == NULL) {
 318                 /*
 319                  * An OOM error isn't worth propagating, since it doesn't
 320                  * prevent this or future operations from proceeding.
 321                  */
 322                 return;
 323         }
 324         /* Swap in new table. */
 325         ttab = ckh->tab;
 326         ckh->tab = tab;
 327         tab = ttab;
 328         ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS;
 329
 330         if (ckh_rebuild(ckh, tab) == false) {
 331                 idalloc(tab);
 332 #ifdef CKH_COUNT
 333                 ckh->nshrinks++;
 334 #endif
 335                 return;
 336         }
 337
 338         /* Rebuilding failed, so back out partially rebuilt table. */
 339         idalloc(ckh->tab);
 340         ckh->tab = tab;
 341         ckh->lg_curbuckets = lg_prevbuckets;
 342 #ifdef CKH_COUNT
 343         ckh->nshrinkfails++;
 344 #endif
 345 }
 346
 347 bool
 348 ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp)
 349 {
 350         bool ret;
 351         size_t mincells, usize;
 352         unsigned lg_mincells;
 353
 354         assert(minitems > 0);
 355         assert(hash != NULL);
 356         assert(keycomp != NULL);
 357
 358 #ifdef CKH_COUNT
 359         ckh->ngrows = 0;
 360         ckh->nshrinks = 0;
 361         ckh->nshrinkfails = 0;
 362         ckh->ninserts = 0;
 363         ckh->nrelocs = 0;
 364 #endif
 365         ckh->prn_state = 42; /* Value doesn't really matter. */
 366         ckh->count = 0;
 367
 368         /*
 369          * Find the minimum power of 2 that is large enough to fit aBaseCount
 370          * entries.  We are using (2+,2) cuckoo hashing, which has an expected
 371          * maximum load factor of at least ~0.86, so 0.75 is a conservative load
 372          * factor that will typically allow 2^aLgMinItems to fit without ever
 373          * growing the table.
 374          */
 375         assert(LG_CKH_BUCKET_CELLS > 0);
 376         mincells = ((minitems + (3 - (minitems % 3))) / 3) << 2;
 377         for (lg_mincells = LG_CKH_BUCKET_CELLS;
 378             (ZU(1) << lg_mincells) < mincells;
 379             lg_mincells++)
 380                 ; /* Do nothing. */
 381         ckh->lg_minbuckets = lg_mincells - LG_CKH_BUCKET_CELLS;
 382         ckh->lg_curbuckets = lg_mincells - LG_CKH_BUCKET_CELLS;
 383         ckh->hash = hash;
 384         ckh->keycomp = keycomp;
 385
 386         usize = sa2u(sizeof(ckhc_t) << lg_mincells, CACHELINE, NULL);
 387         if (usize == 0) {
 388                 ret = true;
 389                 goto RETURN;
 390         }
 391         ckh->tab = (ckhc_t *)ipalloc(usize, CACHELINE, true);
 392         if (ckh->tab == NULL) {
 393                 ret = true;
 394                 goto RETURN;
 395         }
 396
 397 #ifdef JEMALLOC_DEBUG
 398         ckh->magic = CKH_MAGIC;
 399 #endif
 400
 401         ret = false;
 402 RETURN:
 403         return (ret);
 404 }
 405
 406 void
 407 ckh_delete(ckh_t *ckh)
 408 {
 409
 410         assert(ckh != NULL);
 411         dassert(ckh->magic == CKH_MAGIC);
 412
 413 #ifdef CKH_VERBOSE
 414         malloc_printf(
 415             "%s(%p): ngrows: %"PRIu64", nshrinks: %"PRIu64","
 416             " nshrinkfails: %"PRIu64", ninserts: %"PRIu64","
 417             " nrelocs: %"PRIu64"\n", __func__, ckh,
 418             (unsigned long long)ckh->ngrows,
 419             (unsigned long long)ckh->nshrinks,
 420             (unsigned long long)ckh->nshrinkfails,
 421             (unsigned long long)ckh->ninserts,
 422             (unsigned long long)ckh->nrelocs);
 423 #endif
 424
 425         idalloc(ckh->tab);
 426 #ifdef JEMALLOC_DEBUG
 427         memset(ckh, 0x5a, sizeof(ckh_t));
 428 #endif
 429 }
 430
 431 size_t
 432 ckh_count(ckh_t *ckh)
 433 {
 434
 435         assert(ckh != NULL);
 436         dassert(ckh->magic == CKH_MAGIC);
 437
 438         return (ckh->count);
 439 }
 440
 441 bool
 442 ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data)
 443 {
 444         size_t i, ncells;
 445
 446         for (i = *tabind, ncells = (ZU(1) << (ckh->lg_curbuckets +
 447             LG_CKH_BUCKET_CELLS)); i < ncells; i++) {
 448                 if (ckh->tab[i].key != NULL) {
 449                         if (key != NULL)
 450                                 *key = (void *)ckh->tab[i].key;
 451                         if (data != NULL)
 452                                 *data = (void *)ckh->tab[i].data;
 453                         *tabind = i + 1;
 454                         return (false);
 455                 }
 456         }
 457
 458         return (true);
 459 }
 460
 461 bool
 462 ckh_insert(ckh_t *ckh, const void *key, const void *data)
 463 {
 464         bool ret;
 465
 466         assert(ckh != NULL);
 467         dassert(ckh->magic == CKH_MAGIC);
 468         assert(ckh_search(ckh, key, NULL, NULL));
 469
 470 #ifdef CKH_COUNT
 471         ckh->ninserts++;
 472 #endif
 473
 474         while (ckh_try_insert(ckh, &key, &data)) {
 475                 if (ckh_grow(ckh)) {
 476                         ret = true;
 477                         goto RETURN;
 478                 }
 479         }
 480
 481         ret = false;
 482 RETURN:
 483         return (ret);
 484 }
 485
 486 bool
 487 ckh_remove(ckh_t *ckh, const void *searchkey, void **key, void **data)
 488 {
 489         size_t cell;
 490
 491         assert(ckh != NULL);
 492         dassert(ckh->magic == CKH_MAGIC);
 493
 494         cell = ckh_isearch(ckh, searchkey);
 495         if (cell != SIZE_T_MAX) {
 496                 if (key != NULL)
 497                         *key = (void *)ckh->tab[cell].key;
 498                 if (data != NULL)
 499                         *data = (void *)ckh->tab[cell].data;
 500                 ckh->tab[cell].key = NULL;
 501                 ckh->tab[cell].data = NULL; /* Not necessary. */
 502
 503                 ckh->count--;
 504                 /* Try to halve the table if it is less than 1/4 full. */
 505                 if (ckh->count < (ZU(1) << (ckh->lg_curbuckets
 506                     + LG_CKH_BUCKET_CELLS - 2)) && ckh->lg_curbuckets
 507                     > ckh->lg_minbuckets) {
 508                         /* Ignore error due to OOM. */
 509                         ckh_shrink(ckh);
 510                 }
 511
 512                 return (false);
 513         }
 514
 515         return (true);
 516 }
 517
 518 bool
 519 ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data)
 520 {
 521         size_t cell;
 522
 523         assert(ckh != NULL);
 524         dassert(ckh->magic == CKH_MAGIC);
 525
 526         cell = ckh_isearch(ckh, searchkey);
 527         if (cell != SIZE_T_MAX) {
 528                 if (key != NULL)
 529                         *key = (void *)ckh->tab[cell].key;
 530                 if (data != NULL)
 531                         *data = (void *)ckh->tab[cell].data;
 532                 return (false);
 533         }
 534
 535         return (true);
 536 }
 537
 538 void
 539 ckh_string_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2)
 540 {
 541         size_t ret1, ret2;
 542         uint64_t h;
 543
 544         assert(minbits <= 32 || (SIZEOF_PTR == 8 && minbits <= 64));
 545         assert(hash1 != NULL);
 546         assert(hash2 != NULL);
 547
 548         h = hash(key, strlen((const char *)key), 0x94122f335b332aeaLLU);
 549         if (minbits <= 32) {
 550                 /*
 551                  * Avoid doing multiple hashes, since a single hash provides
 552                  * enough bits.
 553                  */
 554                 ret1 = h & ZU(0xffffffffU);
 555                 ret2 = h >> 32;
 556         } else {
 557                 ret1 = h;
 558                 ret2 = hash(key, strlen((const char *)key),
 559                     0x8432a476666bbc13U);
 560         }
 561
 562         *hash1 = ret1;
 563         *hash2 = ret2;
 564 }
 565
 566 bool
 567 ckh_string_keycomp(const void *k1, const void *k2)
 568 {
 569
 570     assert(k1 != NULL);
 571     assert(k2 != NULL);
 572
 573     return (strcmp((char *)k1, (char *)k2) ? false : true);
 574 }
 575
 576 void
 577 ckh_pointer_hash(const void *key, unsigned minbits, size_t *hash1,
 578     size_t *hash2)
 579 {
 580         size_t ret1, ret2;
 581         uint64_t h;
 582         union {
 583                 const void      *v;
 584                 uint64_t        i;
 585         } u;
 586
 587         assert(minbits <= 32 || (SIZEOF_PTR == 8 && minbits <= 64));
 588         assert(hash1 != NULL);
 589         assert(hash2 != NULL);
 590
 591         assert(sizeof(u.v) == sizeof(u.i));
 592 #if (LG_SIZEOF_PTR != LG_SIZEOF_INT)
 593         u.i = 0;
 594 #endif
 595         u.v = key;
 596         h = hash(&u.i, sizeof(u.i), 0xd983396e68886082LLU);
 597         if (minbits <= 32) {
 598                 /*
 599                  * Avoid doing multiple hashes, since a single hash provides
 600                  * enough bits.
 601                  */
 602                 ret1 = h & ZU(0xffffffffU);
 603                 ret2 = h >> 32;
 604         } else {
 605                 assert(SIZEOF_PTR == 8);
 606                 ret1 = h;
 607                 ret2 = hash(&u.i, sizeof(u.i), 0x5e2be9aff8709a5dLLU);
 608         }
 609
 610         *hash1 = ret1;
 611         *hash2 = ret2;
 612 }
 613
 614 bool
 615 ckh_pointer_keycomp(const void *k1, const void *k2)
 616 {
 617
 618         return ((k1 == k2) ? true : false);
 619 }