icuSources/common/uhash.c

   1 /*
   2 ******************************************************************************
   3 *   Copyright (C) 1997-2016, International Business Machines
   4 *   Corporation and others.  All Rights Reserved.
   5 ******************************************************************************
   6 *   Date        Name        Description
   7 *   03/22/00    aliu        Adapted from original C++ ICU Hashtable.
   8 *   07/06/01    aliu        Modified to support int32_t keys on
   9 *                           platforms with sizeof(void*) < 32.
  10 ******************************************************************************
  11 */
  12
  13 #include "uhash.h"
  14 #include "unicode/ustring.h"
  15 #include "cstring.h"
  16 #include "cmemory.h"
  17 #include "uassert.h"
  18 #include "ustr_imp.h"
  19
  20 /* This hashtable is implemented as a double hash.  All elements are
  21  * stored in a single array with no secondary storage for collision
  22  * resolution (no linked list, etc.).  When there is a hash collision
  23  * (when two unequal keys have the same hashcode) we resolve this by
  24  * using a secondary hash.  The secondary hash is an increment
  25  * computed as a hash function (a different one) of the primary
  26  * hashcode.  This increment is added to the initial hash value to
  27  * obtain further slots assigned to the same hash code.  For this to
  28  * work, the length of the array and the increment must be relatively
  29  * prime.  The easiest way to achieve this is to have the length of
  30  * the array be prime, and the increment be any value from
  31  * 1..length-1.
  32  *
  33  * Hashcodes are 32-bit integers.  We make sure all hashcodes are
  34  * non-negative by masking off the top bit.  This has two effects: (1)
  35  * modulo arithmetic is simplified.  If we allowed negative hashcodes,
  36  * then when we computed hashcode % length, we could get a negative
  37  * result, which we would then have to adjust back into range.  It's
  38  * simpler to just make hashcodes non-negative. (2) It makes it easy
  39  * to check for empty vs. occupied slots in the table.  We just mark
  40  * empty or deleted slots with a negative hashcode.
  41  *
  42  * The central function is _uhash_find().  This function looks for a
  43  * slot matching the given key and hashcode.  If one is found, it
  44  * returns a pointer to that slot.  If the table is full, and no match
  45  * is found, it returns NULL -- in theory.  This would make the code
  46  * more complicated, since all callers of _uhash_find() would then
  47  * have to check for a NULL result.  To keep this from happening, we
  48  * don't allow the table to fill.  When there is only one
  49  * empty/deleted slot left, uhash_put() will refuse to increase the
  50  * count, and fail.  This simplifies the code.  In practice, one will
  51  * seldom encounter this using default UHashtables.  However, if a
  52  * hashtable is set to a U_FIXED resize policy, or if memory is
  53  * exhausted, then the table may fill.
  54  *
  55  * High and low water ratios control rehashing.  They establish levels
  56  * of fullness (from 0 to 1) outside of which the data array is
  57  * reallocated and repopulated.  Setting the low water ratio to zero
  58  * means the table will never shrink.  Setting the high water ratio to
  59  * one means the table will never grow.  The ratios should be
  60  * coordinated with the ratio between successive elements of the
  61  * PRIMES table, so that when the primeIndex is incremented or
  62  * decremented during rehashing, it brings the ratio of count / length
  63  * back into the desired range (between low and high water ratios).
  64  */
  65
  66 /********************************************************************
  67  * PRIVATE Constants, Macros
  68  ********************************************************************/
  69
  70 /* This is a list of non-consecutive primes chosen such that
  71  * PRIMES[i+1] ~ 2*PRIMES[i].  (Currently, the ratio ranges from 1.81
  72  * to 2.18; the inverse ratio ranges from 0.459 to 0.552.)  If this
  73  * ratio is changed, the low and high water ratios should also be
  74  * adjusted to suit.
  75  *
  76  * These prime numbers were also chosen so that they are the largest
  77  * prime number while being less than a power of two.
  78  */
  79 static const int32_t PRIMES[] = {
  80     7, 13, 31, 61, 127, 251, 509, 1021, 2039, 4093, 8191, 16381, 32749,
  81     65521, 131071, 262139, 524287, 1048573, 2097143, 4194301, 8388593,
  82     16777213, 33554393, 67108859, 134217689, 268435399, 536870909,
  83     1073741789, 2147483647 /*, 4294967291 */
  84 };
  85
  86 #define PRIMES_LENGTH UPRV_LENGTHOF(PRIMES)
  87 #define DEFAULT_PRIME_INDEX 4
  88
  89 /* These ratios are tuned to the PRIMES array such that a resize
  90  * places the table back into the zone of non-resizing.  That is,
  91  * after a call to _uhash_rehash(), a subsequent call to
  92  * _uhash_rehash() should do nothing (should not churn).  This is only
  93  * a potential problem with U_GROW_AND_SHRINK.
  94  */
  95 static const float RESIZE_POLICY_RATIO_TABLE[6] = {
  96     /* low, high water ratio */
  97     0.0F, 0.5F, /* U_GROW: Grow on demand, do not shrink */
  98     0.1F, 0.5F, /* U_GROW_AND_SHRINK: Grow and shrink on demand */
  99     0.0F, 1.0F  /* U_FIXED: Never change size */
 100 };
 101
 102 /*
 103   Invariants for hashcode values:
 104
 105   * DELETED < 0
 106   * EMPTY < 0
 107   * Real hashes >= 0
 108
 109   Hashcodes may not start out this way, but internally they are
 110   adjusted so that they are always positive.  We assume 32-bit
 111   hashcodes; adjust these constants for other hashcode sizes.
 112 */
 113 #define HASH_DELETED    ((int32_t) 0x80000000)
 114 #define HASH_EMPTY      ((int32_t) HASH_DELETED + 1)
 115
 116 #define IS_EMPTY_OR_DELETED(x) ((x) < 0)
 117
 118 /* This macro expects a UHashTok.pointer as its keypointer and
 119    valuepointer parameters */
 120 #define HASH_DELETE_KEY_VALUE(hash, keypointer, valuepointer) \
 121             if (hash->keyDeleter != NULL && keypointer != NULL) { \
 122                 (*hash->keyDeleter)(keypointer); \
 123             } \
 124             if (hash->valueDeleter != NULL && valuepointer != NULL) { \
 125                 (*hash->valueDeleter)(valuepointer); \
 126             }
 127
 128 /*
 129  * Constants for hinting whether a key or value is an integer
 130  * or a pointer.  If a hint bit is zero, then the associated
 131  * token is assumed to be an integer.
 132  */
 133 #define HINT_KEY_POINTER   (1)
 134 #define HINT_VALUE_POINTER (2)
 135
 136 /********************************************************************
 137  * PRIVATE Implementation
 138  ********************************************************************/
 139
 140 static UHashTok
 141 _uhash_setElement(UHashtable *hash, UHashElement* e,
 142                   int32_t hashcode,
 143                   UHashTok key, UHashTok value, int8_t hint) {
 144
 145     UHashTok oldValue = e->value;
 146     if (hash->keyDeleter != NULL && e->key.pointer != NULL &&
 147         e->key.pointer != key.pointer) { /* Avoid double deletion */
 148         (*hash->keyDeleter)(e->key.pointer);
 149     }
 150     if (hash->valueDeleter != NULL) {
 151         if (oldValue.pointer != NULL &&
 152             oldValue.pointer != value.pointer) { /* Avoid double deletion */
 153             (*hash->valueDeleter)(oldValue.pointer);
 154         }
 155         oldValue.pointer = NULL;
 156     }
 157     /* Compilers should copy the UHashTok union correctly, but even if
 158      * they do, memory heap tools (e.g. BoundsChecker) can get
 159      * confused when a pointer is cloaked in a union and then copied.
 160      * TO ALLEVIATE THIS, we use hints (based on what API the user is
 161      * calling) to copy pointers when we know the user thinks
 162      * something is a pointer. */
 163     if (hint & HINT_KEY_POINTER) {
 164         e->key.pointer = key.pointer;
 165     } else {
 166         e->key = key;
 167     }
 168     if (hint & HINT_VALUE_POINTER) {
 169         e->value.pointer = value.pointer;
 170     } else {
 171         e->value = value;
 172     }
 173     e->hashcode = hashcode;
 174     return oldValue;
 175 }
 176
 177 /**
 178  * Assumes that the given element is not empty or deleted.
 179  */
 180 static UHashTok
 181 _uhash_internalRemoveElement(UHashtable *hash, UHashElement* e) {
 182     UHashTok empty;
 183     U_ASSERT(!IS_EMPTY_OR_DELETED(e->hashcode));
 184     --hash->count;
 185     empty.pointer = NULL; empty.integer = 0;
 186     return _uhash_setElement(hash, e, HASH_DELETED, empty, empty, 0);
 187 }
 188
 189 static void
 190 _uhash_internalSetResizePolicy(UHashtable *hash, enum UHashResizePolicy policy) {
 191     U_ASSERT(hash != NULL);
 192     U_ASSERT(((int32_t)policy) >= 0);
 193     U_ASSERT(((int32_t)policy) < 3);
 194     hash->lowWaterRatio  = RESIZE_POLICY_RATIO_TABLE[policy * 2];
 195     hash->highWaterRatio = RESIZE_POLICY_RATIO_TABLE[policy * 2 + 1];
 196 }
 197
 198 /**
 199  * Allocate internal data array of a size determined by the given
 200  * prime index.  If the index is out of range it is pinned into range.
 201  * If the allocation fails the status is set to
 202  * U_MEMORY_ALLOCATION_ERROR and all array storage is freed.  In
 203  * either case the previous array pointer is overwritten.
 204  *
 205  * Caller must ensure primeIndex is in range 0..PRIME_LENGTH-1.
 206  */
 207 static void
 208 _uhash_allocate(UHashtable *hash,
 209                 int32_t primeIndex,
 210                 UErrorCode *status) {
 211
 212     UHashElement *p, *limit;
 213     UHashTok emptytok;
 214
 215     if (U_FAILURE(*status)) return;
 216
 217     U_ASSERT(primeIndex >= 0 && primeIndex < PRIMES_LENGTH);
 218
 219     hash->primeIndex = primeIndex;
 220     hash->length = PRIMES[primeIndex];
 221
 222     p = hash->elements = (UHashElement*)
 223         uprv_malloc(sizeof(UHashElement) * hash->length);
 224
 225     if (hash->elements == NULL) {
 226         *status = U_MEMORY_ALLOCATION_ERROR;
 227         return;
 228     }
 229
 230     emptytok.pointer = NULL; /* Only one of these two is needed */
 231     emptytok.integer = 0;    /* but we don't know which one. */
 232
 233     limit = p + hash->length;
 234     while (p < limit) {
 235         p->key = emptytok;
 236         p->value = emptytok;
 237         p->hashcode = HASH_EMPTY;
 238         ++p;
 239     }
 240
 241     hash->count = 0;
 242     hash->lowWaterMark = (int32_t)(hash->length * hash->lowWaterRatio);
 243     hash->highWaterMark = (int32_t)(hash->length * hash->highWaterRatio);
 244 }
 245
 246 static UHashtable*
 247 _uhash_init(UHashtable *result,
 248               UHashFunction *keyHash,
 249               UKeyComparator *keyComp,
 250               UValueComparator *valueComp,
 251               int32_t primeIndex,
 252               UErrorCode *status)
 253 {
 254     if (U_FAILURE(*status)) return NULL;
 255     U_ASSERT(keyHash != NULL);
 256     U_ASSERT(keyComp != NULL);
 257
 258     result->keyHasher       = keyHash;
 259     result->keyComparator   = keyComp;
 260     result->valueComparator = valueComp;
 261     result->keyDeleter      = NULL;
 262     result->valueDeleter    = NULL;
 263     result->allocated       = FALSE;
 264     _uhash_internalSetResizePolicy(result, U_GROW);
 265
 266     _uhash_allocate(result, primeIndex, status);
 267
 268     if (U_FAILURE(*status)) {
 269         return NULL;
 270     }
 271
 272     return result;
 273 }
 274
 275 static UHashtable*
 276 _uhash_create(UHashFunction *keyHash,
 277               UKeyComparator *keyComp,
 278               UValueComparator *valueComp,
 279               int32_t primeIndex,
 280               UErrorCode *status) {
 281     UHashtable *result;
 282
 283     if (U_FAILURE(*status)) return NULL;
 284
 285     result = (UHashtable*) uprv_malloc(sizeof(UHashtable));
 286     if (result == NULL) {
 287         *status = U_MEMORY_ALLOCATION_ERROR;
 288         return NULL;
 289     }
 290
 291     _uhash_init(result, keyHash, keyComp, valueComp, primeIndex, status);
 292     result->allocated       = TRUE;
 293
 294     if (U_FAILURE(*status)) {
 295         uprv_free(result);
 296         return NULL;
 297     }
 298
 299     return result;
 300 }
 301
 302 /**
 303  * Look for a key in the table, or if no such key exists, the first
 304  * empty slot matching the given hashcode.  Keys are compared using
 305  * the keyComparator function.
 306  *
 307  * First find the start position, which is the hashcode modulo
 308  * the length.  Test it to see if it is:
 309  *
 310  * a. identical:  First check the hash values for a quick check,
 311  *    then compare keys for equality using keyComparator.
 312  * b. deleted
 313  * c. empty
 314  *
 315  * Stop if it is identical or empty, otherwise continue by adding a
 316  * "jump" value (moduloing by the length again to keep it within
 317  * range) and retesting.  For efficiency, there need enough empty
 318  * values so that the searchs stop within a reasonable amount of time.
 319  * This can be changed by changing the high/low water marks.
 320  *
 321  * In theory, this function can return NULL, if it is full (no empty
 322  * or deleted slots) and if no matching key is found.  In practice, we
 323  * prevent this elsewhere (in uhash_put) by making sure the last slot
 324  * in the table is never filled.
 325  *
 326  * The size of the table should be prime for this algorithm to work;
 327  * otherwise we are not guaranteed that the jump value (the secondary
 328  * hash) is relatively prime to the table length.
 329  */
 330 static UHashElement*
 331 _uhash_find(const UHashtable *hash, UHashTok key,
 332             int32_t hashcode) {
 333
 334     int32_t firstDeleted = -1;  /* assume invalid index */
 335     int32_t theIndex, startIndex;
 336     int32_t jump = 0; /* lazy evaluate */
 337     int32_t tableHash;
 338     UHashElement *elements = hash->elements;
 339
 340     hashcode &= 0x7FFFFFFF; /* must be positive */
 341     startIndex = theIndex = (hashcode ^ 0x4000000) % hash->length;
 342
 343     do {
 344         tableHash = elements[theIndex].hashcode;
 345         if (tableHash == hashcode) {          /* quick check */
 346             if ((*hash->keyComparator)(key, elements[theIndex].key)) {
 347                 return &(elements[theIndex]);
 348             }
 349         } else if (!IS_EMPTY_OR_DELETED(tableHash)) {
 350             /* We have hit a slot which contains a key-value pair,
 351              * but for which the hash code does not match.  Keep
 352              * looking.
 353              */
 354         } else if (tableHash == HASH_EMPTY) { /* empty, end o' the line */
 355             break;
 356         } else if (firstDeleted < 0) { /* remember first deleted */
 357             firstDeleted = theIndex;
 358         }
 359         if (jump == 0) { /* lazy compute jump */
 360             /* The jump value must be relatively prime to the table
 361              * length.  As long as the length is prime, then any value
 362              * 1..length-1 will be relatively prime to it.
 363              */
 364             jump = (hashcode % (hash->length - 1)) + 1;
 365         }
 366         theIndex = (theIndex + jump) % hash->length;
 367     } while (theIndex != startIndex);
 368
 369     if (firstDeleted >= 0) {
 370         theIndex = firstDeleted; /* reset if had deleted slot */
 371     } else if (tableHash != HASH_EMPTY) {
 372         /* We get to this point if the hashtable is full (no empty or
 373          * deleted slots), and we've failed to find a match.  THIS
 374          * WILL NEVER HAPPEN as long as uhash_put() makes sure that
 375          * count is always < length.
 376          */
 377         U_ASSERT(FALSE);
 378         return NULL; /* Never happens if uhash_put() behaves */
 379     }
 380     return &(elements[theIndex]);
 381 }
 382
 383 /**
 384  * Attempt to grow or shrink the data arrays in order to make the
 385  * count fit between the high and low water marks.  hash_put() and
 386  * hash_remove() call this method when the count exceeds the high or
 387  * low water marks.  This method may do nothing, if memory allocation
 388  * fails, or if the count is already in range, or if the length is
 389  * already at the low or high limit.  In any case, upon return the
 390  * arrays will be valid.
 391  */
 392 static void
 393 _uhash_rehash(UHashtable *hash, UErrorCode *status) {
 394
 395     UHashElement *old = hash->elements;
 396     int32_t oldLength = hash->length;
 397     int32_t newPrimeIndex = hash->primeIndex;
 398     int32_t i;
 399
 400     if (hash->count > hash->highWaterMark) {
 401         if (++newPrimeIndex >= PRIMES_LENGTH) {
 402             return;
 403         }
 404     } else if (hash->count < hash->lowWaterMark) {
 405         if (--newPrimeIndex < 0) {
 406             return;
 407         }
 408     } else {
 409         return;
 410     }
 411
 412     _uhash_allocate(hash, newPrimeIndex, status);
 413
 414     if (U_FAILURE(*status)) {
 415         hash->elements = old;
 416         hash->length = oldLength;
 417         return;
 418     }
 419
 420     for (i = oldLength - 1; i >= 0; --i) {
 421         if (!IS_EMPTY_OR_DELETED(old[i].hashcode)) {
 422             UHashElement *e = _uhash_find(hash, old[i].key, old[i].hashcode);
 423             U_ASSERT(e != NULL);
 424             U_ASSERT(e->hashcode == HASH_EMPTY);
 425             e->key = old[i].key;
 426             e->value = old[i].value;
 427             e->hashcode = old[i].hashcode;
 428             ++hash->count;
 429         }
 430     }
 431
 432     uprv_free(old);
 433 }
 434
 435 static UHashTok
 436 _uhash_remove(UHashtable *hash,
 437               UHashTok key) {
 438     /* First find the position of the key in the table.  If the object
 439      * has not been removed already, remove it.  If the user wanted
 440      * keys deleted, then delete it also.  We have to put a special
 441      * hashcode in that position that means that something has been
 442      * deleted, since when we do a find, we have to continue PAST any
 443      * deleted values.
 444      */
 445     UHashTok result;
 446     UHashElement* e = _uhash_find(hash, key, hash->keyHasher(key));
 447     U_ASSERT(e != NULL);
 448     result.pointer = NULL;
 449     result.integer = 0;
 450     if (!IS_EMPTY_OR_DELETED(e->hashcode)) {
 451         result = _uhash_internalRemoveElement(hash, e);
 452         if (hash->count < hash->lowWaterMark) {
 453             UErrorCode status = U_ZERO_ERROR;
 454             _uhash_rehash(hash, &status);
 455         }
 456     }
 457     return result;
 458 }
 459
 460 static UHashTok
 461 _uhash_put(UHashtable *hash,
 462            UHashTok key,
 463            UHashTok value,
 464            int8_t hint,
 465            UErrorCode *status) {
 466
 467     /* Put finds the position in the table for the new value.  If the
 468      * key is already in the table, it is deleted, if there is a
 469      * non-NULL keyDeleter.  Then the key, the hash and the value are
 470      * all put at the position in their respective arrays.
 471      */
 472     int32_t hashcode;
 473     UHashElement* e;
 474     UHashTok emptytok;
 475
 476     if (U_FAILURE(*status)) {
 477         goto err;
 478     }
 479     U_ASSERT(hash != NULL);
 480     /* Cannot always check pointer here or iSeries sees NULL every time. */
 481     if ((hint & HINT_VALUE_POINTER) && value.pointer == NULL) {
 482         /* Disallow storage of NULL values, since NULL is returned by
 483          * get() to indicate an absent key.  Storing NULL == removing.
 484          */
 485         return _uhash_remove(hash, key);
 486     }
 487     if (hash->count > hash->highWaterMark) {
 488         _uhash_rehash(hash, status);
 489         if (U_FAILURE(*status)) {
 490             goto err;
 491         }
 492     }
 493
 494     hashcode = (*hash->keyHasher)(key);
 495     e = _uhash_find(hash, key, hashcode);
 496     U_ASSERT(e != NULL);
 497
 498     if (IS_EMPTY_OR_DELETED(e->hashcode)) {
 499         /* Important: We must never actually fill the table up.  If we
 500          * do so, then _uhash_find() will return NULL, and we'll have
 501          * to check for NULL after every call to _uhash_find().  To
 502          * avoid this we make sure there is always at least one empty
 503          * or deleted slot in the table.  This only is a problem if we
 504          * are out of memory and rehash isn't working.
 505          */
 506         ++hash->count;
 507         if (hash->count == hash->length) {
 508             /* Don't allow count to reach length */
 509             --hash->count;
 510             *status = U_MEMORY_ALLOCATION_ERROR;
 511             goto err;
 512         }
 513     }
 514
 515     /* We must in all cases handle storage properly.  If there was an
 516      * old key, then it must be deleted (if the deleter != NULL).
 517      * Make hashcodes stored in table positive.
 518      */
 519     return _uhash_setElement(hash, e, hashcode & 0x7FFFFFFF, key, value, hint);
 520
 521  err:
 522     /* If the deleters are non-NULL, this method adopts its key and/or
 523      * value arguments, and we must be sure to delete the key and/or
 524      * value in all cases, even upon failure.
 525      */
 526     HASH_DELETE_KEY_VALUE(hash, key.pointer, value.pointer);
 527     emptytok.pointer = NULL; emptytok.integer = 0;
 528     return emptytok;
 529 }
 530
 531
 532 /********************************************************************
 533  * PUBLIC API
 534  ********************************************************************/
 535
 536 U_CAPI UHashtable* U_EXPORT2
 537 uhash_open(UHashFunction *keyHash,
 538            UKeyComparator *keyComp,
 539            UValueComparator *valueComp,
 540            UErrorCode *status) {
 541
 542     return _uhash_create(keyHash, keyComp, valueComp, DEFAULT_PRIME_INDEX, status);
 543 }
 544
 545 U_CAPI UHashtable* U_EXPORT2
 546 uhash_openSize(UHashFunction *keyHash,
 547                UKeyComparator *keyComp,
 548                UValueComparator *valueComp,
 549                int32_t size,
 550                UErrorCode *status) {
 551
 552     /* Find the smallest index i for which PRIMES[i] >= size. */
 553     int32_t i = 0;
 554     while (i<(PRIMES_LENGTH-1) && PRIMES[i]<size) {
 555         ++i;
 556     }
 557
 558     return _uhash_create(keyHash, keyComp, valueComp, i, status);
 559 }
 560
 561 U_CAPI UHashtable* U_EXPORT2
 562 uhash_init(UHashtable *fillinResult,
 563            UHashFunction *keyHash,
 564            UKeyComparator *keyComp,
 565            UValueComparator *valueComp,
 566            UErrorCode *status) {
 567
 568     return _uhash_init(fillinResult, keyHash, keyComp, valueComp, DEFAULT_PRIME_INDEX, status);
 569 }
 570
 571 U_CAPI UHashtable* U_EXPORT2
 572 uhash_initSize(UHashtable *fillinResult,
 573                UHashFunction *keyHash,
 574                UKeyComparator *keyComp,
 575                UValueComparator *valueComp,
 576                int32_t size,
 577                UErrorCode *status) {
 578
 579     /* Find the smallest index i for which PRIMES[i] >= size. */
 580     int32_t i = 0;
 581     while (i<(PRIMES_LENGTH-1) && PRIMES[i]<size) {
 582         ++i;
 583     }
 584
 585     return _uhash_init(fillinResult, keyHash, keyComp, valueComp, i, status);
 586 }
 587
 588 U_CAPI void U_EXPORT2
 589 uhash_close(UHashtable *hash) {
 590     if (hash == NULL) {
 591         return;
 592     }
 593     if (hash->elements != NULL) {
 594         if (hash->keyDeleter != NULL || hash->valueDeleter != NULL) {
 595             int32_t pos=UHASH_FIRST;
 596             UHashElement *e;
 597             while ((e = (UHashElement*) uhash_nextElement(hash, &pos)) != NULL) {
 598                 HASH_DELETE_KEY_VALUE(hash, e->key.pointer, e->value.pointer);
 599             }
 600         }
 601         uprv_free(hash->elements);
 602         hash->elements = NULL;
 603     }
 604     if (hash->allocated) {
 605         uprv_free(hash);
 606     }
 607 }
 608
 609 U_CAPI UHashFunction *U_EXPORT2
 610 uhash_setKeyHasher(UHashtable *hash, UHashFunction *fn) {
 611     UHashFunction *result = hash->keyHasher;
 612     hash->keyHasher = fn;
 613     return result;
 614 }
 615
 616 U_CAPI UKeyComparator *U_EXPORT2
 617 uhash_setKeyComparator(UHashtable *hash, UKeyComparator *fn) {
 618     UKeyComparator *result = hash->keyComparator;
 619     hash->keyComparator = fn;
 620     return result;
 621 }
 622 U_CAPI UValueComparator *U_EXPORT2
 623 uhash_setValueComparator(UHashtable *hash, UValueComparator *fn){
 624     UValueComparator *result = hash->valueComparator;
 625     hash->valueComparator = fn;
 626     return result;
 627 }
 628
 629 U_CAPI UObjectDeleter *U_EXPORT2
 630 uhash_setKeyDeleter(UHashtable *hash, UObjectDeleter *fn) {
 631     UObjectDeleter *result = hash->keyDeleter;
 632     hash->keyDeleter = fn;
 633     return result;
 634 }
 635
 636 U_CAPI UObjectDeleter *U_EXPORT2
 637 uhash_setValueDeleter(UHashtable *hash, UObjectDeleter *fn) {
 638     UObjectDeleter *result = hash->valueDeleter;
 639     hash->valueDeleter = fn;
 640     return result;
 641 }
 642
 643 U_CAPI void U_EXPORT2
 644 uhash_setResizePolicy(UHashtable *hash, enum UHashResizePolicy policy) {
 645     UErrorCode status = U_ZERO_ERROR;
 646     _uhash_internalSetResizePolicy(hash, policy);
 647     hash->lowWaterMark  = (int32_t)(hash->length * hash->lowWaterRatio);
 648     hash->highWaterMark = (int32_t)(hash->length * hash->highWaterRatio);
 649     _uhash_rehash(hash, &status);
 650 }
 651
 652 U_CAPI int32_t U_EXPORT2
 653 uhash_count(const UHashtable *hash) {
 654     return hash->count;
 655 }
 656
 657 U_CAPI void* U_EXPORT2
 658 uhash_get(const UHashtable *hash,
 659           const void* key) {
 660     UHashTok keyholder;
 661     keyholder.pointer = (void*) key;
 662     return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.pointer;
 663 }
 664
 665 U_CAPI void* U_EXPORT2
 666 uhash_iget(const UHashtable *hash,
 667            int32_t key) {
 668     UHashTok keyholder;
 669     keyholder.integer = key;
 670     return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.pointer;
 671 }
 672
 673 U_CAPI int32_t U_EXPORT2
 674 uhash_geti(const UHashtable *hash,
 675            const void* key) {
 676     UHashTok keyholder;
 677     keyholder.pointer = (void*) key;
 678     return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.integer;
 679 }
 680
 681 U_CAPI int32_t U_EXPORT2
 682 uhash_igeti(const UHashtable *hash,
 683            int32_t key) {
 684     UHashTok keyholder;
 685     keyholder.integer = key;
 686     return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.integer;
 687 }
 688
 689 U_CAPI void* U_EXPORT2
 690 uhash_put(UHashtable *hash,
 691           void* key,
 692           void* value,
 693           UErrorCode *status) {
 694     UHashTok keyholder, valueholder;
 695     keyholder.pointer = key;
 696     valueholder.pointer = value;
 697     return _uhash_put(hash, keyholder, valueholder,
 698                       HINT_KEY_POINTER | HINT_VALUE_POINTER,
 699                       status).pointer;
 700 }
 701
 702 U_CAPI void* U_EXPORT2
 703 uhash_iput(UHashtable *hash,
 704            int32_t key,
 705            void* value,
 706            UErrorCode *status) {
 707     UHashTok keyholder, valueholder;
 708     keyholder.integer = key;
 709     valueholder.pointer = value;
 710     return _uhash_put(hash, keyholder, valueholder,
 711                       HINT_VALUE_POINTER,
 712                       status).pointer;
 713 }
 714
 715 U_CAPI int32_t U_EXPORT2
 716 uhash_puti(UHashtable *hash,
 717            void* key,
 718            int32_t value,
 719            UErrorCode *status) {
 720     UHashTok keyholder, valueholder;
 721     keyholder.pointer = key;
 722     valueholder.integer = value;
 723     return _uhash_put(hash, keyholder, valueholder,
 724                       HINT_KEY_POINTER,
 725                       status).integer;
 726 }
 727
 728
 729 U_CAPI int32_t U_EXPORT2
 730 uhash_iputi(UHashtable *hash,
 731            int32_t key,
 732            int32_t value,
 733            UErrorCode *status) {
 734     UHashTok keyholder, valueholder;
 735     keyholder.integer = key;
 736     valueholder.integer = value;
 737     return _uhash_put(hash, keyholder, valueholder,
 738                       0, /* neither is a ptr */
 739                       status).integer;
 740 }
 741
 742 U_CAPI void* U_EXPORT2
 743 uhash_remove(UHashtable *hash,
 744              const void* key) {
 745     UHashTok keyholder;
 746     keyholder.pointer = (void*) key;
 747     return _uhash_remove(hash, keyholder).pointer;
 748 }
 749
 750 U_CAPI void* U_EXPORT2
 751 uhash_iremove(UHashtable *hash,
 752               int32_t key) {
 753     UHashTok keyholder;
 754     keyholder.integer = key;
 755     return _uhash_remove(hash, keyholder).pointer;
 756 }
 757
 758 U_CAPI int32_t U_EXPORT2
 759 uhash_removei(UHashtable *hash,
 760               const void* key) {
 761     UHashTok keyholder;
 762     keyholder.pointer = (void*) key;
 763     return _uhash_remove(hash, keyholder).integer;
 764 }
 765
 766 U_CAPI int32_t U_EXPORT2
 767 uhash_iremovei(UHashtable *hash,
 768                int32_t key) {
 769     UHashTok keyholder;
 770     keyholder.integer = key;
 771     return _uhash_remove(hash, keyholder).integer;
 772 }
 773
 774 U_CAPI void U_EXPORT2
 775 uhash_removeAll(UHashtable *hash) {
 776     int32_t pos = UHASH_FIRST;
 777     const UHashElement *e;
 778     U_ASSERT(hash != NULL);
 779     if (hash->count != 0) {
 780         while ((e = uhash_nextElement(hash, &pos)) != NULL) {
 781             uhash_removeElement(hash, e);
 782         }
 783     }
 784     U_ASSERT(hash->count == 0);
 785 }
 786
 787 U_CAPI const UHashElement* U_EXPORT2
 788 uhash_find(const UHashtable *hash, const void* key) {
 789     UHashTok keyholder;
 790     const UHashElement *e;
 791     keyholder.pointer = (void*) key;
 792     e = _uhash_find(hash, keyholder, hash->keyHasher(keyholder));
 793     return IS_EMPTY_OR_DELETED(e->hashcode) ? NULL : e;
 794 }
 795
 796 U_CAPI const UHashElement* U_EXPORT2
 797 uhash_nextElement(const UHashtable *hash, int32_t *pos) {
 798     /* Walk through the array until we find an element that is not
 799      * EMPTY and not DELETED.
 800      */
 801     int32_t i;
 802     U_ASSERT(hash != NULL);
 803     for (i = *pos + 1; i < hash->length; ++i) {
 804         if (!IS_EMPTY_OR_DELETED(hash->elements[i].hashcode)) {
 805             *pos = i;
 806             return &(hash->elements[i]);
 807         }
 808     }
 809
 810     /* No more elements */
 811     return NULL;
 812 }
 813
 814 U_CAPI void* U_EXPORT2
 815 uhash_removeElement(UHashtable *hash, const UHashElement* e) {
 816     U_ASSERT(hash != NULL);
 817     U_ASSERT(e != NULL);
 818     if (!IS_EMPTY_OR_DELETED(e->hashcode)) {
 819         UHashElement *nce = (UHashElement *)e;
 820         return _uhash_internalRemoveElement(hash, nce).pointer;
 821     }
 822     return NULL;
 823 }
 824
 825 /********************************************************************
 826  * UHashTok convenience
 827  ********************************************************************/
 828
 829 /**
 830  * Return a UHashTok for an integer.
 831  */
 832 /*U_CAPI UHashTok U_EXPORT2
 833 uhash_toki(int32_t i) {
 834     UHashTok tok;
 835     tok.integer = i;
 836     return tok;
 837 }*/
 838
 839 /**
 840  * Return a UHashTok for a pointer.
 841  */
 842 /*U_CAPI UHashTok U_EXPORT2
 843 uhash_tokp(void* p) {
 844     UHashTok tok;
 845     tok.pointer = p;
 846     return tok;
 847 }*/
 848
 849 /********************************************************************
 850  * PUBLIC Key Hash Functions
 851  ********************************************************************/
 852
 853 U_CAPI int32_t U_EXPORT2
 854 uhash_hashUChars(const UHashTok key) {
 855     const UChar *s = (const UChar *)key.pointer;
 856     return s == NULL ? 0 : ustr_hashUCharsN(s, u_strlen(s));
 857 }
 858
 859 U_CAPI int32_t U_EXPORT2
 860 uhash_hashChars(const UHashTok key) {
 861     const char *s = (const char *)key.pointer;
 862     return s == NULL ? 0 : ustr_hashCharsN(s, uprv_strlen(s));
 863 }
 864
 865 U_CAPI int32_t U_EXPORT2
 866 uhash_hashIChars(const UHashTok key) {
 867     const char *s = (const char *)key.pointer;
 868     return s == NULL ? 0 : ustr_hashICharsN(s, uprv_strlen(s));
 869 }
 870
 871 U_CAPI UBool U_EXPORT2
 872 uhash_equals(const UHashtable* hash1, const UHashtable* hash2){
 873     int32_t count1, count2, pos, i;
 874
 875     if(hash1==hash2){
 876         return TRUE;
 877     }
 878
 879     /*
 880      * Make sure that we are comparing 2 valid hashes of the same type
 881      * with valid comparison functions.
 882      * Without valid comparison functions, a binary comparison
 883      * of the hash values will yield random results on machines
 884      * with 64-bit pointers and 32-bit integer hashes.
 885      * A valueComparator is normally optional.
 886      */
 887     if (hash1==NULL || hash2==NULL ||
 888         hash1->keyComparator != hash2->keyComparator ||
 889         hash1->valueComparator != hash2->valueComparator ||
 890         hash1->valueComparator == NULL)
 891     {
 892         /*
 893         Normally we would return an error here about incompatible hash tables,
 894         but we return FALSE instead.
 895         */
 896         return FALSE;
 897     }
 898
 899     count1 = uhash_count(hash1);
 900     count2 = uhash_count(hash2);
 901     if(count1!=count2){
 902         return FALSE;
 903     }
 904
 905     pos=UHASH_FIRST;
 906     for(i=0; i<count1; i++){
 907         const UHashElement* elem1 = uhash_nextElement(hash1, &pos);
 908         const UHashTok key1 = elem1->key;
 909         const UHashTok val1 = elem1->value;
 910         /* here the keys are not compared, instead the key form hash1 is used to fetch
 911          * value from hash2. If the hashes are equal then then both hashes should
 912          * contain equal values for the same key!
 913          */
 914         const UHashElement* elem2 = _uhash_find(hash2, key1, hash2->keyHasher(key1));
 915         const UHashTok val2 = elem2->value;
 916         if(hash1->valueComparator(val1, val2)==FALSE){
 917             return FALSE;
 918         }
 919     }
 920     return TRUE;
 921 }
 922
 923 /********************************************************************
 924  * PUBLIC Comparator Functions
 925  ********************************************************************/
 926
 927 U_CAPI UBool U_EXPORT2
 928 uhash_compareUChars(const UHashTok key1, const UHashTok key2) {
 929     const UChar *p1 = (const UChar*) key1.pointer;
 930     const UChar *p2 = (const UChar*) key2.pointer;
 931     if (p1 == p2) {
 932         return TRUE;
 933     }
 934     if (p1 == NULL || p2 == NULL) {
 935         return FALSE;
 936     }
 937     while (*p1 != 0 && *p1 == *p2) {
 938         ++p1;
 939         ++p2;
 940     }
 941     return (UBool)(*p1 == *p2);
 942 }
 943
 944 U_CAPI UBool U_EXPORT2
 945 uhash_compareChars(const UHashTok key1, const UHashTok key2) {
 946     const char *p1 = (const char*) key1.pointer;
 947     const char *p2 = (const char*) key2.pointer;
 948     if (p1 == p2) {
 949         return TRUE;
 950     }
 951     if (p1 == NULL || p2 == NULL) {
 952         return FALSE;
 953     }
 954     while (*p1 != 0 && *p1 == *p2) {
 955         ++p1;
 956         ++p2;
 957     }
 958     return (UBool)(*p1 == *p2);
 959 }
 960
 961 U_CAPI UBool U_EXPORT2
 962 uhash_compareIChars(const UHashTok key1, const UHashTok key2) {
 963     const char *p1 = (const char*) key1.pointer;
 964     const char *p2 = (const char*) key2.pointer;
 965     if (p1 == p2) {
 966         return TRUE;
 967     }
 968     if (p1 == NULL || p2 == NULL) {
 969         return FALSE;
 970     }
 971     while (*p1 != 0 && uprv_tolower(*p1) == uprv_tolower(*p2)) {
 972         ++p1;
 973         ++p2;
 974     }
 975     return (UBool)(*p1 == *p2);
 976 }
 977
 978 /********************************************************************
 979  * PUBLIC int32_t Support Functions
 980  ********************************************************************/
 981
 982 U_CAPI int32_t U_EXPORT2
 983 uhash_hashLong(const UHashTok key) {
 984     return key.integer;
 985 }
 986
 987 U_CAPI UBool U_EXPORT2
 988 uhash_compareLong(const UHashTok key1, const UHashTok key2) {
 989     return (UBool)(key1.integer == key2.integer);
 990 }