libkern/kxld/kxld_dict.c

   1 /*
   2  * Copyright (c) 2007-2008 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 #include <string.h>
  29 #include <sys/types.h>
  30
  31 #define DEBUG_ASSERT_COMPONENT_NAME_STRING "kxld"
  32 #include <AssertMacros.h>
  33
  34 #include "kxld_dict.h"
  35 #include "kxld_util.h"
  36
  37 /*******************************************************************************
  38 * Types and macros
  39 *******************************************************************************/
  40
  41 /* Ratio of num_entries:num_buckets that will cause a resize */
  42 #define RESIZE_NUMER 7
  43 #define RESIZE_DENOM 10
  44 #define RESIZE_THRESHOLD(x) (((x)*RESIZE_NUMER) / RESIZE_DENOM)
  45 #define MIN_BUCKETS(x) (((x)*RESIZE_DENOM) / RESIZE_NUMER)
  46
  47 /* Selected for good scaling qualities when resizing dictionary
  48  * ... see: http://www.concentric.net/~ttwang/tech/hashsize.htm
  49  */
  50 #define DEFAULT_DICT_SIZE 89
  51
  52 typedef struct dict_entry DictEntry;
  53
  54 typedef enum {
  55     EMPTY = 0,
  56     USED = 1,
  57     DELETED = 2
  58 } DictEntryState;
  59
  60 struct dict_entry {
  61     const void *key;
  62     void *value;
  63     DictEntryState state;
  64 };
  65
  66 /*******************************************************************************
  67 * Function prototypes
  68 *******************************************************************************/
  69
  70 static kern_return_t get_locate_index(const KXLDDict *dict, const void *key,
  71     u_int *idx);
  72 static kern_return_t get_insert_index(const KXLDDict *dict, const void *key,
  73     u_int *idx);
  74 static kern_return_t resize_dict(KXLDDict *dict);
  75
  76 /*******************************************************************************
  77 *******************************************************************************/
  78 kern_return_t
  79 kxld_dict_init(KXLDDict * dict, kxld_dict_hash hash, kxld_dict_cmp cmp,
  80     u_int num_entries)
  81 {
  82     kern_return_t rval = KERN_FAILURE;
  83     u_int min_buckets = MIN_BUCKETS(num_entries);
  84     u_int num_buckets = DEFAULT_DICT_SIZE;
  85
  86     check(dict);
  87     check(hash);
  88     check(cmp);
  89
  90     /* We want the number of allocated buckets to be at least twice that of the
  91      * number to be inserted.
  92      */
  93     while (min_buckets > num_buckets) {
  94         num_buckets *= 2;
  95         num_buckets++;
  96     }
  97
  98     /* Allocate enough buckets for the anticipated number of entries */
  99     rval = kxld_array_init(&dict->buckets, sizeof(DictEntry), num_buckets);
 100     require_noerr(rval, finish);
 101
 102     /* Initialize */
 103     dict->hash = hash;
 104     dict->cmp = cmp;
 105     dict->num_entries = 0;
 106     dict->resize_threshold = RESIZE_THRESHOLD(num_buckets);
 107
 108     rval = KERN_SUCCESS;
 109
 110 finish:
 111     return rval;
 112 }
 113
 114 /*******************************************************************************
 115 *******************************************************************************/
 116 void
 117 kxld_dict_clear(KXLDDict *dict)
 118 {
 119     check(dict);
 120
 121     dict->hash = NULL;
 122     dict->cmp = NULL;
 123     dict->num_entries = 0;
 124     dict->resize_threshold = 0;
 125     kxld_array_clear(&dict->buckets);
 126     kxld_array_clear(&dict->resize_buckets);
 127 }
 128
 129 /*******************************************************************************
 130 *******************************************************************************/
 131 void
 132 kxld_dict_iterator_init(KXLDDictIterator *iter, const KXLDDict *dict)
 133 {
 134     check(iter);
 135     check(dict);
 136
 137     iter->idx = 0;
 138     iter->dict = dict;
 139 }
 140
 141 /*******************************************************************************
 142 *******************************************************************************/
 143 void
 144 kxld_dict_deinit(KXLDDict *dict)
 145 {
 146     check(dict);
 147
 148     kxld_array_deinit(&dict->buckets);
 149     kxld_array_deinit(&dict->resize_buckets);
 150 }
 151
 152 /*******************************************************************************
 153 *******************************************************************************/
 154 u_int
 155 kxld_dict_get_num_entries(const KXLDDict *dict)
 156 {
 157     check(dict);
 158
 159     return dict->num_entries;
 160 }
 161
 162 /*******************************************************************************
 163 *******************************************************************************/
 164 void *
 165 kxld_dict_find(const KXLDDict *dict, const void *key)
 166 {
 167     kern_return_t rval = KERN_FAILURE;
 168     DictEntry *entry = NULL;
 169     u_int idx = 0;
 170
 171     check(dict);
 172     check(key);
 173
 174     rval = get_locate_index(dict, key, &idx);
 175     if (rval) return NULL;
 176
 177     entry = kxld_array_get_item(&dict->buckets, idx);
 178
 179     return entry->value;
 180 }
 181
 182 /*******************************************************************************
 183 * This dictionary uses linear probing, which means that when there is a
 184 * collision, we just walk along the buckets until a free bucket shows up.
 185 * A consequence of this is that when looking up an item, items that lie between
 186 * its hash value and its actual bucket may have been deleted since it was
 187 * inserted.  Thus, we should only stop a lookup when we've wrapped around the
 188 * dictionary or encountered an EMPTY bucket.
 189 ********************************************************************************/
 190 static kern_return_t
 191 get_locate_index(const KXLDDict *dict, const void *key, u_int *_idx)
 192 {
 193     kern_return_t rval = KERN_FAILURE;
 194     DictEntry *entry = NULL;
 195     u_int base, idx;
 196
 197     base = idx = dict->hash(dict, key);
 198
 199     /* Iterate until we match the key, wrap, or hit an empty bucket */
 200     entry = kxld_array_get_item(&dict->buckets, idx);
 201     while (!dict->cmp(entry->key, key)) {
 202         if (entry->state == EMPTY) goto finish;
 203
 204         idx = (idx + 1) % dict->buckets.nitems;
 205         if (idx == base) goto finish;
 206
 207         entry = kxld_array_get_item(&dict->buckets, idx);
 208     }
 209
 210     check(idx < dict->buckets.nitems);
 211
 212     *_idx = idx;
 213     rval = KERN_SUCCESS;
 214
 215 finish:
 216     return rval;
 217 }
 218
 219 /*******************************************************************************
 220 *******************************************************************************/
 221 kern_return_t
 222 kxld_dict_insert(KXLDDict *dict, const void *key, void *value)
 223 {
 224     kern_return_t rval = KERN_FAILURE;
 225     DictEntry *entry = NULL;
 226     u_int idx = 0;
 227
 228     check(dict);
 229     check(key);
 230     check(value);
 231
 232     /* Resize if we are greater than the capacity threshold.
 233      * Note: this is expensive, but the dictionary can be sized correctly at
 234      * construction to avoid ever having to do this.
 235      */
 236     while (dict->num_entries > dict->resize_threshold) {
 237         rval = resize_dict(dict);
 238         require_noerr(rval, finish);
 239     }
 240
 241     /* If this function returns FULL after we've already resized appropriately
 242      * something is very wrong and we should return an error.
 243      */
 244     rval = get_insert_index(dict, key, &idx);
 245     require_noerr(rval, finish);
 246
 247     /* Insert the new key-value pair into the bucket, but only count it as a
 248      * new entry if we are not overwriting an existing entry.
 249      */
 250     entry = kxld_array_get_item(&dict->buckets, idx);
 251     if (entry->state != USED) {
 252         dict->num_entries++;
 253         entry->key = key;
 254         entry->state = USED;
 255     }
 256     entry->value = value;
 257
 258     rval = KERN_SUCCESS;
 259
 260 finish:
 261     return rval;
 262 }
 263
 264 /*******************************************************************************
 265 * Increases the hash table's capacity by 2N+1.  Uses dictionary API.  Not
 266 * fast; just correct.
 267 *******************************************************************************/
 268 static kern_return_t
 269 resize_dict(KXLDDict *dict)
 270 {
 271     kern_return_t rval = KERN_FAILURE;
 272     KXLDArray tmparray;
 273     DictEntry *entry = NULL;
 274     u_int nbuckets = (dict->buckets.nitems * 2 + 1);
 275     u_int i = 0;
 276
 277     check(dict);
 278
 279     /* Initialize a new set of buckets to hold more entries */
 280     rval = kxld_array_init(&dict->resize_buckets, sizeof(DictEntry), nbuckets);
 281     require_noerr(rval, finish);
 282
 283     /* Swap the new buckets with the old buckets */
 284     tmparray = dict->buckets;
 285     dict->buckets = dict->resize_buckets;
 286     dict->resize_buckets = tmparray;
 287
 288     /* Reset dictionary parameters */
 289     dict->num_entries = 0;
 290     dict->resize_threshold = RESIZE_THRESHOLD(dict->buckets.nitems);
 291
 292     /* Rehash all of the entries */
 293     for (i = 0; i < dict->resize_buckets.nitems; ++i) {
 294         entry = kxld_array_get_item(&dict->resize_buckets, i);
 295         if (entry->state == USED) {
 296             rval = kxld_dict_insert(dict, entry->key, entry->value);
 297             require_noerr(rval, finish);
 298         }
 299     }
 300
 301     /* Clear the old buckets */
 302     kxld_array_clear(&dict->resize_buckets);
 303
 304     rval = KERN_SUCCESS;
 305
 306 finish:
 307     return rval;
 308 }
 309
 310 /*******************************************************************************
 311 * Simple function to find the first empty cell
 312 *******************************************************************************/
 313 static kern_return_t
 314 get_insert_index(const KXLDDict *dict, const void *key, u_int *r_index)
 315 {
 316     kern_return_t rval = KERN_FAILURE;
 317     DictEntry *entry = NULL;
 318     u_int base, idx;
 319
 320     base = idx = dict->hash(dict, key);
 321
 322     /* Iterate through the buckets until we find an EMPTY bucket, a DELETED
 323      * bucket, or a key match.
 324      */
 325     entry = kxld_array_get_item(&dict->buckets, idx);
 326     while (entry->state == USED && !dict->cmp(entry->key, key)) {
 327         idx = (idx + 1) % dict->buckets.nitems;
 328         require_action(base != idx, finish, rval=KERN_FAILURE);
 329         entry = kxld_array_get_item(&dict->buckets, idx);
 330     }
 331
 332     *r_index = idx;
 333     rval = KERN_SUCCESS;
 334
 335 finish:
 336     return rval;
 337 }
 338
 339 /*******************************************************************************
 340 *******************************************************************************/
 341 void
 342 kxld_dict_remove(KXLDDict *dict, const void *key, void **value)
 343 {
 344     kern_return_t rval = KERN_FAILURE;
 345     DictEntry *entry = NULL;
 346     u_int idx = 0;
 347
 348     check(dict);
 349     check(key);
 350
 351     /* Find the item */
 352     rval = get_locate_index(dict, key, &idx);
 353     if (rval) {
 354         if (value) *value = NULL;
 355         return;
 356     }
 357
 358     entry = kxld_array_get_item(&dict->buckets, idx);
 359
 360     /* Save the value if requested */
 361     if (value) *value = entry->value;
 362
 363     /* Delete the item from the dictionary */
 364     entry->key = NULL;
 365     entry->value = NULL;
 366     entry->state = DELETED;
 367     dict->num_entries--;
 368 }
 369
 370 /*******************************************************************************
 371 *******************************************************************************/
 372 void
 373 kxld_dict_iterator_get_next(KXLDDictIterator *iter, const void **key,
 374     void **value)
 375 {
 376     DictEntry *entry = NULL;
 377
 378     check(iter);
 379     check(key);
 380     check(value);
 381
 382     *key = NULL;
 383     *value = NULL;
 384
 385     /* Walk over the dictionary looking for USED buckets */
 386     for (; iter->idx < iter->dict->buckets.nitems; ++(iter->idx)) {
 387         entry = kxld_array_get_item(&iter->dict->buckets, iter->idx);
 388         if (entry->state == USED) {
 389             *key = entry->key;
 390             *value = entry->value;
 391             ++(iter->idx);
 392             break;
 393         }
 394     }
 395 }
 396
 397 /*******************************************************************************
 398 *******************************************************************************/
 399 void
 400 kxld_dict_iterator_reset(KXLDDictIterator *iter)
 401 {
 402     iter->idx = 0;
 403 }
 404
 405 /*******************************************************************************
 406 * This is Daniel Bernstein's hash algorithm from comp.lang.c
 407 * It's fast and distributes well.  Returns an idx into the symbol hash table.
 408 * NOTE: Will not check for a valid pointer - performance
 409 *******************************************************************************/
 410 u_int
 411 kxld_dict_string_hash(const KXLDDict *dict, const void *_key)
 412 {
 413     const char *key = _key;
 414     u_int c = 0;
 415     u_int hash_val = 5381;
 416
 417     check(dict);
 418     check(_key);
 419
 420     while ((c = *key++)) {
 421         /* hash(i) = hash(i-1) *33 ^ name[i] */
 422         hash_val = ((hash_val << 5) + hash_val) ^ c;
 423     }
 424
 425     return (hash_val % dict->buckets.nitems);
 426 }
 427
 428 u_int
 429 kxld_dict_uint32_hash(const KXLDDict *dict, const void *_key)
 430 {
 431     uint32_t key = *(const uint32_t *) _key;
 432
 433     check(_key);
 434
 435     return (u_int) (key % dict->buckets.nitems);
 436 }
 437
 438 u_int
 439 kxld_dict_kxldaddr_hash(const KXLDDict *dict, const void *_key)
 440 {
 441     kxld_addr_t key = *(const kxld_addr_t *) _key;
 442
 443     check(_key);
 444
 445     return (u_int) (key % dict->buckets.nitems);
 446 }
 447
 448 u_int
 449 kxld_dict_string_cmp(const void *key1, const void *key2)
 450 {
 451     return streq(key1, key2);
 452 }
 453
 454 u_int
 455 kxld_dict_uint32_cmp(const void *key1, const void *key2)
 456 {
 457     const uint32_t *a = key1;
 458     const uint32_t *b = key2;
 459
 460     return (a && b && (*a == *b));
 461 }
 462
 463 u_int
 464 kxld_dict_kxldaddr_cmp(const void *key1, const void *key2)
 465 {
 466     const kxld_addr_t *a = key1;
 467     const kxld_addr_t *b = key2;
 468
 469     return (a && b && (*a == *b));
 470 }
 471