libkern/kxld/kxld_dict.c

   1 /*
   2  * Copyright (c) 2007-2008 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 #include <string.h>
  29 #include <sys/types.h>
  30
  31 #define DEBUG_ASSERT_COMPONENT_NAME_STRING "kxld"
  32 #include <AssertMacros.h>
  33
  34 #include "kxld_dict.h"
  35 #include "kxld_util.h"
  36
  37 /*******************************************************************************
  38 * Types and macros
  39 *******************************************************************************/
  40
  41 /* Ratio of num_entries:num_buckets that will cause a resize */
  42 #define RESIZE_NUMER 7
  43 #define RESIZE_DENOM 10
  44 #define RESIZE_THRESHOLD(x) (((x)*RESIZE_NUMER) / RESIZE_DENOM)
  45 #define MIN_BUCKETS(x) (((x)*RESIZE_DENOM) / RESIZE_NUMER)
  46
  47 /* Selected for good scaling qualities when resizing dictionary
  48  * ... see: http://www.concentric.net/~ttwang/tech/hashsize.htm
  49  */
  50 #define DEFAULT_DICT_SIZE 89
  51
  52 typedef struct dict_entry DictEntry;
  53
  54 typedef enum {
  55         EMPTY = 0,
  56         USED = 1,
  57         DELETED = 2
  58 } DictEntryState;
  59
  60 struct dict_entry {
  61         const void *key;
  62         void *value;
  63         DictEntryState state;
  64 };
  65
  66 /*******************************************************************************
  67 * Function prototypes
  68 *******************************************************************************/
  69
  70 static kern_return_t get_locate_index(const KXLDDict *dict, const void *key,
  71     u_int *idx);
  72 static kern_return_t get_insert_index(const KXLDDict *dict, const void *key,
  73     u_int *idx);
  74 static kern_return_t resize_dict(KXLDDict *dict);
  75
  76 /*******************************************************************************
  77 *******************************************************************************/
  78 kern_return_t
  79 kxld_dict_init(KXLDDict * dict, kxld_dict_hash hash, kxld_dict_cmp cmp,
  80     u_int num_entries)
  81 {
  82         kern_return_t rval = KERN_FAILURE;
  83         u_int min_buckets = MIN_BUCKETS(num_entries);
  84         u_int num_buckets = DEFAULT_DICT_SIZE;
  85
  86         check(dict);
  87         check(hash);
  88         check(cmp);
  89
  90         /* We want the number of allocated buckets to be at least twice that of the
  91          * number to be inserted.
  92          */
  93         while (min_buckets > num_buckets) {
  94                 num_buckets *= 2;
  95                 num_buckets++;
  96         }
  97
  98         /* Allocate enough buckets for the anticipated number of entries */
  99         rval = kxld_array_init(&dict->buckets, sizeof(DictEntry), num_buckets);
 100         require_noerr(rval, finish);
 101
 102         /* Initialize */
 103         dict->hash = hash;
 104         dict->cmp = cmp;
 105         dict->num_entries = 0;
 106         dict->resize_threshold = RESIZE_THRESHOLD(num_buckets);
 107
 108         rval = KERN_SUCCESS;
 109
 110 finish:
 111         return rval;
 112 }
 113
 114 /*******************************************************************************
 115 *******************************************************************************/
 116 void
 117 kxld_dict_clear(KXLDDict *dict)
 118 {
 119         check(dict);
 120
 121         dict->hash = NULL;
 122         dict->cmp = NULL;
 123         dict->num_entries = 0;
 124         dict->resize_threshold = 0;
 125         kxld_array_clear(&dict->buckets);
 126         kxld_array_clear(&dict->resize_buckets);
 127 }
 128
 129 /*******************************************************************************
 130 *******************************************************************************/
 131 void
 132 kxld_dict_iterator_init(KXLDDictIterator *iter, const KXLDDict *dict)
 133 {
 134         check(iter);
 135         check(dict);
 136
 137         iter->idx = 0;
 138         iter->dict = dict;
 139 }
 140
 141 /*******************************************************************************
 142 *******************************************************************************/
 143 void
 144 kxld_dict_deinit(KXLDDict *dict)
 145 {
 146         check(dict);
 147
 148         kxld_array_deinit(&dict->buckets);
 149         kxld_array_deinit(&dict->resize_buckets);
 150 }
 151
 152 /*******************************************************************************
 153 *******************************************************************************/
 154 u_int
 155 kxld_dict_get_num_entries(const KXLDDict *dict)
 156 {
 157         check(dict);
 158
 159         return dict->num_entries;
 160 }
 161
 162 /*******************************************************************************
 163 *******************************************************************************/
 164 void *
 165 kxld_dict_find(const KXLDDict *dict, const void *key)
 166 {
 167         kern_return_t rval = KERN_FAILURE;
 168         DictEntry *entry = NULL;
 169         u_int idx = 0;
 170
 171         check(dict);
 172         check(key);
 173
 174         rval = get_locate_index(dict, key, &idx);
 175         if (rval) {
 176                 return NULL;
 177         }
 178
 179         entry = kxld_array_get_item(&dict->buckets, idx);
 180
 181         return entry->value;
 182 }
 183
 184 /*******************************************************************************
 185  * This dictionary uses linear probing, which means that when there is a
 186  * collision, we just walk along the buckets until a free bucket shows up.
 187  * A consequence of this is that when looking up an item, items that lie between
 188  * its hash value and its actual bucket may have been deleted since it was
 189  * inserted.  Thus, we should only stop a lookup when we've wrapped around the
 190  * dictionary or encountered an EMPTY bucket.
 191  ********************************************************************************/
 192 static kern_return_t
 193 get_locate_index(const KXLDDict *dict, const void *key, u_int *_idx)
 194 {
 195         kern_return_t rval = KERN_FAILURE;
 196         DictEntry *entry = NULL;
 197         u_int base, idx;
 198
 199         base = idx = dict->hash(dict, key);
 200
 201         /* Iterate until we match the key, wrap, or hit an empty bucket */
 202         entry = kxld_array_get_item(&dict->buckets, idx);
 203         while (!dict->cmp(entry->key, key)) {
 204                 if (entry->state == EMPTY) {
 205                         goto finish;
 206                 }
 207
 208                 idx = (idx + 1) % dict->buckets.nitems;
 209                 if (idx == base) {
 210                         goto finish;
 211                 }
 212
 213                 entry = kxld_array_get_item(&dict->buckets, idx);
 214         }
 215
 216         check(idx < dict->buckets.nitems);
 217
 218         *_idx = idx;
 219         rval = KERN_SUCCESS;
 220
 221 finish:
 222         return rval;
 223 }
 224
 225 /*******************************************************************************
 226 *******************************************************************************/
 227 kern_return_t
 228 kxld_dict_insert(KXLDDict *dict, const void *key, void *value)
 229 {
 230         kern_return_t rval = KERN_FAILURE;
 231         DictEntry *entry = NULL;
 232         u_int idx = 0;
 233
 234         check(dict);
 235         check(key);
 236         check(value);
 237
 238         /* Resize if we are greater than the capacity threshold.
 239          * Note: this is expensive, but the dictionary can be sized correctly at
 240          * construction to avoid ever having to do this.
 241          */
 242         while (dict->num_entries > dict->resize_threshold) {
 243                 rval = resize_dict(dict);
 244                 require_noerr(rval, finish);
 245         }
 246
 247         /* If this function returns FULL after we've already resized appropriately
 248          * something is very wrong and we should return an error.
 249          */
 250         rval = get_insert_index(dict, key, &idx);
 251         require_noerr(rval, finish);
 252
 253         /* Insert the new key-value pair into the bucket, but only count it as a
 254          * new entry if we are not overwriting an existing entry.
 255          */
 256         entry = kxld_array_get_item(&dict->buckets, idx);
 257         if (entry->state != USED) {
 258                 dict->num_entries++;
 259                 entry->key = key;
 260                 entry->state = USED;
 261         }
 262         entry->value = value;
 263
 264         rval = KERN_SUCCESS;
 265
 266 finish:
 267         return rval;
 268 }
 269
 270 /*******************************************************************************
 271 * Increases the hash table's capacity by 2N+1.  Uses dictionary API.  Not
 272 * fast; just correct.
 273 *******************************************************************************/
 274 static kern_return_t
 275 resize_dict(KXLDDict *dict)
 276 {
 277         kern_return_t rval = KERN_FAILURE;
 278         KXLDArray tmparray;
 279         DictEntry *entry = NULL;
 280         u_int nbuckets = (dict->buckets.nitems * 2 + 1);
 281         u_int i = 0;
 282
 283         check(dict);
 284
 285         /* Initialize a new set of buckets to hold more entries */
 286         rval = kxld_array_init(&dict->resize_buckets, sizeof(DictEntry), nbuckets);
 287         require_noerr(rval, finish);
 288
 289         /* Swap the new buckets with the old buckets */
 290         tmparray = dict->buckets;
 291         dict->buckets = dict->resize_buckets;
 292         dict->resize_buckets = tmparray;
 293
 294         /* Reset dictionary parameters */
 295         dict->num_entries = 0;
 296         dict->resize_threshold = RESIZE_THRESHOLD(dict->buckets.nitems);
 297
 298         /* Rehash all of the entries */
 299         for (i = 0; i < dict->resize_buckets.nitems; ++i) {
 300                 entry = kxld_array_get_item(&dict->resize_buckets, i);
 301                 if (entry->state == USED) {
 302                         rval = kxld_dict_insert(dict, entry->key, entry->value);
 303                         require_noerr(rval, finish);
 304                 }
 305         }
 306
 307         /* Clear the old buckets */
 308         kxld_array_clear(&dict->resize_buckets);
 309
 310         rval = KERN_SUCCESS;
 311
 312 finish:
 313         return rval;
 314 }
 315
 316 /*******************************************************************************
 317 * Simple function to find the first empty cell
 318 *******************************************************************************/
 319 static kern_return_t
 320 get_insert_index(const KXLDDict *dict, const void *key, u_int *r_index)
 321 {
 322         kern_return_t rval = KERN_FAILURE;
 323         DictEntry *entry = NULL;
 324         u_int base, idx;
 325
 326         base = idx = dict->hash(dict, key);
 327
 328         /* Iterate through the buckets until we find an EMPTY bucket, a DELETED
 329          * bucket, or a key match.
 330          */
 331         entry = kxld_array_get_item(&dict->buckets, idx);
 332         while (entry->state == USED && !dict->cmp(entry->key, key)) {
 333                 idx = (idx + 1) % dict->buckets.nitems;
 334                 require_action(base != idx, finish, rval = KERN_FAILURE);
 335                 entry = kxld_array_get_item(&dict->buckets, idx);
 336         }
 337
 338         *r_index = idx;
 339         rval = KERN_SUCCESS;
 340
 341 finish:
 342         return rval;
 343 }
 344
 345 /*******************************************************************************
 346 *******************************************************************************/
 347 void
 348 kxld_dict_remove(KXLDDict *dict, const void *key, void **value)
 349 {
 350         kern_return_t rval = KERN_FAILURE;
 351         DictEntry *entry = NULL;
 352         u_int idx = 0;
 353
 354         check(dict);
 355         check(key);
 356
 357         /* Find the item */
 358         rval = get_locate_index(dict, key, &idx);
 359         if (rval) {
 360                 if (value) {
 361                         *value = NULL;
 362                 }
 363                 return;
 364         }
 365
 366         entry = kxld_array_get_item(&dict->buckets, idx);
 367
 368         /* Save the value if requested */
 369         if (value) {
 370                 *value = entry->value;
 371         }
 372
 373         /* Delete the item from the dictionary */
 374         entry->key = NULL;
 375         entry->value = NULL;
 376         entry->state = DELETED;
 377         dict->num_entries--;
 378 }
 379
 380 /*******************************************************************************
 381 *******************************************************************************/
 382 void
 383 kxld_dict_iterator_get_next(KXLDDictIterator *iter, const void **key,
 384     void **value)
 385 {
 386         DictEntry *entry = NULL;
 387
 388         check(iter);
 389         check(key);
 390         check(value);
 391
 392         *key = NULL;
 393         *value = NULL;
 394
 395         /* Walk over the dictionary looking for USED buckets */
 396         for (; iter->idx < iter->dict->buckets.nitems; ++(iter->idx)) {
 397                 entry = kxld_array_get_item(&iter->dict->buckets, iter->idx);
 398                 if (entry->state == USED) {
 399                         *key = entry->key;
 400                         *value = entry->value;
 401                         ++(iter->idx);
 402                         break;
 403                 }
 404         }
 405 }
 406
 407 /*******************************************************************************
 408 *******************************************************************************/
 409 void
 410 kxld_dict_iterator_reset(KXLDDictIterator *iter)
 411 {
 412         iter->idx = 0;
 413 }
 414
 415 /*******************************************************************************
 416 * This is Daniel Bernstein's hash algorithm from comp.lang.c
 417 * It's fast and distributes well.  Returns an idx into the symbol hash table.
 418 * NOTE: Will not check for a valid pointer - performance
 419 *******************************************************************************/
 420 u_int
 421 kxld_dict_string_hash(const KXLDDict *dict, const void *_key)
 422 {
 423         const char *key = _key;
 424         u_int c = 0;
 425         u_int hash_val = 5381;
 426
 427         check(dict);
 428         check(_key);
 429
 430         while ((c = *key++)) {
 431                 /* hash(i) = hash(i-1) *33 ^ name[i] */
 432                 hash_val = ((hash_val << 5) + hash_val) ^ c;
 433         }
 434
 435         return hash_val % dict->buckets.nitems;
 436 }
 437
 438 u_int
 439 kxld_dict_uint32_hash(const KXLDDict *dict, const void *_key)
 440 {
 441         uint32_t key = *(const uint32_t *) _key;
 442
 443         check(_key);
 444
 445         return (u_int) (key % dict->buckets.nitems);
 446 }
 447
 448 u_int
 449 kxld_dict_kxldaddr_hash(const KXLDDict *dict, const void *_key)
 450 {
 451         kxld_addr_t key = *(const kxld_addr_t *) _key;
 452
 453         check(_key);
 454
 455         return (u_int) (key % dict->buckets.nitems);
 456 }
 457
 458 u_int
 459 kxld_dict_string_cmp(const void *key1, const void *key2)
 460 {
 461         return streq(key1, key2);
 462 }
 463
 464 u_int
 465 kxld_dict_uint32_cmp(const void *key1, const void *key2)
 466 {
 467         const uint32_t *a = key1;
 468         const uint32_t *b = key2;
 469
 470         return a && b && (*a == *b);
 471 }
 472
 473 u_int
 474 kxld_dict_kxldaddr_cmp(const void *key1, const void *key2)
 475 {
 476         const kxld_addr_t *a = key1;
 477         const kxld_addr_t *b = key2;
 478
 479         return a && b && (*a == *b);
 480 }