include/objc-shared-cache.h

   1 /*
   2  * Copyright (c) 2008 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. Please obtain a copy of the License at
  10  * http://www.opensource.apple.com/apsl/ and read it before using this
  11  * file.
  12  *
  13  * The Original Code and all software distributed under the License are
  14  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  15  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  16  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  18  * Please see the License for the specific language governing rights and
  19  * limitations under the License.
  20  *
  21  * @APPLE_LICENSE_HEADER_END@
  22  */
  23
  24 /*
  25 Portions derived from:
  26
  27 --------------------------------------------------------------------
  28 lookup8.c, by Bob Jenkins, January 4 1997, Public Domain.
  29 hash(), hash2(), hash3, and mix() are externally useful functions.
  30 Routines to test the hash are included if SELF_TEST is defined.
  31 You can use this free for any purpose.  It has no warranty.
  32 --------------------------------------------------------------------
  33
  34 ------------------------------------------------------------------------------
  35 perfect.c: code to generate code for a hash for perfect hashing.
  36 (c) Bob Jenkins, September 1996, December 1999
  37 You may use this code in any way you wish, and it is free.  No warranty.
  38 I hereby place this in the public domain.
  39 Source is http://burtleburtle.net/bob/c/perfect.c
  40 ------------------------------------------------------------------------------
  41 */
  42
  43 /*
  44  * objc-selopt.h
  45  * Interface between libobjc and dyld
  46  * for selector uniquing in the dyld shared cache.
  47  *
  48  * When building the shared cache, dyld locates all selectors and selector
  49  * references in the cached images. It builds a perfect hash table out of
  50  * them and writes the table into the shared cache copy of libobjc.
  51  * libobjc then uses that table as the builtin selector list.
  52  *
  53  * Versioning
  54  * The table has a version number. dyld and objc can both ignore the table
  55  * if the other used the wrong version number.
  56  *
  57  * Completeness
  58  * Not all libraries are in the shared cache. Libraries that are in the
  59  * shared cache and were optimized are specially marked. Libraries on
  60  * disk never include those marks.
  61  *
  62  * Coherency
  63  * Libraries optimized in the shared cache can be replaced by unoptimized
  64  * copies from disk when loaded. The copy from disk is not marked and will
  65  * be fixed up by libobjc. The shared cache copy is still mapped into the
  66  * process, so the table can point to cstring data in that library's part
  67  * of the shared cache without trouble.
  68  *
  69  * Atomicity
  70  * dyld writes the table itself last. If dyld marks some metadata as
  71  * updated but then fails to write a table for some reason, libobjc
  72  * fixes up all metadata as if it were not marked.
  73  */
  74
  75 #ifndef _OBJC_SELOPT_H
  76 #define _OBJC_SELOPT_H
  77
  78 /*
  79   DO NOT INCLUDE ANY objc HEADERS HERE
  80   dyld USES THIS FILE AND CANNOT SEE THEM
  81 */
  82 #include <stdint.h>
  83 #include <stdlib.h>
  84 #ifdef SELOPT_WRITE
  85 #include <ext/hash_map>
  86 #endif
  87 /*
  88   DO NOT INCLUDE ANY objc HEADERS HERE
  89   dyld USES THIS FILE AND CANNOT SEE THEM
  90 */
  91
// Compile-time assertion built from a typedef.
// When x is false the array member below gets size -1, which makes the
// typedef ill-formed and stops compilation. The two helper levels exist so
// that __LINE__ is macro-expanded before it is pasted into the typedef name.
// The whole definition is skipped if STATIC_ASSERT is already defined.
#ifndef STATIC_ASSERT
#   define STATIC_ASSERT(x) _STATIC_ASSERT2(x, __LINE__)
#   define _STATIC_ASSERT2(x, line) _STATIC_ASSERT3(x, line)
#   define _STATIC_ASSERT3(x, line)                                     \
        typedef struct {                                                \
            int _static_assert[(x) ? 0 : -1];                           \
        } _static_assert_ ## line __attribute__((unavailable))
#endif
 100
// When non-zero, getIndex() no longer returns early on a check byte
// mismatch; it compares the strings and calls abort() if the string matches
// although its check byte did not.
#define SELOPT_DEBUG 0

// In-place byte order conversion used by the byteswap() methods.
// Both macros read a variable named little_endian that must be in scope
// where they are expanded, and assign the converted value back to x.
// The OSSwapHostTo* functions are not declared by this header.
#define S32(x) x = little_endian ? OSSwapHostToLittleInt32(x) : OSSwapHostToBigInt32(x)
#define S64(x) x = little_endian ? OSSwapHostToLittleInt64(x) : OSSwapHostToBigInt64(x)
 105
 106 namespace objc_opt {
 107
// Signed 32-bit offset as stored in the tables. The accessors below turn it
// into a pointer by adding it to the address of the table object itself.
typedef int32_t objc_stringhash_offset_t;
// One-byte filter value stored per table slot; see checkbyte().
typedef uint8_t objc_stringhash_check_t;
 110
 111 #ifdef SELOPT_WRITE
 112
 113 // Perfect hash code is at the end of this file.
 114
 115 struct perfect_hash {
 116     uint32_t capacity;
 117     uint32_t occupied;
 118     uint32_t shift;
 119     uint32_t mask;
 120     uint64_t salt;
 121
 122     uint32_t scramble[256];
 123     uint8_t *tab;  // count == mask+1; free with delete[]
 124
 125     perfect_hash() : tab(0) { }
 126
 127     ~perfect_hash() { if (tab) delete[] tab; }
 128 };
 129
 130 struct eqstr {
 131     bool operator()(const char* s1, const char* s2) const {
 132         return strcmp(s1, s2) == 0;
 133     }
 134 };
 135
// cstring => cstring's vmaddress
// (used for selector names and class names)
// Keys are compared by content through eqstr.
typedef __gnu_cxx::hash_map<const char *, uint64_t, __gnu_cxx::hash<const char *>, eqstr> string_map;

// class name => (class vmaddress, header_info vmaddress)
// A multimap, so one class name can carry several (class, header_info) pairs.
typedef __gnu_cxx::hash_multimap<const char *, std::pair<uint64_t, uint64_t>, __gnu_cxx::hash<const char *>, eqstr> class_map;

// Builds the perfect hash for the given strings. Only declared here; the
// callers in this header treat a result with capacity == 0 as failure.
static perfect_hash make_perfect(const string_map& strings);
 144
 145 #endif
 146
// Forward declaration of the 64-bit string hash defined further down, needed
// because objc_stringhash_t::hash() calls it before the definition appears.
static uint64_t lookup8( uint8_t *k, size_t length, uint64_t level);
 148
 149 // Precomputed perfect hash table of strings.
 150 // Base class for precomputed selector table and class table.
 151 // Edit objc-sel-table.s and OPT_INITIALIZER if you change this structure.
 152 struct objc_stringhash_t {
 153     uint32_t capacity;
 154     uint32_t occupied;
 155     uint32_t shift;
 156     uint32_t mask;
 157     uint32_t zero;
 158     uint32_t unused; // alignment pad
 159     uint64_t salt;
 160
 161     uint32_t scramble[256];
 162     uint8_t tab[0];                   /* tab[mask+1] (always power-of-2) */
 163     // uint8_t checkbytes[capacity];  /* check byte for each string */
 164     // int32_t offsets[capacity];     /* offsets from &capacity to cstrings */
 165
 166     objc_stringhash_check_t *checkbytes() { return (objc_stringhash_check_t *)&tab[mask+1]; }
 167     const objc_stringhash_check_t *checkbytes() const { return (const objc_stringhash_check_t *)&tab[mask+1]; }
 168
 169     objc_stringhash_offset_t *offsets() { return (objc_stringhash_offset_t *)&checkbytes()[capacity]; }
 170     const objc_stringhash_offset_t *offsets() const { return (const objc_stringhash_offset_t *)&checkbytes()[capacity]; }
 171
 172     uint32_t hash(const char *key) const
 173     {
 174         uint64_t val = lookup8((uint8_t*)key, strlen(key), salt);
 175         uint32_t index = (uint32_t)(val>>shift) ^ scramble[tab[val&mask]];
 176         return index;
 177     }
 178
 179     // The check bytes areused to reject strings that aren't in the table
 180     // without paging in the table's cstring data. This checkbyte calculation
 181     // catches 4785/4815 rejects when launching Safari; a perfect checkbyte
 182     // would catch 4796/4815.
 183     objc_stringhash_check_t checkbyte(const char *key) const
 184     {
 185         return
 186             ((key[0] & 0x7) << 5)
 187             |
 188             (strlen(key) & 0x1f);
 189     }
 190
 191 #define INDEX_NOT_FOUND (~(uint32_t)0)
 192
 193     uint32_t getIndex(const char *key) const
 194     {
 195         uint32_t h = hash(key);
 196
 197         // Use check byte to reject without paging in the table's cstrings
 198         objc_stringhash_check_t h_check = checkbytes()[h];
 199         objc_stringhash_check_t key_check = checkbyte(key);
 200         bool check_fail = (h_check != key_check);
 201 #if ! SELOPT_DEBUG
 202         if (check_fail) return INDEX_NOT_FOUND;
 203 #endif
 204
 205         const char *result = (const char *)this + offsets()[h];
 206         if (0 != strcmp(key, result)) return INDEX_NOT_FOUND;
 207
 208 #if SELOPT_DEBUG
 209         if (check_fail) abort();
 210 #endif
 211
 212         return h;
 213     }
 214
 215 #ifdef SELOPT_WRITE
 216
 217     size_t size()
 218     {
 219         return sizeof(objc_stringhash_t)
 220             + mask+1
 221             + capacity * sizeof(objc_stringhash_check_t)
 222             + capacity * sizeof(objc_stringhash_offset_t);
 223     }
 224
 225     void byteswap(bool little_endian)
 226     {
 227         // tab and checkbytes are arrays of bytes, no swap needed
 228         for (uint32_t i = 0; i < 256; i++) {
 229             S32(scramble[i]);
 230         }
 231         objc_stringhash_offset_t *o = offsets();
 232         for (uint32_t i = 0; i < capacity; i++) {
 233             S32(o[i]);
 234         }
 235
 236         S32(capacity);
 237         S32(occupied);
 238         S32(shift);
 239         S32(mask);
 240         S32(zero);
 241         S64(salt);
 242     }
 243
 244     const char *write(uint64_t base, size_t remaining, string_map& strings)
 245     {
 246         if (sizeof(objc_stringhash_t) > remaining) {
 247             return "selector section too small (metadata not optimized)";
 248         }
 249
 250         if (strings.size() == 0) {
 251             bzero(this, sizeof(objc_stringhash_t));
 252             return NULL;
 253         }
 254
 255         perfect_hash phash = make_perfect(strings);
 256         if (phash.capacity == 0) {
 257             return "perfect hash failed (metadata not optimized)";
 258         }
 259
 260         // Set header
 261         capacity = phash.capacity;
 262         occupied = phash.occupied;
 263         shift = phash.shift;
 264         mask = phash.mask;
 265         zero = 0;
 266         unused = 0;
 267         salt = phash.salt;
 268
 269         if (size() > remaining) {
 270             return "selector section too small (metadata not optimized)";
 271         }
 272
 273         // Set hash data
 274         for (uint32_t i = 0; i < 256; i++) {
 275             scramble[i] = phash.scramble[i];
 276         }
 277         for (uint32_t i = 0; i < phash.mask+1; i++) {
 278             tab[i] = phash.tab[i];
 279         }
 280
 281         // Set offsets to ""
 282         for (uint32_t i = 0; i < phash.capacity; i++) {
 283             offsets()[i] =
 284                 (objc_stringhash_offset_t)offsetof(objc_stringhash_t, zero);
 285         }
 286         // Set checkbytes to 0
 287         for (uint32_t i = 0; i < phash.capacity; i++) {
 288             checkbytes()[i] = 0;
 289         }
 290
 291         // Set real string offsets and checkbytes
 292 #       define SHIFT (64 - 8*sizeof(objc_stringhash_offset_t))
 293         string_map::const_iterator s;
 294         for (s = strings.begin(); s != strings.end(); ++s) {
 295             int64_t offset = s->second - base;
 296             if ((offset<<SHIFT)>>SHIFT != offset) {
 297                 return "selector offset too big (metadata not optimized)";
 298             }
 299
 300             uint32_t h = hash(s->first);
 301             offsets()[h] = (objc_stringhash_offset_t)offset;
 302             checkbytes()[h] = checkbyte(s->first);
 303         }
 304 #       undef SHIFT
 305
 306         return NULL;
 307     }
 308
 309 // SELOPT_WRITE
 310 #endif
 311 };
 312
 313
 314 // Precomputed selector table.
 315 // Edit objc-sel-table.s and OPT_INITIALIZER if you change this structure.
 316 struct objc_selopt_t : objc_stringhash_t {
 317     const char *get(const char *key) const
 318     {
 319         uint32_t h = getIndex(key);
 320         if (h == INDEX_NOT_FOUND) return NULL;
 321
 322         return (const char *)this + offsets()[h];
 323     }
 324 };
 325
 326 // Precomputed class list.
 327 // Edit objc-sel-table.s and OPT_INITIALIZER if you change these structures.
 328
 329 struct objc_classheader_t {
 330     objc_stringhash_offset_t clsOffset;
 331     objc_stringhash_offset_t hiOffset;
 332
 333     // For duplicate class names:
 334     // clsOffset = count<<1 | 1
 335     // duplicated classes are duplicateOffsets[hiOffset..hiOffset+count-1]
 336     bool isDuplicate() const { return clsOffset & 1; }
 337     uint32_t duplicateCount() const { return clsOffset >> 1; }
 338     uint32_t duplicateIndex() const { return hiOffset; }
 339 };
 340
 341
 342 struct objc_clsopt_t : objc_stringhash_t {
 343     // ...objc_stringhash_t fields...
 344     // objc_classheader_t classOffsets[capacity]; /* offsets from &capacity to class_t and header_info */
 345     // uint32_t duplicateCount;
 346     // objc_classheader_t duplicateOffsets[duplicatedClasses];
 347
 348     objc_classheader_t *classOffsets() { return (objc_classheader_t *)&offsets()[capacity]; }
 349     const objc_classheader_t *classOffsets() const { return (const objc_classheader_t *)&offsets()[capacity]; }
 350
 351     uint32_t& duplicateCount() { return *(uint32_t *)&classOffsets()[capacity]; }
 352     const uint32_t& duplicateCount() const { return *(const uint32_t *)&classOffsets()[capacity]; }
 353
 354     objc_classheader_t *duplicateOffsets() { return (objc_classheader_t *)(&duplicateCount()+1); }
 355     const objc_classheader_t *duplicateOffsets() const { return (const objc_classheader_t *)(&duplicateCount()+1); }
 356
 357     // 0/NULL/NULL: not found
 358     // 1/ptr/ptr: found exactly one
 359     // n/NULL/NULL:  found N - use getClassesAndHeaders() instead
 360     uint32_t getClassAndHeader(const char *key, void*& cls, void*& hi) const
 361     {
 362         uint32_t h = getIndex(key);
 363         if (h == INDEX_NOT_FOUND) {
 364             cls = NULL;
 365             hi = NULL;
 366             return 0;
 367         }
 368
 369         const objc_classheader_t& clshi = classOffsets()[h];
 370         if (! clshi.isDuplicate()) {
 371             // class appears in exactly one header
 372             cls = (void *)((const char *)this + clshi.clsOffset);
 373             hi  = (void *)((const char *)this + clshi.hiOffset);
 374             return 1;
 375         }
 376         else {
 377             // class appears in more than one header - use getClassesAndHeaders
 378             cls = NULL;
 379             hi = NULL;
 380             return clshi.duplicateCount();
 381         }
 382     }
 383
 384     void getClassesAndHeaders(const char *key, void **cls, void **hi) const
 385     {
 386         uint32_t h = getIndex(key);
 387         if (h == INDEX_NOT_FOUND) return;
 388
 389         const objc_classheader_t& clshi = classOffsets()[h];
 390         if (! clshi.isDuplicate()) {
 391             // class appears in exactly one header
 392             cls[0] = (void *)((const char *)this + clshi.clsOffset);
 393             hi[0]  = (void *)((const char *)this + clshi.hiOffset);
 394         }
 395         else {
 396             // class appears in more than one header
 397             uint32_t count = clshi.duplicateCount();
 398             const objc_classheader_t *list =
 399                 &duplicateOffsets()[clshi.duplicateIndex()];
 400             for (uint32_t i = 0; i < count; i++) {
 401                 cls[i] = (void *)((const char *)this + list[i].clsOffset);
 402                 hi[i]  = (void *)((const char *)this + list[i].hiOffset);
 403             }
 404         }
 405     }
 406
 407 #ifdef SELOPT_WRITE
 408
 409     size_t size()
 410     {
 411         return
 412             objc_stringhash_t::size()
 413             + capacity * sizeof(objc_classheader_t)
 414             + sizeof(duplicateCount())
 415             + duplicateCount() * sizeof(objc_classheader_t);
 416     }
 417
 418     void byteswap(bool little_endian)
 419     {
 420         objc_classheader_t *o;
 421
 422         o = classOffsets();
 423         for (uint32_t i = 0; i < capacity; i++) {
 424             S32(o[i].clsOffset);
 425             S32(o[i].hiOffset);
 426         }
 427
 428         o = duplicateOffsets();
 429         for (uint32_t i = 0; i < duplicateCount(); i++) {
 430             S32(o[i].clsOffset);
 431             S32(o[i].hiOffset);
 432         }
 433
 434         S32(duplicateCount());
 435
 436         objc_stringhash_t::byteswap(little_endian);
 437     }
 438
 439     const char *write(uint64_t base, size_t remaining,
 440                       string_map& strings, class_map& classes, bool verbose)
 441     {
 442         const char *err;
 443         err = objc_stringhash_t::write(base, remaining, strings);
 444         if (err) return err;
 445
 446         if (size() > remaining) {
 447             return "selector section too small (metadata not optimized)";
 448         }
 449
 450         // Set class offsets to &zero
 451         objc_stringhash_offset_t zeroOffset =
 452             (objc_stringhash_offset_t)offsetof(objc_stringhash_t, zero);
 453         for (uint32_t i = 0; i < capacity; i++) {
 454             classOffsets()[i].clsOffset = zeroOffset;
 455             classOffsets()[i].hiOffset = zeroOffset;
 456         }
 457
 458         // Set real class offsets
 459 #       define SHIFT (64 - 8*sizeof(objc_stringhash_offset_t))
 460         class_map::const_iterator c;
 461         for (c = classes.begin(); c != classes.end(); ++c) {
 462             uint32_t h = getIndex(c->first);
 463             if (h == INDEX_NOT_FOUND) {
 464                 return "class list busted (metadata not optimized)";
 465             }
 466
 467             if (classOffsets()[h].clsOffset != zeroOffset) {
 468                 // already did this class
 469                 continue;
 470             }
 471
 472             uint32_t count = classes.count(c->first);
 473             if (count == 1) {
 474                 // only one class with this name
 475
 476                 int64_t coff = c->second.first - base;
 477                 int64_t hoff = c->second.second - base;
 478                 if ((coff<<SHIFT)>>SHIFT != coff) {
 479                     return "class offset too big (metadata not optimized)";
 480                 }
 481                 if ((hoff<<SHIFT)>>SHIFT != hoff) {
 482                     return "header offset too big (metadata not optimized)";
 483                 }
 484
 485                 classOffsets()[h].clsOffset = (objc_stringhash_offset_t)coff;
 486                 classOffsets()[h].hiOffset  = (objc_stringhash_offset_t)hoff;
 487             }
 488             else {
 489                 // class name has duplicates - write them all now
 490                 if (verbose) {
 491                     fprintf(stderr, "update_dyld_shared_cache: %u duplicates of Objective-C class %s\n", count, c->first);
 492                 }
 493
 494                 uint32_t dest = duplicateCount();
 495                 duplicateCount() += count;
 496                 if (size() > remaining) {
 497                     return "selector section too small (metadata not optimized)";
 498                 }
 499
 500                 // classOffsets() instead contains count and array index
 501                 classOffsets()[h].clsOffset = count*2 + 1;
 502                 classOffsets()[h].hiOffset = dest;
 503
 504                 std::pair<class_map::const_iterator, class_map::const_iterator>
 505                     duplicates = classes.equal_range(c->first);
 506                 class_map::const_iterator dup;
 507                 for (dup = duplicates.first; dup != duplicates.second; ++dup) {
 508                     int64_t coff = dup->second.first - base;
 509                     int64_t hoff = dup->second.second - base;
 510                     if ((coff<<SHIFT)>>SHIFT != coff) {
 511                         return "class offset too big (metadata not optimized)";
 512                     }
 513                     if ((hoff<<SHIFT)>>SHIFT != hoff) {
 514                         return "header offset too big (metadata not optimized)";
 515                     }
 516
 517                     duplicateOffsets()[dest].clsOffset = (objc_stringhash_offset_t)coff;
 518                     duplicateOffsets()[dest].hiOffset  = (objc_stringhash_offset_t)hoff;
 519                     dest++;
 520                 }
 521             }
 522         }
 523 #       undef SHIFT
 524
 525         return NULL;
 526     }
 527
 528 // SELOPT_WRITE
 529 #endif
 530 };
 531
// Precomputed image list.
// Only declared in this part of the file; objc_opt_t::headeropt() returns a
// pointer to it without needing the definition.
struct objc_headeropt_t;

// Precomputed class list.
struct objc_clsopt_t;

// Edit objc-sel-table.s if you change this value.
// OPT_INITIALIZER stores this value in the objc_opt_t::version field.
enum { VERSION = 12 };
 540
 541 // Top-level optimization structure.
 542 // Edit objc-sel-table.s and OPT_INITIALIZER if you change this structure.
 543 struct objc_opt_t {
 544     uint32_t version;
 545     int32_t selopt_offset;
 546     int32_t headeropt_offset;
 547     int32_t clsopt_offset;
 548
 549     const objc_selopt_t* selopt() const {
 550         if (selopt_offset == 0) return NULL;
 551         return (objc_selopt_t *)((uint8_t *)this + selopt_offset);
 552     }
 553     objc_selopt_t* selopt() {
 554         if (selopt_offset == 0) return NULL;
 555         return (objc_selopt_t *)((uint8_t *)this + selopt_offset);
 556     }
 557
 558     struct objc_headeropt_t* headeropt() const {
 559         if (headeropt_offset == 0) return NULL;
 560         return (struct objc_headeropt_t *)((uint8_t *)this + headeropt_offset);
 561     }
 562
 563     struct objc_clsopt_t* clsopt() const {
 564         if (clsopt_offset == 0) return NULL;
 565         return (objc_clsopt_t *)((uint8_t *)this + clsopt_offset);
 566     }
 567 };
 568
 569 // sizeof(objc_opt_t) must be pointer-aligned
 570 STATIC_ASSERT(sizeof(objc_opt_t) % sizeof(void*) == 0);
 571
// Initializer for empty opt of type uint32_t[].
// X8/X64/X256 repeat their argument 8, 64 and 256 times.
//
// Reading the words in order:
//   objc_opt_t    : version, selopt_offset = 16 (the selector table follows
//                   the four 32-bit objc_opt_t fields directly),
//                   headeropt_offset = 0, clsopt_offset = 0
//   objc_selopt_t : capacity = 4, occupied = 4, shift = 63, mask = 3,
//                   zero = 0, unused = 0, salt = two zero words,
//                   scramble[256] = all zero,
//                   tab[4] = one zero word, checkbytes[4] = one zero word,
//                   offsets[4] = 16 each, which is the offset of the `zero`
//                   field, so every slot reads as the empty string
#define X8(x) x, x, x, x, x, x, x, x
#define X64(x) X8(x), X8(x), X8(x), X8(x), X8(x), X8(x), X8(x), X8(x)
#define X256(x) X64(x), X64(x), X64(x), X64(x)
#define OPT_INITIALIZER {                                           \
        /* objc_opt_t */                                            \
        objc_opt::VERSION, 16, 0, 0,                                \
        /* objc_selopt_t */                                         \
        4, 4, 63, 3, 0, 0, 0,0, X256(0), 0, 0, 16, 16, 16, 16       \
        /* no objc_headeropt_t */                                   \
        /* no objc_clsopt_t */                                      \
}
 584
 585
 586 /*
 587 --------------------------------------------------------------------
 588 mix -- mix 3 64-bit values reversibly.
 589 mix() takes 48 machine instructions, but only 24 cycles on a superscalar
 590   machine (like Intel's new MMX architecture).  It requires 4 64-bit
 591   registers for 4::2 parallelism.
 592 All 1-bit deltas, all 2-bit deltas, all deltas composed of top bits of
 593   (a,b,c), and all deltas of bottom bits were tested.  All deltas were
 594   tested both on random keys and on keys that were nearly all zero.
 595   These deltas all cause every bit of c to change between 1/3 and 2/3
 596   of the time (well, only 113/400 to 287/400 of the time for some
 597   2-bit delta).  These deltas all cause at least 80 bits to change
 598   among (a,b,c) when the mix is run either forward or backward (yes it
 599   is reversible).
 600 This implies that a hash using mix64 has no funnels.  There may be
 601   characteristics with 3-bit deltas or bigger, I didn't test for
 602   those.
 603 --------------------------------------------------------------------
 604 */
 605 #define mix64(a,b,c) \
 606 { \
 607   a -= b; a -= c; a ^= (c>>43); \
 608   b -= c; b -= a; b ^= (a<<9); \
 609   c -= a; c -= b; c ^= (b>>8); \
 610   a -= b; a -= c; a ^= (c>>38); \
 611   b -= c; b -= a; b ^= (a<<23); \
 612   c -= a; c -= b; c ^= (b>>5); \
 613   a -= b; a -= c; a ^= (c>>35); \
 614   b -= c; b -= a; b ^= (a<<49); \
 615   c -= a; c -= b; c ^= (b>>11); \
 616   a -= b; a -= c; a ^= (c>>12); \
 617   b -= c; b -= a; b ^= (a<<18); \
 618   c -= a; c -= b; c ^= (b>>22); \
 619 }
 620
 621 /*
 622 --------------------------------------------------------------------
 623 hash() -- hash a variable-length key into a 64-bit value
 624   k     : the key (the unaligned variable-length array of bytes)
 625   len   : the length of the key, counting by bytes
 626   level : can be any 8-byte value
 627 Returns a 64-bit value.  Every bit of the key affects every bit of
 628 the return value.  No funnels.  Every 1-bit and 2-bit delta achieves
 629 avalanche.  About 41+5len instructions.
 630
 631 The best hash table sizes are powers of 2.  There is no need to do
 632 mod a prime (mod is sooo slow!).  If you need less than 64 bits,
 633 use a bitmask.  For example, if you need only 10 bits, do
 634   h = (h & hashmask(10));
 635 In which case, the hash table should have hashsize(10) elements.
 636
 637 If you are hashing n strings (uint8_t **)k, do it like this:
 638   for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
 639
 640 By Bob Jenkins, Jan 4 1997.  bob_jenkins@burtleburtle.net.  You may
 641 use this code any way you wish, private, educational, or commercial,
 642 but I would appreciate if you give me credit.
 643
 644 See http://burtleburtle.net/bob/hash/evahash.html
 645 Use for hash table lookup, or anything where one collision in 2^^64
 646 is acceptable.  Do NOT use for cryptographic purposes.
 647 --------------------------------------------------------------------
 648 */
 649
 650 static uint64_t lookup8( uint8_t *k, size_t length, uint64_t level)
 651 // uint8_t *k;        /* the key */
 652 // uint64_t  length;   /* the length of the key */
 653 // uint64_t  level;    /* the previous hash, or an arbitrary value */
 654 {
 655   uint64_t a,b,c;
 656   size_t len;
 657
 658   /* Set up the internal state */
 659   len = length;
 660   a = b = level;                         /* the previous hash value */
 661   c = 0x9e3779b97f4a7c13LL; /* the golden ratio; an arbitrary value */
 662
 663   /*---------------------------------------- handle most of the key */
 664   while (len >= 24)
 665   {
 666     a += (k[0]        +((uint64_t)k[ 1]<< 8)+((uint64_t)k[ 2]<<16)+((uint64_t)k[ 3]<<24)
 667      +((uint64_t)k[4 ]<<32)+((uint64_t)k[ 5]<<40)+((uint64_t)k[ 6]<<48)+((uint64_t)k[ 7]<<56));
 668     b += (k[8]        +((uint64_t)k[ 9]<< 8)+((uint64_t)k[10]<<16)+((uint64_t)k[11]<<24)
 669      +((uint64_t)k[12]<<32)+((uint64_t)k[13]<<40)+((uint64_t)k[14]<<48)+((uint64_t)k[15]<<56));
 670     c += (k[16]       +((uint64_t)k[17]<< 8)+((uint64_t)k[18]<<16)+((uint64_t)k[19]<<24)
 671      +((uint64_t)k[20]<<32)+((uint64_t)k[21]<<40)+((uint64_t)k[22]<<48)+((uint64_t)k[23]<<56));
 672     mix64(a,b,c);
 673     k += 24; len -= 24;
 674   }
 675
 676   /*------------------------------------- handle the last 23 bytes */
 677   c += length;
 678   switch(len)              /* all the case statements fall through */
 679   {
 680   case 23: c+=((uint64_t)k[22]<<56);
 681   case 22: c+=((uint64_t)k[21]<<48);
 682   case 21: c+=((uint64_t)k[20]<<40);
 683   case 20: c+=((uint64_t)k[19]<<32);
 684   case 19: c+=((uint64_t)k[18]<<24);
 685   case 18: c+=((uint64_t)k[17]<<16);
 686   case 17: c+=((uint64_t)k[16]<<8);
 687     /* the first byte of c is reserved for the length */
 688   case 16: b+=((uint64_t)k[15]<<56);
 689   case 15: b+=((uint64_t)k[14]<<48);
 690   case 14: b+=((uint64_t)k[13]<<40);
 691   case 13: b+=((uint64_t)k[12]<<32);
 692   case 12: b+=((uint64_t)k[11]<<24);
 693   case 11: b+=((uint64_t)k[10]<<16);
 694   case 10: b+=((uint64_t)k[ 9]<<8);
 695   case  9: b+=((uint64_t)k[ 8]);
 696   case  8: a+=((uint64_t)k[ 7]<<56);
 697   case  7: a+=((uint64_t)k[ 6]<<48);
 698   case  6: a+=((uint64_t)k[ 5]<<40);
 699   case  5: a+=((uint64_t)k[ 4]<<32);
 700   case  4: a+=((uint64_t)k[ 3]<<24);
 701   case  3: a+=((uint64_t)k[ 2]<<16);
 702   case  2: a+=((uint64_t)k[ 1]<<8);
 703   case  1: a+=((uint64_t)k[ 0]);
 704     /* case 0: nothing left to add */
 705   }
 706   mix64(a,b,c);
 707   /*-------------------------------------------- report the result */
 708   return c;
 709 }
 710
 711
 712 #ifdef SELOPT_WRITE
 713
 714 /*
 715 ------------------------------------------------------------------------------
 716 This generates a minimal perfect hash function.  That means, given a
 717 set of n keys, this determines a hash function that maps each of
 718 those keys into a value in 0..n-1 with no collisions.
 719
 720 The perfect hash function first uses a normal hash function on the key
 721 to determine (a,b) such that the pair (a,b) is distinct for all
 722 keys, then it computes a^scramble[tab[b]] to get the final perfect hash.
 723 tab[] is an array of 1-byte values and scramble[] is a 256-term array of
 724 2-byte or 4-byte values.  If there are n keys, the length of tab[] is a
 725 power of two between n/3 and n.
 726
 727 I found the idea of computing distinct (a,b) values in "Practical minimal
 728 perfect hash functions for large databases", Fox, Heath, Chen, and Daoud,
 729 Communications of the ACM, January 1992.  They found the idea in Chichelli
 730 (CACM Jan 1980).  Beyond that, our methods differ.
 731
 732 The key is hashed to a pair (a,b) where a in 0..*alen*-1 and b in
 733 0..*blen*-1.  A fast hash function determines both a and b
 734 simultaneously.  Any decent hash function is likely to produce
 735 hashes so that (a,b) is distinct for all pairs.  I try the hash
 736 using different values of *salt* until all pairs are distinct.
 737
 738 The final hash is (a XOR scramble[tab[b]]).  *scramble* is a
 739 predetermined mapping of 0..255 into 0..smax-1.  *tab* is an
 740 array that we fill in in such a way as to make the hash perfect.
 741
 742 First we fill in all values of *tab* that are used by more than one
 743 key.  We try all possible values for each position until one works.
 744
 745 This leaves m unmapped keys and m values that something could hash to.
 746 If you treat unmapped keys as lefthand nodes and unused hash values
 747 as righthand nodes, and draw a line connecting each key to each hash
 748 value it could map to, you get a bipartite graph.  We attempt to
 749 find a perfect matching in this graph.  If we succeed, we have
 750 determined a perfect hash for the whole set of keys.
 751
 752 *scramble* is used because (a^tab[i]) clusters keys around *a*.
 753 ------------------------------------------------------------------------------
 754 */
 755
 756 typedef uint64_t  ub8;
 757 #define UB8MAXVAL 0xffffffffffffffffLL
 758 #define UB8BITS 64
 759 typedef uint32_t  ub4;
 760 #define UB4MAXVAL 0xffffffff
 761 #define UB4BITS 32
 762 typedef uint16_t  ub2;
 763 #define UB2MAXVAL 0xffff
 764 #define UB2BITS 16
 765 typedef uint8_t ub1;
 766 #define UB1MAXVAL 0xff
 767 #define UB1BITS 8
 768
 769 #define TRUE  1
 770 #define FALSE 0
 771
 772 #define SCRAMBLE_LEN 256 // ((ub4)1<<16)                    /* length of *scramble* */
 773 #define RETRY_INITKEY 2048  /* number of times to try to find distinct (a,b) */
 774 #define RETRY_PERFECT 4     /* number of times to try to make a perfect hash */
 775
 776
/*
 * Representation of a key.
 *
 * name_k and len_k are filled in by getkeys().  a_k and b_k are recomputed by
 * initnorm() for every trial salt, and nextb_k is rebuilt by inittab(), so
 * those three fields are only meaningful for the current trial.
 */
struct key
{
  ub1        *name_k;                                      /* the actual key */
  ub4         len_k;                         /* the length of the actual key */
  ub4         hash_k;                 /* the initial hash value for this key */
                      /* (no code in this part of the file writes hash_k) */
/* beyond this point is mapping-dependent */
  ub4         a_k;                            /* a, of the key maps to (a,b) */
  ub4         b_k;                            /* b, of the key maps to (a,b) */
  struct key *nextb_k;                               /* next key with this b */
};
typedef  struct key  key;
 789
/*
 * Things indexed by b of the original (a,b) pair: one entry per possible b.
 * inittab() zeroes the whole table and then builds list_b/listlen_b.
 */
struct bstuff
{
  ub2  val_b;              /* tab[b]; a key's hash is a_k ^ scramble[val_b] */
  key *list_b;                   /* tabb[i].list_b is list of keys with b==i */
  ub4  listlen_b;                                        /* length of list_b */
  ub4  water_b;           /* high watermark of who has visited this map node */
                  /* (augment() stamps it with its highwater argument) */
};
typedef  struct bstuff  bstuff;
 799
/*
 * Things indexed by final hash value: one slot per hash value.
 * A null key_h means no key currently occupies that hash value.
 */
struct hstuff
{
  key *key_h;                   /* tabh[i].key_h is the key with a hash of i */
};
typedef  struct hstuff hstuff;
 806
/*
 * Things indexed by queue position.  augment() uses an array of these as the
 * queue that holds its spanning tree; entry 0 is the root (the b being
 * mapped).  apply() follows parent_q from the last entry back to the root.
 */
struct qstuff
{
  bstuff *b_q;                        /* b that currently occupies this hash */
                              /* (null when the candidate value hit nothing) */
  ub4     parent_q;     /* queue position of parent that could use this hash */
  ub2     newval_q;      /* what to change parent tab[b] to to use this hash */
  ub2     oldval_q;                              /* original value of tab[b] */
};
typedef  struct qstuff  qstuff;
 816
 817
 818 /*
 819 ------------------------------------------------------------------------------
 820 Find the mapping that will produce a perfect hash
 821 ------------------------------------------------------------------------------
 822 */
 823
 824 /* return the ceiling of the log (base 2) of val */
 825 static ub4  log2u(ub4 val)
 826 {
 827   ub4 i;
 828   for (i=0; ((ub4)1<<i) < val; ++i)
 829     ;
 830   return i;
 831 }
 832
 833 /* compute p(x), where p is a permutation of 0..(1<<nbits)-1 */
 834 /* permute(0)=0.  This is intended and useful. */
 835 static ub4  permute(ub4 x, ub4 nbits)
 836 // ub4 x;                                       /* input, a value in some range */
 837 // ub4 nbits;                                 /* input, number of bits in range */
 838 {
 839   int i;
 840   int mask   = ((ub4)1<<nbits)-1;                                /* all ones */
 841   int const2 = 1+nbits/2;
 842   int const3 = 1+nbits/3;
 843   int const4 = 1+nbits/4;
 844   int const5 = 1+nbits/5;
 845   for (i=0; i<20; ++i)
 846   {
 847     x = (x+(x<<const2)) & mask;
 848     x = (x^(x>>const3));
 849     x = (x+(x<<const4)) & mask;
 850     x = (x^(x>>const5));
 851   }
 852   return x;
 853 }
 854
 855 /* initialize scramble[] with distinct random values in 0..smax-1 */
 856 static void scrambleinit(ub4 *scramble, ub4 smax)
 857 // ub4      *scramble;                            /* hash is a^scramble[tab[b]] */
 858 // ub4       smax;                    /* scramble values should be in 0..smax-1 */
 859 {
 860   ub4 i;
 861
 862   /* fill scramble[] with distinct random integers in 0..smax-1 */
 863   for (i=0; i<SCRAMBLE_LEN; ++i)
 864   {
 865     scramble[i] = permute(i, log2u(smax));
 866   }
 867 }
 868
 869
 870 /*
 871  * put keys in tabb according to key->b_k
 872  * check if the initial hash might work
 873  */
 874 static int inittab(bstuff *tabb, ub4 blen, key *keys, ub4 nkeys, int complete)
 875 // bstuff   *tabb;                     /* output, list of keys with b for (a,b) */
 876 // ub4       blen;                                            /* length of tabb */
 877 // key      *keys;                               /* list of keys already hashed */
 878 // int       complete;        /* TRUE means to complete init despite collisions */
 879 {
 880   int  nocollision = TRUE;
 881   ub4 i;
 882
 883   memset((void *)tabb, 0, (size_t)(sizeof(bstuff)*blen));
 884
 885   /* Two keys with the same (a,b) guarantees a collision */
 886   for (i = 0; i < nkeys; i++) {
 887     key *mykey = keys+i;
 888     key *otherkey;
 889
 890     for (otherkey=tabb[mykey->b_k].list_b;
 891          otherkey;
 892          otherkey=otherkey->nextb_k)
 893     {
 894       if (mykey->a_k == otherkey->a_k)
 895       {
 896         nocollision = FALSE;
 897         if (!complete)
 898           return FALSE;
 899       }
 900     }
 901     ++tabb[mykey->b_k].listlen_b;
 902     mykey->nextb_k = tabb[mykey->b_k].list_b;
 903     tabb[mykey->b_k].list_b = mykey;
 904   }
 905
 906   /* no two keys have the same (a,b) pair */
 907   return nocollision;
 908 }
 909
 910
 911 /* Do the initial hash for normal mode (use lookup and checksum) */
 912 static void initnorm(key *keys, ub4 nkeys, ub4 alen, ub4 blen, ub4 smax, ub8 salt)
 913 // key      *keys;                                          /* list of all keys */
 914 // ub4       alen;                    /* (a,b) has a in 0..alen-1, a power of 2 */
 915 // ub4       blen;                    /* (a,b) has b in 0..blen-1, a power of 2 */
 916 // ub4       smax;                   /* maximum range of computable hash values */
 917 // ub4       salt;                     /* used to initialize the hash function */
 918 // gencode  *final;                          /* output, code for the final hash */
 919 {
 920   ub4 loga = log2u(alen);                            /* log based 2 of blen */
 921   ub4 i;
 922   for (i = 0; i < nkeys; i++) {
 923     key *mykey = keys+i;
 924     ub8 hash = lookup8(mykey->name_k, mykey->len_k, salt);
 925     mykey->a_k = (loga > 0) ? hash>>(UB8BITS-loga) : 0;
 926     mykey->b_k = (blen > 1) ? hash&(blen-1) : 0;
 927   }
 928 }
 929
 930
 931 /* Try to apply an augmenting list */
 932 static int apply(bstuff *tabb, hstuff *tabh, qstuff *tabq, ub4 blen, ub4 *scramble, ub4 tail, int rollback)
 933 // bstuff *tabb;
 934 // hstuff *tabh;
 935 // qstuff *tabq;
 936 // ub4     blen;
 937 // ub4    *scramble;
 938 // ub4     tail;
 939 // int     rollback;          /* FALSE applies augmenting path, TRUE rolls back */
 940 {
 941   ub4     hash;
 942   key    *mykey;
 943   bstuff *pb;
 944   ub4     child;
 945   ub4     parent;
 946   ub4     stabb;                                         /* scramble[tab[b]] */
 947
 948   /* walk from child to parent */
 949   for (child=tail-1; child; child=parent)
 950   {
 951     parent = tabq[child].parent_q;                    /* find child's parent */
 952     pb     = tabq[parent].b_q;             /* find parent's list of siblings */
 953
 954     /* erase old hash values */
 955     stabb = scramble[pb->val_b];
 956     for (mykey=pb->list_b; mykey; mykey=mykey->nextb_k)
 957     {
 958       hash = mykey->a_k^stabb;
 959       if (mykey == tabh[hash].key_h)
 960       {                            /* erase hash for all of child's siblings */
 961         tabh[hash].key_h = (key *)0;
 962       }
 963     }
 964
 965     /* change pb->val_b, which will change the hashes of all parent siblings */
 966     pb->val_b = (rollback ? tabq[child].oldval_q : tabq[child].newval_q);
 967
 968     /* set new hash values */
 969     stabb = scramble[pb->val_b];
 970     for (mykey=pb->list_b; mykey; mykey=mykey->nextb_k)
 971     {
 972       hash = mykey->a_k^stabb;
 973       if (rollback)
 974       {
 975         if (parent == 0) continue;                  /* root never had a hash */
 976       }
 977       else if (tabh[hash].key_h)
 978       {
 979         /* very rare: roll back any changes */
 980         apply(tabb, tabh, tabq, blen, scramble, tail, TRUE);
 981         return FALSE;                                  /* failure, collision */
 982       }
 983       tabh[hash].key_h = mykey;
 984     }
 985   }
 986   return TRUE;
 987 }
 988
 989
 990 /*
 991 -------------------------------------------------------------------------------
 992 augment(): Add item to the mapping.
 993
 994 Construct a spanning tree of *b*s with *item* as root, where each
 995 parent can have all its hashes changed (by some new val_b) with
 996 at most one collision, and each child is the b of that collision.
 997
 998 I got this from Tarjan's "Data Structures and Network Algorithms".  The
 999 path from *item* to a *b* that can be remapped with no collision is
1000 an "augmenting path".  Change values of tab[b] along the path so that
1001 the unmapped key gets mapped and the unused hash value gets used.
1002
1003 Assuming 1 key per b, if m out of n hash values are still unused,
1004 you should expect the transitive closure to cover n/m nodes before
1005 an unused node is found.  Sum(i=1..n)(n/i) is about nlogn, so expect
1006 this approach to take about nlogn time to map all single-key b's.
1007 -------------------------------------------------------------------------------
1008 */
1009 static int augment(bstuff *tabb, hstuff *tabh, qstuff *tabq, ub4 blen, ub4 *scramble, ub4 smax, bstuff *item, ub4 nkeys,
1010                    ub4 highwater)
1011 // bstuff   *tabb;                                        /* stuff indexed by b */
1012 // hstuff   *tabh;  /* which key is associated with which hash, indexed by hash */
1013 // qstuff   *tabq;            /* queue of *b* values, this is the spanning tree */
1014 // ub4       blen;                                            /* length of tabb */
1015 // ub4      *scramble;                      /* final hash is a^scramble[tab[b]] */
1016 // ub4       smax;                                 /* highest value in scramble */
1017 // bstuff   *item;                           /* &tabb[b] for the b to be mapped */
1018 // ub4       nkeys;                         /* final hash must be in 0..nkeys-1 */
1019 // ub4       highwater;        /* a value higher than any now in tabb[].water_b */
1020 {
1021   ub4  q;                      /* current position walking through the queue */
1022   ub4  tail;              /* tail of the queue.  0 is the head of the queue. */
1023   ub4  limit=UB1MAXVAL+1;
1024   ub4  highhash = smax;
1025
1026   /* initialize the root of the spanning tree */
1027   tabq[0].b_q = item;
1028   tail = 1;
1029
1030   /* construct the spanning tree by walking the queue, add children to tail */
1031   for (q=0; q<tail; ++q)
1032   {
1033     bstuff *myb = tabq[q].b_q;                        /* the b for this node */
1034     ub4     i;                              /* possible value for myb->val_b */
1035
1036     if (q == 1)
1037       break;                                  /* don't do transitive closure */
1038
1039     for (i=0; i<limit; ++i)
1040     {
1041       bstuff *childb = (bstuff *)0;             /* the b that this i maps to */
1042       key    *mykey;                       /* for walking through myb's keys */
1043
1044       for (mykey = myb->list_b; mykey; mykey=mykey->nextb_k)
1045       {
1046         key    *childkey;
1047         ub4 hash = mykey->a_k^scramble[i];
1048
1049         if (hash >= highhash) break;                        /* out of bounds */
1050         childkey = tabh[hash].key_h;
1051
1052         if (childkey)
1053         {
1054           bstuff *hitb = &tabb[childkey->b_k];
1055
1056           if (childb)
1057           {
1058             if (childb != hitb) break;            /* hit at most one child b */
1059           }
1060           else
1061           {
1062             childb = hitb;                        /* remember this as childb */
1063             if (childb->water_b == highwater) break;     /* already explored */
1064           }
1065         }
1066       }
1067       if (mykey) continue;             /* myb with i has multiple collisions */
1068
1069       /* add childb to the queue of reachable things */
1070       if (childb) childb->water_b = highwater;
1071       tabq[tail].b_q      = childb;
1072       tabq[tail].newval_q = i;     /* how to make parent (myb) use this hash */
1073       tabq[tail].oldval_q = myb->val_b;            /* need this for rollback */
1074       tabq[tail].parent_q = q;
1075       ++tail;
1076
1077       if (!childb)
1078       {                                  /* found an *i* with no collisions? */
1079         /* try to apply the augmenting path */
1080         if (apply(tabb, tabh, tabq, blen, scramble, tail, FALSE))
1081           return TRUE;        /* success, item was added to the perfect hash */
1082
1083         --tail;                    /* don't know how to handle such a child! */
1084       }
1085     }
1086   }
1087   return FALSE;
1088 }
1089
1090
1091 /* find a mapping that makes this a perfect hash */
1092 static int perfect(bstuff *tabb, hstuff *tabh, qstuff *tabq, ub4 blen, ub4 smax, ub4 *scramble, ub4 nkeys)
1093 {
1094   ub4 maxkeys;                           /* maximum number of keys for any b */
1095   ub4 i, j;
1096
1097 #if SELOPT_DEBUG
1098   fprintf(stderr, "           blen %d smax %d nkeys %d\n", blen, smax, nkeys);
1099 #endif
1100
1101   /* clear any state from previous attempts */
1102   memset((void *)tabh, 0, sizeof(hstuff)*smax);
1103   memset((void *)tabq, 0, sizeof(qstuff)*(blen+1));
1104
1105   for (maxkeys=0,i=0; i<blen; ++i)
1106     if (tabb[i].listlen_b > maxkeys)
1107       maxkeys = tabb[i].listlen_b;
1108
1109   /* In descending order by number of keys, map all *b*s */
1110   for (j=maxkeys; j>0; --j)
1111     for (i=0; i<blen; ++i)
1112       if (tabb[i].listlen_b == j)
1113         if (!augment(tabb, tabh, tabq, blen, scramble, smax, &tabb[i], nkeys,
1114                      i+1))
1115         {
1116           return FALSE;
1117         }
1118
1119   /* Success!  We found a perfect hash of all keys into 0..nkeys-1. */
1120   return TRUE;
1121 }
1122
1123
1124 /* guess initial values for alen and blen */
1125 static void initalen(ub4 *alen, ub4 *blen, ub4 smax, ub4 nkeys)
1126 // ub4      *alen;                                      /* output, initial alen */
1127 // ub4      *blen;                                      /* output, initial blen */
1128 // ub4      smax;    /* input, power of two greater or equal to max hash value */
1129 // ub4       nkeys;                              /* number of keys being hashed */
1130 {
1131   /*
1132    * Find initial *alen, *blen
1133    * Initial alen and blen values were found empirically.  Some factors:
1134    *
1135    * If smax<256 there is no scramble, so tab[b] needs to cover 0..smax-1.
1136    *
1137    * alen and blen must be powers of 2 because the values in 0..alen-1 and
1138    * 0..blen-1 are produced by applying a bitmask to the initial hash function.
1139    *
1140    * alen must be less than smax, in fact less than nkeys, because otherwise
1141    * there would often be no i such that a^scramble[i] is in 0..nkeys-1 for
1142    * all the *a*s associated with a given *b*, so there would be no legal
1143    * value to assign to tab[b].  This only matters when we're doing a minimal
1144    * perfect hash.
1145    *
1146    * It takes around 800 trials to find distinct (a,b) with nkey=smax*(5/8)
1147    * and alen*blen = smax*smax/32.
1148    *
1149    * Values of blen less than smax/4 never work, and smax/2 always works.
1150    *
1151    * We want blen as small as possible because it is the number of bytes in
1152    * the huge array we must create for the perfect hash.
1153    *
1154    * When nkey <= smax*(5/8), blen=smax/4 works much more often with
1155    * alen=smax/8 than with alen=smax/4.  Above smax*(5/8), blen=smax/4
1156    * doesn't seem to care whether alen=smax/8 or alen=smax/4.  I think it
1157    * has something to do with 5/8 = 1/8 * 5.  For example examine 80000,
1158    * 85000, and 90000 keys with different values of alen.  This only matters
1159    * if we're doing a minimal perfect hash.
1160    *
1161    * When alen*blen <= 1<<UB4BITS, the initial hash must produce one integer.
1162    * Bigger than that it must produce two integers, which increases the
1163    * cost of the hash per character hashed.
1164    */
1165   *alen = smax;                     /* no reason to restrict alen to smax/2 */
1166   *blen = ((nkeys <= smax*0.6) ? smax/16 :
1167            (nkeys <= smax*0.8) ? smax/8 : smax/4);
1168
1169   if (*alen < 1) *alen = 1;
1170   if (*blen < 1) *blen = 1;
1171
1172 #if SELOPT_DEBUG
1173   fprintf(stderr, "alen %d blen %d smax %d nkeys %d\n", *alen, *blen, smax, nkeys);
1174 #endif
1175 }
1176
1177 /*
1178 ** Try to find a perfect hash function.
1179 ** Return the successful initializer for the initial hash.
1180 ** Return 0 if no perfect hash could be found.
1181 */
1182 static int findhash(bstuff **tabb, ub4 *alen, ub4 *blen, ub8 *salt,
1183                     ub4 *scramble, ub4 smax, key *keys, ub4 nkeys)
1184 // bstuff  **tabb;           /* output, tab[] of the perfect hash, length *blen */
1185 // ub4      *alen;                 /* output, 0..alen-1 is range for a of (a,b) */
1186 // ub4      *blen;                 /* output, 0..blen-1 is range for b of (a,b) */
1187 // ub4      *salt;                         /* output, initializes initial hash */
1188 // ub4      *scramble;                      /* input, hash = a^scramble[tab[b]] */
1189 // ub4      smax;                           /* input, scramble[i] in 0..smax-1 */
1190 // key      *keys;                                       /* input, keys to hash */
1191 // ub4       nkeys;                       /* input, number of keys being hashed */
1192 {
1193   ub4 bad_initkey;                       /* how many times did initkey fail? */
1194   ub4 bad_perfect;                       /* how many times did perfect fail? */
1195   ub4 si;                        /* trial initializer for initial hash */
1196   ub4 maxalen;
1197   hstuff *tabh;                       /* table of keys indexed by hash value */
1198   qstuff *tabq;    /* table of stuff indexed by queue value, used by augment */
1199
1200   /* guess initial values for alen and blen */
1201   initalen(alen, blen, smax, nkeys);
1202
1203   scrambleinit(scramble, smax);
1204
1205   maxalen = smax;
1206
1207   /* allocate working memory */
1208   *tabb = new bstuff[*blen];
1209   tabq  = new qstuff[*blen+1];
1210   tabh  = new hstuff[smax];
1211
1212   /* Actually find the perfect hash */
1213   *salt = 0;
1214   bad_initkey = 0;
1215   bad_perfect = 0;
1216   for (si=1; ; ++si)
1217   {
1218     ub4 rslinit;
1219     /* Try to find distinct (A,B) for all keys */
1220     *salt = si * 0x9e3779b97f4a7c13LL; /* golden ratio (arbitrary value) */
1221     initnorm(keys, nkeys, *alen, *blen, smax, *salt);
1222     rslinit = inittab(*tabb, *blen, keys, nkeys, FALSE);
1223     if (rslinit == 0)
1224     {
1225       /* didn't find distinct (a,b) */
1226       if (++bad_initkey >= RETRY_INITKEY)
1227       {
1228         /* Try to put more bits in (A,B) to make distinct (A,B) more likely */
1229         if (*alen < maxalen)
1230         {
1231           *alen *= 2;
1232         }
1233         else if (*blen < smax)
1234         {
1235           *blen *= 2;
1236           delete[] tabq;
1237           delete[] *tabb;
1238           *tabb  = new bstuff[*blen];
1239           tabq  = new qstuff[*blen+1];
1240         }
1241         bad_initkey = 0;
1242         bad_perfect = 0;
1243       }
1244       continue;                             /* two keys have same (a,b) pair */
1245     }
1246
1247     /* Given distinct (A,B) for all keys, build a perfect hash */
1248     if (!perfect(*tabb, tabh, tabq, *blen, smax, scramble, nkeys))
1249     {
1250       if (++bad_perfect >= RETRY_PERFECT)
1251       {
1252         if (*blen < smax)
1253         {
1254           *blen *= 2;
1255           delete[] *tabb;
1256           delete[] tabq;
1257           *tabb  = new bstuff[*blen];
1258           tabq  = new qstuff[*blen+1];
1259           --si;               /* we know this salt got distinct (A,B) */
1260         }
1261         else
1262         {
1263           return 0;
1264         }
1265         bad_perfect = 0;
1266       }
1267       continue;
1268     }
1269
1270     break;
1271   }
1272
1273   /* free working memory */
1274   delete[] tabh;
1275   delete[] tabq;
1276
1277   return 1;
1278 }
1279
1280 /*
1281 ------------------------------------------------------------------------------
1282 Input/output type routines
1283 ------------------------------------------------------------------------------
1284 */
1285
1286 /* get the list of keys */
1287 static void getkeys(key **keys, ub4 *nkeys, const string_map& strings)
1288 {
1289   key *buf = new key[strings.size()];
1290   size_t i;
1291   string_map::const_iterator s;
1292   for (i = 0, s = strings.begin(); s != strings.end(); ++s, ++i) {
1293     key *mykey = buf+i;
1294     mykey->name_k = (ub1 *)s->first;
1295     mykey->len_k  = (ub4)strlen(s->first);
1296   }
1297   *keys = buf;
1298   *nkeys = strings.size();
1299 }
1300
1301
1302 static perfect_hash
1303 make_perfect(const string_map& strings)
1304 {
1305   ub4       nkeys;                                         /* number of keys */
1306   key      *keys;                                    /* head of list of keys */
1307   bstuff   *tab;                                       /* table indexed by b */
1308   ub4       smax;            /* scramble[] values in 0..smax-1, a power of 2 */
1309   ub4       alen;                            /* a in 0..alen-1, a power of 2 */
1310   ub4       blen;                            /* b in 0..blen-1, a power of 2 */
1311   ub8       salt;                       /* a parameter to the hash function */
1312   ub4       scramble[SCRAMBLE_LEN];           /* used in final hash function */
1313   int ok;
1314   int i;
1315   perfect_hash result;
1316
1317   /* read in the list of keywords */
1318   getkeys(&keys, &nkeys, strings);
1319
1320   /* find the hash */
1321   smax = ((ub4)1<<log2u(nkeys));
1322   ok = findhash(&tab, &alen, &blen, &salt,
1323                 scramble, smax, keys, nkeys);
1324   if (!ok) {
1325       smax = 2 * ((ub4)1<<log2u(nkeys));
1326       ok = findhash(&tab, &alen, &blen, &salt,
1327                     scramble, smax, keys, nkeys);
1328   }
1329   if (!ok) {
1330       bzero(&result, sizeof(result));
1331   } else {
1332       /* build the tables */
1333       result.capacity = smax;
1334       result.occupied = nkeys;
1335       result.shift = UB8BITS - log2u(alen);
1336       result.mask = blen - 1;
1337       result.salt = salt;
1338
1339       result.tab = new uint8_t[blen];
1340       for (i = 0; i < blen; i++) {
1341           result.tab[i] = tab[i].val_b;
1342       }
1343       for (i = 0; i < 256; i++) {
1344           result.scramble[i] = scramble[i];
1345       }
1346   }
1347
1348   delete[] keys;
1349   delete[] tab;
1350
1351   return result;
1352 }
1353
1354 // SELOPT_WRITE
1355 #endif
1356
1357 // namespace objc_selopt
1358 };
1359
1360 #undef S32
1361 #undef S64
1362
1363 #endif