include/objc-shared-cache.h

   1 /*
   2  * Copyright (c) 2008 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. Please obtain a copy of the License at
  10  * http://www.opensource.apple.com/apsl/ and read it before using this
  11  * file.
  12  *
  13  * The Original Code and all software distributed under the License are
  14  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  15  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  16  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  18  * Please see the License for the specific language governing rights and
  19  * limitations under the License.
  20  *
  21  * @APPLE_LICENSE_HEADER_END@
  22  */
  23
  24 /*
  25 Portions derived from:
  26
  27 --------------------------------------------------------------------
  28 lookup8.c, by Bob Jenkins, January 4 1997, Public Domain.
  29 hash(), hash2(), hash3, and mix() are externally useful functions.
  30 Routines to test the hash are included if SELF_TEST is defined.
  31 You can use this free for any purpose.  It has no warranty.
  32 --------------------------------------------------------------------
  33
  34 ------------------------------------------------------------------------------
  35 perfect.c: code to generate code for a hash for perfect hashing.
  36 (c) Bob Jenkins, September 1996, December 1999
  37 You may use this code in any way you wish, and it is free.  No warranty.
  38 I hereby place this in the public domain.
  39 Source is http://burtleburtle.net/bob/c/perfect.c
  40 ------------------------------------------------------------------------------
  41 */
  42
  43 /*
  44  * objc-selopt.h
  45  * Interface between libobjc and dyld
  46  * for selector uniquing in the dyld shared cache.
  47  *
  48  * When building the shared cache, dyld locates all selectors and selector
  49  * references in the cached images. It builds a perfect hash table out of
  50  * them and writes the table into the shared cache copy of libobjc.
  51  * libobjc then uses that table as the builtin selector list.
  52  *
  53  * Versioning
  54  * The table has a version number. dyld and objc can both ignore the table
  55  * if the other used the wrong version number.
  56  *
  57  * Completeness
  58  * Not all libraries are in the shared cache. Libraries that are in the
  59  * shared cache and were optimized are specially marked. Libraries on
  60  * disk never include those marks.
  61  *
  62  * Coherency
  63  * Libraries optimized in the shared cache can be replaced by unoptimized
  64  * copies from disk when loaded. The copy from disk is not marked and will
  65  * be fixed up by libobjc. The shared cache copy is still mapped into the
  66  * process, so the table can point to cstring data in that library's part
  67  * of the shared cache without trouble.
  68  *
  69  * Atomicity
  70  * dyld writes the table itself last. If dyld marks some metadata as
  71  * updated but then fails to write a table for some reason, libobjc
  72  * fixes up all metadata as if it were not marked.
  73  */
  74
  75 #ifndef _OBJC_SELOPT_H
  76 #define _OBJC_SELOPT_H
  77
  78 /*
  79   DO NOT INCLUDE ANY objc HEADERS HERE
  80   dyld USES THIS FILE AND CANNOT SEE THEM
  81 */
  82 #include <stdint.h>
  83 #include <stdlib.h>
  84 #ifdef SELOPT_WRITE
  85 #include <unordered_map>
  86 #endif
  87 /*
  88   DO NOT INCLUDE ANY objc HEADERS HERE
  89   dyld USES THIS FILE AND CANNOT SEE THEM
  90 */
  91
  92 #ifndef STATIC_ASSERT
  93 #   define STATIC_ASSERT(x) _STATIC_ASSERT2(x, __LINE__)
  94 #   define _STATIC_ASSERT2(x, line) _STATIC_ASSERT3(x, line)
  95 #   define _STATIC_ASSERT3(x, line)                                     \
  96         typedef struct {                                                \
  97             int _static_assert[(x) ? 0 : -1];                           \
  98         } _static_assert_ ## line __attribute__((unavailable))
  99 #endif
 100
 101 #define SELOPT_DEBUG 0
 102
 103 #define S32(x) x = little_endian ? OSSwapHostToLittleInt32(x) : OSSwapHostToBigInt32(x)
 104 #define S64(x) x = little_endian ? OSSwapHostToLittleInt64(x) : OSSwapHostToBigInt64(x)
 105
 106 namespace objc_opt {
 107
 108 typedef int32_t objc_stringhash_offset_t;
 109 typedef uint8_t objc_stringhash_check_t;
 110
 111 static uint64_t lookup8( uint8_t *k, size_t length, uint64_t level);
 112
 113 #ifdef SELOPT_WRITE
 114
 115 // Perfect hash code is at the end of this file.
 116
 117 struct perfect_hash {
 118     uint32_t capacity;
 119     uint32_t occupied;
 120     uint32_t shift;
 121     uint32_t mask;
 122     uint64_t salt;
 123
 124     uint32_t scramble[256];
 125     uint8_t *tab;  // count == mask+1; free with delete[]
 126
 127     perfect_hash() : tab(0) { }
 128
 129     ~perfect_hash() { if (tab) delete[] tab; }
 130 };
 131
 132 struct eqstr {
 133     bool operator()(const char* s1, const char* s2) const {
 134         return strcmp(s1, s2) == 0;
 135     }
 136 };
 137
 138 struct hashstr {
 139     size_t operator()(const char *s) const {
 140         return (size_t)lookup8((uint8_t *)s, strlen(s), 0);
 141     }
 142 };
 143
 144 // cstring => cstring's vmaddress
 145 // (used for selector names and class names)
 146 typedef std::unordered_map<const char *, uint64_t, hashstr, eqstr> string_map;
 147
 148 // protocol name => protocol vmaddress
 149 typedef std::unordered_map<const char *, uint64_t, hashstr, eqstr> protocol_map;
 150
 151 // class name => (class vmaddress, header_info vmaddress)
 152 typedef std::unordered_multimap<const char *, std::pair<uint64_t, uint64_t>, hashstr, eqstr> class_map;
 153
 154 static perfect_hash make_perfect(const string_map& strings);
 155
 156 #endif
 157
 158
 159 // Precomputed perfect hash table of strings.
 160 // Base class for precomputed selector table and class table.
 161 // Edit objc-sel-table.s and OPT_INITIALIZER if you change this structure.
 162 struct objc_stringhash_t {
 163     uint32_t capacity;
 164     uint32_t occupied;
 165     uint32_t shift;
 166     uint32_t mask;
 167     uint32_t unused1;  // was zero
 168     uint32_t unused2;  // alignment pad
 169     uint64_t salt;
 170
 171     uint32_t scramble[256];
 172     uint8_t tab[0];                   /* tab[mask+1] (always power-of-2) */
 173     // uint8_t checkbytes[capacity];  /* check byte for each string */
 174     // int32_t offsets[capacity];     /* offsets from &capacity to cstrings */
 175
 176     objc_stringhash_check_t *checkbytes() { return (objc_stringhash_check_t *)&tab[mask+1]; }
 177     const objc_stringhash_check_t *checkbytes() const { return (const objc_stringhash_check_t *)&tab[mask+1]; }
 178
 179     objc_stringhash_offset_t *offsets() { return (objc_stringhash_offset_t *)&checkbytes()[capacity]; }
 180     const objc_stringhash_offset_t *offsets() const { return (const objc_stringhash_offset_t *)&checkbytes()[capacity]; }
 181
 182     uint32_t hash(const char *key, size_t keylen) const
 183     {
 184         uint64_t val = lookup8((uint8_t*)key, keylen, salt);
 185         uint32_t index = (uint32_t)(val>>shift) ^ scramble[tab[val&mask]];
 186         return index;
 187     }
 188
 189     uint32_t hash(const char *key) const
 190     {
 191         return hash(key, strlen(key));
 192     }
 193
 194     // The check bytes areused to reject strings that aren't in the table
 195     // without paging in the table's cstring data. This checkbyte calculation
 196     // catches 4785/4815 rejects when launching Safari; a perfect checkbyte
 197     // would catch 4796/4815.
 198     objc_stringhash_check_t checkbyte(const char *key, size_t keylen) const
 199     {
 200         return
 201             ((key[0] & 0x7) << 5)
 202             |
 203             ((uint8_t)keylen & 0x1f);
 204     }
 205
 206     objc_stringhash_check_t checkbyte(const char *key) const
 207     {
 208         return checkbyte(key, strlen(key));
 209     }
 210
 211
 212 #define INDEX_NOT_FOUND (~(uint32_t)0)
 213
 214     uint32_t getIndex(const char *key) const
 215     {
 216         size_t keylen = strlen(key);
 217         uint32_t h = hash(key, keylen);
 218
 219         // Use check byte to reject without paging in the table's cstrings
 220         objc_stringhash_check_t h_check = checkbytes()[h];
 221         objc_stringhash_check_t key_check = checkbyte(key, keylen);
 222         bool check_fail = (h_check != key_check);
 223 #if ! SELOPT_DEBUG
 224         if (check_fail) return INDEX_NOT_FOUND;
 225 #endif
 226
 227         objc_stringhash_offset_t offset = offsets()[h];
 228         if (offset == 0) return INDEX_NOT_FOUND;
 229         const char *result = (const char *)this + offset;
 230         if (0 != strcmp(key, result)) return INDEX_NOT_FOUND;
 231
 232 #if SELOPT_DEBUG
 233         if (check_fail) abort();
 234 #endif
 235
 236         return h;
 237     }
 238
 239 #ifdef SELOPT_WRITE
 240
 241     size_t size()
 242     {
 243         return sizeof(objc_stringhash_t)
 244             + mask+1
 245             + capacity * sizeof(objc_stringhash_check_t)
 246             + capacity * sizeof(objc_stringhash_offset_t);
 247     }
 248
 249     void byteswap(bool little_endian)
 250     {
 251         // tab and checkbytes are arrays of bytes, no swap needed
 252         for (uint32_t i = 0; i < 256; i++) {
 253             S32(scramble[i]);
 254         }
 255         objc_stringhash_offset_t *o = offsets();
 256         for (uint32_t i = 0; i < capacity; i++) {
 257             S32(o[i]);
 258         }
 259
 260         S32(capacity);
 261         S32(occupied);
 262         S32(shift);
 263         S32(mask);
 264         S64(salt);
 265     }
 266
 267     const char *write(uint64_t base, size_t remaining, string_map& strings)
 268     {
 269         if (sizeof(objc_stringhash_t) > remaining) {
 270             return "selector section too small (metadata not optimized)";
 271         }
 272
 273         if (strings.size() == 0) {
 274             bzero(this, sizeof(objc_stringhash_t));
 275             return NULL;
 276         }
 277
 278         perfect_hash phash = make_perfect(strings);
 279         if (phash.capacity == 0) {
 280             return "perfect hash failed (metadata not optimized)";
 281         }
 282
 283         // Set header
 284         capacity = phash.capacity;
 285         occupied = phash.occupied;
 286         shift = phash.shift;
 287         mask = phash.mask;
 288         unused1 = 0;
 289         unused2 = 0;
 290         salt = phash.salt;
 291
 292         if (size() > remaining) {
 293             return "selector section too small (metadata not optimized)";
 294         }
 295
 296         // Set hash data
 297         for (uint32_t i = 0; i < 256; i++) {
 298             scramble[i] = phash.scramble[i];
 299         }
 300         for (uint32_t i = 0; i < phash.mask+1; i++) {
 301             tab[i] = phash.tab[i];
 302         }
 303
 304         // Set offsets to 0
 305         for (uint32_t i = 0; i < phash.capacity; i++) {
 306             offsets()[i] = 0;
 307         }
 308         // Set checkbytes to 0
 309         for (uint32_t i = 0; i < phash.capacity; i++) {
 310             checkbytes()[i] = 0;
 311         }
 312
 313         // Set real string offsets and checkbytes
 314 #       define SHIFT (64 - 8*sizeof(objc_stringhash_offset_t))
 315         string_map::const_iterator s;
 316         for (s = strings.begin(); s != strings.end(); ++s) {
 317             int64_t offset = s->second - base;
 318             if ((offset<<SHIFT)>>SHIFT != offset) {
 319                 return "selector offset too big (metadata not optimized)";
 320             }
 321
 322             uint32_t h = hash(s->first);
 323             offsets()[h] = (objc_stringhash_offset_t)offset;
 324             checkbytes()[h] = checkbyte(s->first);
 325         }
 326 #       undef SHIFT
 327
 328         return NULL;
 329     }
 330
 331 // SELOPT_WRITE
 332 #endif
 333 };
 334
 335
 336 // Precomputed selector table.
 337 // Edit objc-sel-table.s and OPT_INITIALIZER if you change this structure.
 338 struct objc_selopt_t : objc_stringhash_t {
 339     const char *get(const char *key) const
 340     {
 341         uint32_t h = getIndex(key);
 342         if (h == INDEX_NOT_FOUND) return NULL;
 343
 344         return (const char *)this + offsets()[h];
 345     }
 346 };
 347
 348 // Precomputed class list.
 349 // Edit objc-sel-table.s and OPT_INITIALIZER if you change these structures.
 350
 351 struct objc_classheader_t {
 352     objc_stringhash_offset_t clsOffset;
 353     objc_stringhash_offset_t hiOffset;
 354
 355     // For duplicate class names:
 356     // clsOffset = count<<1 | 1
 357     // duplicated classes are duplicateOffsets[hiOffset..hiOffset+count-1]
 358     bool isDuplicate() const { return clsOffset & 1; }
 359     uint32_t duplicateCount() const { return clsOffset >> 1; }
 360     uint32_t duplicateIndex() const { return hiOffset; }
 361 };
 362
 363
 364 struct objc_clsopt_t : objc_stringhash_t {
 365     // ...objc_stringhash_t fields...
 366     // objc_classheader_t classOffsets[capacity]; /* offsets from &capacity to class_t and header_info */
 367     // uint32_t duplicateCount;
 368     // objc_classheader_t duplicateOffsets[duplicatedClasses];
 369
 370     objc_classheader_t *classOffsets() { return (objc_classheader_t *)&offsets()[capacity]; }
 371     const objc_classheader_t *classOffsets() const { return (const objc_classheader_t *)&offsets()[capacity]; }
 372
 373     uint32_t& duplicateCount() { return *(uint32_t *)&classOffsets()[capacity]; }
 374     const uint32_t& duplicateCount() const { return *(const uint32_t *)&classOffsets()[capacity]; }
 375
 376     objc_classheader_t *duplicateOffsets() { return (objc_classheader_t *)(&duplicateCount()+1); }
 377     const objc_classheader_t *duplicateOffsets() const { return (const objc_classheader_t *)(&duplicateCount()+1); }
 378
 379     // 0/NULL/NULL: not found
 380     // 1/ptr/ptr: found exactly one
 381     // n/NULL/NULL:  found N - use getClassesAndHeaders() instead
 382     uint32_t getClassAndHeader(const char *key, void*& cls, void*& hi) const
 383     {
 384         uint32_t h = getIndex(key);
 385         if (h == INDEX_NOT_FOUND) {
 386             cls = NULL;
 387             hi = NULL;
 388             return 0;
 389         }
 390
 391         const objc_classheader_t& clshi = classOffsets()[h];
 392         if (! clshi.isDuplicate()) {
 393             // class appears in exactly one header
 394             cls = (void *)((const char *)this + clshi.clsOffset);
 395             hi  = (void *)((const char *)this + clshi.hiOffset);
 396             return 1;
 397         }
 398         else {
 399             // class appears in more than one header - use getClassesAndHeaders
 400             cls = NULL;
 401             hi = NULL;
 402             return clshi.duplicateCount();
 403         }
 404     }
 405
 406     void getClassesAndHeaders(const char *key, void **cls, void **hi) const
 407     {
 408         uint32_t h = getIndex(key);
 409         if (h == INDEX_NOT_FOUND) return;
 410
 411         const objc_classheader_t& clshi = classOffsets()[h];
 412         if (! clshi.isDuplicate()) {
 413             // class appears in exactly one header
 414             cls[0] = (void *)((const char *)this + clshi.clsOffset);
 415             hi[0]  = (void *)((const char *)this + clshi.hiOffset);
 416         }
 417         else {
 418             // class appears in more than one header
 419             uint32_t count = clshi.duplicateCount();
 420             const objc_classheader_t *list =
 421                 &duplicateOffsets()[clshi.duplicateIndex()];
 422             for (uint32_t i = 0; i < count; i++) {
 423                 cls[i] = (void *)((const char *)this + list[i].clsOffset);
 424                 hi[i]  = (void *)((const char *)this + list[i].hiOffset);
 425             }
 426         }
 427     }
 428
 429 #ifdef SELOPT_WRITE
 430
 431     size_t size()
 432     {
 433         return
 434             objc_stringhash_t::size()
 435             + capacity * sizeof(objc_classheader_t)
 436             + sizeof(duplicateCount())
 437             + duplicateCount() * sizeof(objc_classheader_t);
 438     }
 439
 440     void byteswap(bool little_endian)
 441     {
 442         objc_classheader_t *o;
 443
 444         o = classOffsets();
 445         for (uint32_t i = 0; i < capacity; i++) {
 446             S32(o[i].clsOffset);
 447             S32(o[i].hiOffset);
 448         }
 449
 450         o = duplicateOffsets();
 451         for (uint32_t i = 0; i < duplicateCount(); i++) {
 452             S32(o[i].clsOffset);
 453             S32(o[i].hiOffset);
 454         }
 455
 456         S32(duplicateCount());
 457
 458         objc_stringhash_t::byteswap(little_endian);
 459     }
 460
 461     const char *write(uint64_t base, size_t remaining,
 462                       string_map& strings, class_map& classes, bool verbose)
 463     {
 464         const char *err;
 465         err = objc_stringhash_t::write(base, remaining, strings);
 466         if (err) return err;
 467
 468         if (size() > remaining) {
 469             return "selector section too small (metadata not optimized)";
 470         }
 471
 472         // Set class offsets to 0
 473         for (uint32_t i = 0; i < capacity; i++) {
 474             classOffsets()[i].clsOffset = 0;
 475             classOffsets()[i].hiOffset = 0;
 476         }
 477
 478         // Set real class offsets
 479 #       define SHIFT (64 - 8*sizeof(objc_stringhash_offset_t))
 480         class_map::const_iterator c;
 481         for (c = classes.begin(); c != classes.end(); ++c) {
 482             uint32_t h = getIndex(c->first);
 483             if (h == INDEX_NOT_FOUND) {
 484                 return "class list busted (metadata not optimized)";
 485             }
 486
 487             if (classOffsets()[h].clsOffset != 0) {
 488                 // already did this class
 489                 continue;
 490             }
 491
 492             uint32_t count = classes.count(c->first);
 493             if (count == 1) {
 494                 // only one class with this name
 495
 496                 int64_t coff = c->second.first - base;
 497                 int64_t hoff = c->second.second - base;
 498                 if ((coff<<SHIFT)>>SHIFT != coff) {
 499                     return "class offset too big (metadata not optimized)";
 500                 }
 501                 if ((hoff<<SHIFT)>>SHIFT != hoff) {
 502                     return "header offset too big (metadata not optimized)";
 503                 }
 504
 505                 classOffsets()[h].clsOffset = (objc_stringhash_offset_t)coff;
 506                 classOffsets()[h].hiOffset  = (objc_stringhash_offset_t)hoff;
 507             }
 508             else {
 509                 // class name has duplicates - write them all now
 510                 if (verbose) {
 511                     fprintf(stderr, "update_dyld_shared_cache: %u duplicates of Objective-C class %s\n", count, c->first);
 512                 }
 513
 514                 uint32_t dest = duplicateCount();
 515                 duplicateCount() += count;
 516                 if (size() > remaining) {
 517                     return "selector section too small (metadata not optimized)";
 518                 }
 519
 520                 // classOffsets() instead contains count and array index
 521                 classOffsets()[h].clsOffset = count*2 + 1;
 522                 classOffsets()[h].hiOffset = dest;
 523
 524                 std::pair<class_map::const_iterator, class_map::const_iterator>
 525                     duplicates = classes.equal_range(c->first);
 526                 class_map::const_iterator dup;
 527                 for (dup = duplicates.first; dup != duplicates.second; ++dup) {
 528                     int64_t coff = dup->second.first - base;
 529                     int64_t hoff = dup->second.second - base;
 530                     if ((coff<<SHIFT)>>SHIFT != coff) {
 531                         return "class offset too big (metadata not optimized)";
 532                     }
 533                     if ((hoff<<SHIFT)>>SHIFT != hoff) {
 534                         return "header offset too big (metadata not optimized)";
 535                     }
 536
 537                     duplicateOffsets()[dest].clsOffset = (objc_stringhash_offset_t)coff;
 538                     duplicateOffsets()[dest].hiOffset  = (objc_stringhash_offset_t)hoff;
 539                     dest++;
 540                 }
 541             }
 542         }
 543 #       undef SHIFT
 544
 545         return NULL;
 546     }
 547
 548 // SELOPT_WRITE
 549 #endif
 550 };
 551
 552
 553
 554 struct objc_protocolopt_t : objc_stringhash_t {
 555     // ...objc_stringhash_t fields...
 556     // uint32_t protocolOffsets[capacity]; /* offsets from &capacity to protocol_t */
 557
 558     objc_stringhash_offset_t *protocolOffsets() { return (objc_stringhash_offset_t *)&offsets()[capacity]; }
 559     const objc_stringhash_offset_t *protocolOffsets() const { return (const objc_stringhash_offset_t *)&offsets()[capacity]; }
 560
 561     void* getProtocol(const char *key) const
 562     {
 563         uint32_t h = getIndex(key);
 564         if (h == INDEX_NOT_FOUND) {
 565             return NULL;
 566         }
 567
 568         return (void *)((const char *)this + protocolOffsets()[h]);
 569     }
 570
 571 #ifdef SELOPT_WRITE
 572
 573     size_t size()
 574     {
 575         return
 576             objc_stringhash_t::size() + capacity * sizeof(objc_stringhash_offset_t);
 577     }
 578
 579     void byteswap(bool little_endian)
 580     {
 581         objc_stringhash_offset_t *o;
 582
 583         o = protocolOffsets();
 584         for (objc_stringhash_offset_t i = 0; i < capacity; i++) {
 585             S32(o[i]);
 586         }
 587
 588         objc_stringhash_t::byteswap(little_endian);
 589     }
 590
 591     const char *write(uint64_t base, size_t remaining,
 592                       string_map& strings, protocol_map& protocols,
 593                       bool verbose)
 594     {
 595         const char *err;
 596         err = objc_stringhash_t::write(base, remaining, strings);
 597         if (err) return err;
 598
 599         if (size() > remaining) {
 600             return "selector section too small (metadata not optimized)";
 601         }
 602
 603         // Set protocol offsets to 0
 604         for (uint32_t i = 0; i < capacity; i++) {
 605             protocolOffsets()[i] = 0;
 606         }
 607
 608         // Set real protocol offsets
 609 #       define SHIFT (64 - 8*sizeof(objc_stringhash_offset_t))
 610         protocol_map::const_iterator c;
 611         for (c = protocols.begin(); c != protocols.end(); ++c) {
 612             uint32_t h = getIndex(c->first);
 613             if (h == INDEX_NOT_FOUND) {
 614                 return "protocol list busted (metadata not optimized)";
 615             }
 616
 617             int64_t offset = c->second - base;
 618             if ((offset<<SHIFT)>>SHIFT != offset) {
 619                 return "protocol offset too big (metadata not optimized)";
 620             }
 621
 622             protocolOffsets()[h] = (objc_stringhash_offset_t)offset;
 623         }
 624 #       undef SHIFT
 625
 626         return NULL;
 627     }
 628
 629 // SELOPT_WRITE
 630 #endif
 631 };
 632
 633
 634 // Precomputed image list.
 635 struct objc_headeropt_t;
 636
 637 // Precomputed class list.
 638 struct objc_clsopt_t;
 639
 640 // Edit objc-sel-table.s if you change this value.
 641 enum { VERSION = 13 };
 642
 643 // Top-level optimization structure.
 644 // Edit objc-sel-table.s and OPT_INITIALIZER if you change this structure.
 645 struct alignas(alignof(void*)) objc_opt_t {
 646     uint32_t version;
 647     int32_t selopt_offset;
 648     int32_t headeropt_offset;
 649     int32_t clsopt_offset;
 650     int32_t protocolopt_offset;
 651
 652     const objc_selopt_t* selopt() const {
 653         if (selopt_offset == 0) return NULL;
 654         return (objc_selopt_t *)((uint8_t *)this + selopt_offset);
 655     }
 656     objc_selopt_t* selopt() {
 657         if (selopt_offset == 0) return NULL;
 658         return (objc_selopt_t *)((uint8_t *)this + selopt_offset);
 659     }
 660
 661     struct objc_headeropt_t* headeropt() const {
 662         if (headeropt_offset == 0) return NULL;
 663         return (struct objc_headeropt_t *)((uint8_t *)this + headeropt_offset);
 664     }
 665
 666     struct objc_clsopt_t* clsopt() const {
 667         if (clsopt_offset == 0) return NULL;
 668         return (objc_clsopt_t *)((uint8_t *)this + clsopt_offset);
 669     }
 670
 671     struct objc_protocolopt_t* protocolopt() const {
 672         if (protocolopt_offset == 0) return NULL;
 673         return (objc_protocolopt_t *)((uint8_t *)this + protocolopt_offset);
 674     }
 675 };
 676
 677 // sizeof(objc_opt_t) must be pointer-aligned
 678 STATIC_ASSERT(sizeof(objc_opt_t) % sizeof(void*) == 0);
 679
 680 // Initializer for empty opt of type uint32_t[].
 681 #define X8(x) x, x, x, x, x, x, x, x
 682 #define X64(x) X8(x), X8(x), X8(x), X8(x), X8(x), X8(x), X8(x), X8(x)
 683 #define X256(x) X64(x), X64(x), X64(x), X64(x)
 684 #define OPT_INITIALIZER {                                           \
 685         /* objc_opt_t */                                            \
 686         objc_opt::VERSION, 16, 0, 0,                                \
 687         /* objc_selopt_t */                                         \
 688         4, 4, 63, 3, 0, 0, 0,0, X256(0), 0, 0, 16, 16, 16, 16       \
 689         /* no objc_headeropt_t */                                   \
 690         /* no objc_clsopt_t */                                      \
 691         /* no objc_protocolopt_t */                                 \
 692 }
 693
 694
 695 // List of offsets in libobjc that the shared cache optimization needs to use.
 696 template <typename T>
 697 struct objc_opt_pointerlist_tt {
 698     T protocolClass;
 699 };
 700 typedef struct objc_opt_pointerlist_tt<uintptr_t> objc_opt_pointerlist_t;
 701
 702
 703 /*
 704 --------------------------------------------------------------------
 705 mix -- mix 3 64-bit values reversibly.
 706 mix() takes 48 machine instructions, but only 24 cycles on a superscalar
 707   machine (like Intel's new MMX architecture).  It requires 4 64-bit
 708   registers for 4::2 parallelism.
 709 All 1-bit deltas, all 2-bit deltas, all deltas composed of top bits of
 710   (a,b,c), and all deltas of bottom bits were tested.  All deltas were
 711   tested both on random keys and on keys that were nearly all zero.
 712   These deltas all cause every bit of c to change between 1/3 and 2/3
 713   of the time (well, only 113/400 to 287/400 of the time for some
 714   2-bit delta).  These deltas all cause at least 80 bits to change
 715   among (a,b,c) when the mix is run either forward or backward (yes it
 716   is reversible).
 717 This implies that a hash using mix64 has no funnels.  There may be
 718   characteristics with 3-bit deltas or bigger, I didn't test for
 719   those.
 720 --------------------------------------------------------------------
 721 */
 722 #define mix64(a,b,c) \
 723 { \
 724   a -= b; a -= c; a ^= (c>>43); \
 725   b -= c; b -= a; b ^= (a<<9); \
 726   c -= a; c -= b; c ^= (b>>8); \
 727   a -= b; a -= c; a ^= (c>>38); \
 728   b -= c; b -= a; b ^= (a<<23); \
 729   c -= a; c -= b; c ^= (b>>5); \
 730   a -= b; a -= c; a ^= (c>>35); \
 731   b -= c; b -= a; b ^= (a<<49); \
 732   c -= a; c -= b; c ^= (b>>11); \
 733   a -= b; a -= c; a ^= (c>>12); \
 734   b -= c; b -= a; b ^= (a<<18); \
 735   c -= a; c -= b; c ^= (b>>22); \
 736 }
 737
 738 /*
 739 --------------------------------------------------------------------
 740 hash() -- hash a variable-length key into a 64-bit value
 741   k     : the key (the unaligned variable-length array of bytes)
 742   len   : the length of the key, counting by bytes
 743   level : can be any 8-byte value
 744 Returns a 64-bit value.  Every bit of the key affects every bit of
 745 the return value.  No funnels.  Every 1-bit and 2-bit delta achieves
 746 avalanche.  About 41+5len instructions.
 747
 748 The best hash table sizes are powers of 2.  There is no need to do
 749 mod a prime (mod is sooo slow!).  If you need less than 64 bits,
 750 use a bitmask.  For example, if you need only 10 bits, do
 751   h = (h & hashmask(10));
 752 In which case, the hash table should have hashsize(10) elements.
 753
 754 If you are hashing n strings (uint8_t **)k, do it like this:
 755   for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
 756
 757 By Bob Jenkins, Jan 4 1997.  bob_jenkins@burtleburtle.net.  You may
 758 use this code any way you wish, private, educational, or commercial,
 759 but I would appreciate if you give me credit.
 760
 761 See http://burtleburtle.net/bob/hash/evahash.html
 762 Use for hash table lookup, or anything where one collision in 2^^64
 763 is acceptable.  Do NOT use for cryptographic purposes.
 764 --------------------------------------------------------------------
 765 */
 766
 767 static uint64_t lookup8( uint8_t *k, size_t length, uint64_t level)
 768 // uint8_t *k;        /* the key */
 769 // uint64_t  length;   /* the length of the key */
 770 // uint64_t  level;    /* the previous hash, or an arbitrary value */
 771 {
 772   uint64_t a,b,c;
 773   size_t len;
 774
 775   /* Set up the internal state */
 776   len = length;
 777   a = b = level;                         /* the previous hash value */
 778   c = 0x9e3779b97f4a7c13LL; /* the golden ratio; an arbitrary value */
 779
 780   /*---------------------------------------- handle most of the key */
 781   while (len >= 24)
 782   {
 783     a += (k[0]        +((uint64_t)k[ 1]<< 8)+((uint64_t)k[ 2]<<16)+((uint64_t)k[ 3]<<24)
 784      +((uint64_t)k[4 ]<<32)+((uint64_t)k[ 5]<<40)+((uint64_t)k[ 6]<<48)+((uint64_t)k[ 7]<<56));
 785     b += (k[8]        +((uint64_t)k[ 9]<< 8)+((uint64_t)k[10]<<16)+((uint64_t)k[11]<<24)
 786      +((uint64_t)k[12]<<32)+((uint64_t)k[13]<<40)+((uint64_t)k[14]<<48)+((uint64_t)k[15]<<56));
 787     c += (k[16]       +((uint64_t)k[17]<< 8)+((uint64_t)k[18]<<16)+((uint64_t)k[19]<<24)
 788      +((uint64_t)k[20]<<32)+((uint64_t)k[21]<<40)+((uint64_t)k[22]<<48)+((uint64_t)k[23]<<56));
 789     mix64(a,b,c);
 790     k += 24; len -= 24;
 791   }
 792
 793   /*------------------------------------- handle the last 23 bytes */
 794   c += length;
 795   switch(len)              /* all the case statements fall through */
 796   {
 797   case 23: c+=((uint64_t)k[22]<<56);
 798   case 22: c+=((uint64_t)k[21]<<48);
 799   case 21: c+=((uint64_t)k[20]<<40);
 800   case 20: c+=((uint64_t)k[19]<<32);
 801   case 19: c+=((uint64_t)k[18]<<24);
 802   case 18: c+=((uint64_t)k[17]<<16);
 803   case 17: c+=((uint64_t)k[16]<<8);
 804     /* the first byte of c is reserved for the length */
 805   case 16: b+=((uint64_t)k[15]<<56);
 806   case 15: b+=((uint64_t)k[14]<<48);
 807   case 14: b+=((uint64_t)k[13]<<40);
 808   case 13: b+=((uint64_t)k[12]<<32);
 809   case 12: b+=((uint64_t)k[11]<<24);
 810   case 11: b+=((uint64_t)k[10]<<16);
 811   case 10: b+=((uint64_t)k[ 9]<<8);
 812   case  9: b+=((uint64_t)k[ 8]);
 813   case  8: a+=((uint64_t)k[ 7]<<56);
 814   case  7: a+=((uint64_t)k[ 6]<<48);
 815   case  6: a+=((uint64_t)k[ 5]<<40);
 816   case  5: a+=((uint64_t)k[ 4]<<32);
 817   case  4: a+=((uint64_t)k[ 3]<<24);
 818   case  3: a+=((uint64_t)k[ 2]<<16);
 819   case  2: a+=((uint64_t)k[ 1]<<8);
 820   case  1: a+=((uint64_t)k[ 0]);
 821     /* case 0: nothing left to add */
 822   }
 823   mix64(a,b,c);
 824   /*-------------------------------------------- report the result */
 825   return c;
 826 }
 827
 828
 829 #ifdef SELOPT_WRITE
 830
 831 /*
 832 ------------------------------------------------------------------------------
 833 This generates a minimal perfect hash function.  That means, given a
 834 set of n keys, this determines a hash function that maps each of
 835 those keys into a value in 0..n-1 with no collisions.
 836
 837 The perfect hash function first uses a normal hash function on the key
 838 to determine (a,b) such that the pair (a,b) is distinct for all
 839 keys, then it computes a^scramble[tab[b]] to get the final perfect hash.
 840 tab[] is an array of 1-byte values and scramble[] is a 256-term array of
 841 2-byte or 4-byte values.  If there are n keys, the length of tab[] is a
 842 power of two between n/3 and n.
 843
 844 I found the idea of computing distinct (a,b) values in "Practical minimal
 845 perfect hash functions for large databases", Fox, Heath, Chen, and Daoud,
 846 Communications of the ACM, January 1992.  They found the idea in Chichelli
 847 (CACM Jan 1980).  Beyond that, our methods differ.
 848
 849 The key is hashed to a pair (a,b) where a in 0..*alen*-1 and b in
 850 0..*blen*-1.  A fast hash function determines both a and b
 851 simultaneously.  Any decent hash function is likely to produce
 852 hashes so that (a,b) is distinct for all pairs.  I try the hash
 853 using different values of *salt* until all pairs are distinct.
 854
 855 The final hash is (a XOR scramble[tab[b]]).  *scramble* is a
 856 predetermined mapping of 0..255 into 0..smax-1.  *tab* is an
 857 array that we fill in in such a way as to make the hash perfect.
 858
 859 First we fill in all values of *tab* that are used by more than one
 860 key.  We try all possible values for each position until one works.
 861
 862 This leaves m unmapped keys and m values that something could hash to.
 863 If you treat unmapped keys as lefthand nodes and unused hash values
 864 as righthand nodes, and draw a line connecting each key to each hash
 865 value it could map to, you get a bipartite graph.  We attempt to
 866 find a perfect matching in this graph.  If we succeed, we have
 867 determined a perfect hash for the whole set of keys.
 868
 869 *scramble* is used because (a^tab[i]) clusters keys around *a*.
 870 ------------------------------------------------------------------------------
 871 */
 872
 873 typedef uint64_t  ub8;
 874 #define UB8MAXVAL 0xffffffffffffffffLL
 875 #define UB8BITS 64
 876 typedef uint32_t  ub4;
 877 #define UB4MAXVAL 0xffffffff
 878 #define UB4BITS 32
 879 typedef uint16_t  ub2;
 880 #define UB2MAXVAL 0xffff
 881 #define UB2BITS 16
 882 typedef uint8_t ub1;
 883 #define UB1MAXVAL 0xff
 884 #define UB1BITS 8
 885
 886 #define TRUE  1
 887 #define FALSE 0
 888
 889 #define SCRAMBLE_LEN 256 // ((ub4)1<<16)                    /* length of *scramble* */
 890 #define RETRY_INITKEY 2048  /* number of times to try to find distinct (a,b) */
 891 #define RETRY_PERFECT 4     /* number of times to try to make a perfect hash */
 892
 893
 894 /* representation of a key */
 895 struct key
 896 {
 897   ub1        *name_k;                                      /* the actual key */
 898   ub4         len_k;                         /* the length of the actual key */
 899   ub4         hash_k;                 /* the initial hash value for this key */
 900 /* beyond this point is mapping-dependent */
 901   ub4         a_k;                            /* a, of the key maps to (a,b) */
 902   ub4         b_k;                            /* b, of the key maps to (a,b) */
 903   struct key *nextb_k;                               /* next key with this b */
 904 };
 905 typedef  struct key  key;
 906
 907 /* things indexed by b of original (a,b) pair */
 908 struct bstuff
 909 {
 910   ub2  val_b;                                        /* hash=a^tabb[b].val_b */
 911   key *list_b;                   /* tabb[i].list_b is list of keys with b==i */
 912   ub4  listlen_b;                                        /* length of list_b */
 913   ub4  water_b;           /* high watermark of who has visited this map node */
 914 };
 915 typedef  struct bstuff  bstuff;
 916
 917 /* things indexed by final hash value */
 918 struct hstuff
 919 {
 920   key *key_h;                   /* tabh[i].key_h is the key with a hash of i */
 921 };
 922 typedef  struct hstuff hstuff;
 923
 924 /* things indexed by queue position */
 925 struct qstuff
 926 {
 927   bstuff *b_q;                        /* b that currently occupies this hash */
 928   ub4     parent_q;     /* queue position of parent that could use this hash */
 929   ub2     newval_q;      /* what to change parent tab[b] to to use this hash */
 930   ub2     oldval_q;                              /* original value of tab[b] */
 931 };
 932 typedef  struct qstuff  qstuff;
 933
 934
 935 /*
 936 ------------------------------------------------------------------------------
 937 Find the mapping that will produce a perfect hash
 938 ------------------------------------------------------------------------------
 939 */
 940
 941 /* return the ceiling of the log (base 2) of val */
 942 static ub4  log2u(ub4 val)
 943 {
 944   ub4 i;
 945   for (i=0; ((ub4)1<<i) < val; ++i)
 946     ;
 947   return i;
 948 }
 949
 950 /* compute p(x), where p is a permutation of 0..(1<<nbits)-1 */
 951 /* permute(0)=0.  This is intended and useful. */
 952 static ub4  permute(ub4 x, ub4 nbits)
 953 // ub4 x;                                       /* input, a value in some range */
 954 // ub4 nbits;                                 /* input, number of bits in range */
 955 {
 956   int i;
 957   int mask   = ((ub4)1<<nbits)-1;                                /* all ones */
 958   int const2 = 1+nbits/2;
 959   int const3 = 1+nbits/3;
 960   int const4 = 1+nbits/4;
 961   int const5 = 1+nbits/5;
 962   for (i=0; i<20; ++i)
 963   {
 964     x = (x+(x<<const2)) & mask;
 965     x = (x^(x>>const3));
 966     x = (x+(x<<const4)) & mask;
 967     x = (x^(x>>const5));
 968   }
 969   return x;
 970 }
 971
 972 /* initialize scramble[] with distinct random values in 0..smax-1 */
 973 static void scrambleinit(ub4 *scramble, ub4 smax)
 974 // ub4      *scramble;                            /* hash is a^scramble[tab[b]] */
 975 // ub4       smax;                    /* scramble values should be in 0..smax-1 */
 976 {
 977   ub4 i;
 978
 979   /* fill scramble[] with distinct random integers in 0..smax-1 */
 980   for (i=0; i<SCRAMBLE_LEN; ++i)
 981   {
 982     scramble[i] = permute(i, log2u(smax));
 983   }
 984 }
 985
 986
 987 /*
 988  * put keys in tabb according to key->b_k
 989  * check if the initial hash might work
 990  */
 991 static int inittab(bstuff *tabb, ub4 blen, key *keys, ub4 nkeys, int complete)
 992 // bstuff   *tabb;                     /* output, list of keys with b for (a,b) */
 993 // ub4       blen;                                            /* length of tabb */
 994 // key      *keys;                               /* list of keys already hashed */
 995 // int       complete;        /* TRUE means to complete init despite collisions */
 996 {
 997   int  nocollision = TRUE;
 998   ub4 i;
 999
1000   memset((void *)tabb, 0, (size_t)(sizeof(bstuff)*blen));
1001
1002   /* Two keys with the same (a,b) guarantees a collision */
1003   for (i = 0; i < nkeys; i++) {
1004     key *mykey = keys+i;
1005     key *otherkey;
1006
1007     for (otherkey=tabb[mykey->b_k].list_b;
1008          otherkey;
1009          otherkey=otherkey->nextb_k)
1010     {
1011       if (mykey->a_k == otherkey->a_k)
1012       {
1013         nocollision = FALSE;
1014         if (!complete)
1015           return FALSE;
1016       }
1017     }
1018     ++tabb[mykey->b_k].listlen_b;
1019     mykey->nextb_k = tabb[mykey->b_k].list_b;
1020     tabb[mykey->b_k].list_b = mykey;
1021   }
1022
1023   /* no two keys have the same (a,b) pair */
1024   return nocollision;
1025 }
1026
1027
1028 /* Do the initial hash for normal mode (use lookup and checksum) */
1029 static void initnorm(key *keys, ub4 nkeys, ub4 alen, ub4 blen, ub4 smax, ub8 salt)
1030 // key      *keys;                                          /* list of all keys */
1031 // ub4       alen;                    /* (a,b) has a in 0..alen-1, a power of 2 */
1032 // ub4       blen;                    /* (a,b) has b in 0..blen-1, a power of 2 */
1033 // ub4       smax;                   /* maximum range of computable hash values */
1034 // ub4       salt;                     /* used to initialize the hash function */
1035 // gencode  *final;                          /* output, code for the final hash */
1036 {
1037   ub4 loga = log2u(alen);                            /* log based 2 of blen */
1038   ub4 i;
1039   for (i = 0; i < nkeys; i++) {
1040     key *mykey = keys+i;
1041     ub8 hash = lookup8(mykey->name_k, mykey->len_k, salt);
1042     mykey->a_k = (loga > 0) ? hash>>(UB8BITS-loga) : 0;
1043     mykey->b_k = (blen > 1) ? hash&(blen-1) : 0;
1044   }
1045 }
1046
1047
1048 /* Try to apply an augmenting list */
1049 static int apply(bstuff *tabb, hstuff *tabh, qstuff *tabq, ub4 blen, ub4 *scramble, ub4 tail, int rollback)
1050 // bstuff *tabb;
1051 // hstuff *tabh;
1052 // qstuff *tabq;
1053 // ub4     blen;
1054 // ub4    *scramble;
1055 // ub4     tail;
1056 // int     rollback;          /* FALSE applies augmenting path, TRUE rolls back */
1057 {
1058   ub4     hash;
1059   key    *mykey;
1060   bstuff *pb;
1061   ub4     child;
1062   ub4     parent;
1063   ub4     stabb;                                         /* scramble[tab[b]] */
1064
1065   /* walk from child to parent */
1066   for (child=tail-1; child; child=parent)
1067   {
1068     parent = tabq[child].parent_q;                    /* find child's parent */
1069     pb     = tabq[parent].b_q;             /* find parent's list of siblings */
1070
1071     /* erase old hash values */
1072     stabb = scramble[pb->val_b];
1073     for (mykey=pb->list_b; mykey; mykey=mykey->nextb_k)
1074     {
1075       hash = mykey->a_k^stabb;
1076       if (mykey == tabh[hash].key_h)
1077       {                            /* erase hash for all of child's siblings */
1078         tabh[hash].key_h = (key *)0;
1079       }
1080     }
1081
1082     /* change pb->val_b, which will change the hashes of all parent siblings */
1083     pb->val_b = (rollback ? tabq[child].oldval_q : tabq[child].newval_q);
1084
1085     /* set new hash values */
1086     stabb = scramble[pb->val_b];
1087     for (mykey=pb->list_b; mykey; mykey=mykey->nextb_k)
1088     {
1089       hash = mykey->a_k^stabb;
1090       if (rollback)
1091       {
1092         if (parent == 0) continue;                  /* root never had a hash */
1093       }
1094       else if (tabh[hash].key_h)
1095       {
1096         /* very rare: roll back any changes */
1097         apply(tabb, tabh, tabq, blen, scramble, tail, TRUE);
1098         return FALSE;                                  /* failure, collision */
1099       }
1100       tabh[hash].key_h = mykey;
1101     }
1102   }
1103   return TRUE;
1104 }
1105
1106
1107 /*
1108 -------------------------------------------------------------------------------
1109 augment(): Add item to the mapping.
1110
1111 Construct a spanning tree of *b*s with *item* as root, where each
1112 parent can have all its hashes changed (by some new val_b) with
1113 at most one collision, and each child is the b of that collision.
1114
1115 I got this from Tarjan's "Data Structures and Network Algorithms".  The
1116 path from *item* to a *b* that can be remapped with no collision is
1117 an "augmenting path".  Change values of tab[b] along the path so that
1118 the unmapped key gets mapped and the unused hash value gets used.
1119
1120 Assuming 1 key per b, if m out of n hash values are still unused,
1121 you should expect the transitive closure to cover n/m nodes before
1122 an unused node is found.  Sum(i=1..n)(n/i) is about nlogn, so expect
1123 this approach to take about nlogn time to map all single-key b's.
1124 -------------------------------------------------------------------------------
1125 */
1126 static int augment(bstuff *tabb, hstuff *tabh, qstuff *tabq, ub4 blen, ub4 *scramble, ub4 smax, bstuff *item, ub4 nkeys,
1127                    ub4 highwater)
1128 // bstuff   *tabb;                                        /* stuff indexed by b */
1129 // hstuff   *tabh;  /* which key is associated with which hash, indexed by hash */
1130 // qstuff   *tabq;            /* queue of *b* values, this is the spanning tree */
1131 // ub4       blen;                                            /* length of tabb */
1132 // ub4      *scramble;                      /* final hash is a^scramble[tab[b]] */
1133 // ub4       smax;                                 /* highest value in scramble */
1134 // bstuff   *item;                           /* &tabb[b] for the b to be mapped */
1135 // ub4       nkeys;                         /* final hash must be in 0..nkeys-1 */
1136 // ub4       highwater;        /* a value higher than any now in tabb[].water_b */
1137 {
1138   ub4  q;                      /* current position walking through the queue */
1139   ub4  tail;              /* tail of the queue.  0 is the head of the queue. */
1140   ub4  limit=UB1MAXVAL+1;
1141   ub4  highhash = smax;
1142
1143   /* initialize the root of the spanning tree */
1144   tabq[0].b_q = item;
1145   tail = 1;
1146
1147   /* construct the spanning tree by walking the queue, add children to tail */
1148   for (q=0; q<tail; ++q)
1149   {
1150     bstuff *myb = tabq[q].b_q;                        /* the b for this node */
1151     ub4     i;                              /* possible value for myb->val_b */
1152
1153     if (q == 1)
1154       break;                                  /* don't do transitive closure */
1155
1156     for (i=0; i<limit; ++i)
1157     {
1158       bstuff *childb = (bstuff *)0;             /* the b that this i maps to */
1159       key    *mykey;                       /* for walking through myb's keys */
1160
1161       for (mykey = myb->list_b; mykey; mykey=mykey->nextb_k)
1162       {
1163         key    *childkey;
1164         ub4 hash = mykey->a_k^scramble[i];
1165
1166         if (hash >= highhash) break;                        /* out of bounds */
1167         childkey = tabh[hash].key_h;
1168
1169         if (childkey)
1170         {
1171           bstuff *hitb = &tabb[childkey->b_k];
1172
1173           if (childb)
1174           {
1175             if (childb != hitb) break;            /* hit at most one child b */
1176           }
1177           else
1178           {
1179             childb = hitb;                        /* remember this as childb */
1180             if (childb->water_b == highwater) break;     /* already explored */
1181           }
1182         }
1183       }
1184       if (mykey) continue;             /* myb with i has multiple collisions */
1185
1186       /* add childb to the queue of reachable things */
1187       if (childb) childb->water_b = highwater;
1188       tabq[tail].b_q      = childb;
1189       tabq[tail].newval_q = i;     /* how to make parent (myb) use this hash */
1190       tabq[tail].oldval_q = myb->val_b;            /* need this for rollback */
1191       tabq[tail].parent_q = q;
1192       ++tail;
1193
1194       if (!childb)
1195       {                                  /* found an *i* with no collisions? */
1196         /* try to apply the augmenting path */
1197         if (apply(tabb, tabh, tabq, blen, scramble, tail, FALSE))
1198           return TRUE;        /* success, item was added to the perfect hash */
1199
1200         --tail;                    /* don't know how to handle such a child! */
1201       }
1202     }
1203   }
1204   return FALSE;
1205 }
1206
1207
1208 /* find a mapping that makes this a perfect hash */
1209 static int perfect(bstuff *tabb, hstuff *tabh, qstuff *tabq, ub4 blen, ub4 smax, ub4 *scramble, ub4 nkeys)
1210 {
1211   ub4 maxkeys;                           /* maximum number of keys for any b */
1212   ub4 i, j;
1213
1214 #if SELOPT_DEBUG
1215   fprintf(stderr, "           blen %d smax %d nkeys %d\n", blen, smax, nkeys);
1216 #endif
1217
1218   /* clear any state from previous attempts */
1219   memset((void *)tabh, 0, sizeof(hstuff)*smax);
1220   memset((void *)tabq, 0, sizeof(qstuff)*(blen+1));
1221
1222   for (maxkeys=0,i=0; i<blen; ++i)
1223     if (tabb[i].listlen_b > maxkeys)
1224       maxkeys = tabb[i].listlen_b;
1225
1226   /* In descending order by number of keys, map all *b*s */
1227   for (j=maxkeys; j>0; --j)
1228     for (i=0; i<blen; ++i)
1229       if (tabb[i].listlen_b == j)
1230         if (!augment(tabb, tabh, tabq, blen, scramble, smax, &tabb[i], nkeys,
1231                      i+1))
1232         {
1233           return FALSE;
1234         }
1235
1236   /* Success!  We found a perfect hash of all keys into 0..nkeys-1. */
1237   return TRUE;
1238 }
1239
1240
1241 /* guess initial values for alen and blen */
1242 static void initalen(ub4 *alen, ub4 *blen, ub4 smax, ub4 nkeys)
1243 // ub4      *alen;                                      /* output, initial alen */
1244 // ub4      *blen;                                      /* output, initial blen */
1245 // ub4      smax;    /* input, power of two greater or equal to max hash value */
1246 // ub4       nkeys;                              /* number of keys being hashed */
1247 {
1248   /*
1249    * Find initial *alen, *blen
1250    * Initial alen and blen values were found empirically.  Some factors:
1251    *
1252    * If smax<256 there is no scramble, so tab[b] needs to cover 0..smax-1.
1253    *
1254    * alen and blen must be powers of 2 because the values in 0..alen-1 and
1255    * 0..blen-1 are produced by applying a bitmask to the initial hash function.
1256    *
1257    * alen must be less than smax, in fact less than nkeys, because otherwise
1258    * there would often be no i such that a^scramble[i] is in 0..nkeys-1 for
1259    * all the *a*s associated with a given *b*, so there would be no legal
1260    * value to assign to tab[b].  This only matters when we're doing a minimal
1261    * perfect hash.
1262    *
1263    * It takes around 800 trials to find distinct (a,b) with nkey=smax*(5/8)
1264    * and alen*blen = smax*smax/32.
1265    *
1266    * Values of blen less than smax/4 never work, and smax/2 always works.
1267    *
1268    * We want blen as small as possible because it is the number of bytes in
1269    * the huge array we must create for the perfect hash.
1270    *
1271    * When nkey <= smax*(5/8), blen=smax/4 works much more often with
1272    * alen=smax/8 than with alen=smax/4.  Above smax*(5/8), blen=smax/4
1273    * doesn't seem to care whether alen=smax/8 or alen=smax/4.  I think it
1274    * has something to do with 5/8 = 1/8 * 5.  For example examine 80000,
1275    * 85000, and 90000 keys with different values of alen.  This only matters
1276    * if we're doing a minimal perfect hash.
1277    *
1278    * When alen*blen <= 1<<UB4BITS, the initial hash must produce one integer.
1279    * Bigger than that it must produce two integers, which increases the
1280    * cost of the hash per character hashed.
1281    */
1282   *alen = smax;                     /* no reason to restrict alen to smax/2 */
1283   *blen = ((nkeys <= smax*0.6) ? smax/16 :
1284            (nkeys <= smax*0.8) ? smax/8 : smax/4);
1285
1286   if (*alen < 1) *alen = 1;
1287   if (*blen < 1) *blen = 1;
1288
1289 #if SELOPT_DEBUG
1290   fprintf(stderr, "alen %d blen %d smax %d nkeys %d\n", *alen, *blen, smax, nkeys);
1291 #endif
1292 }
1293
1294 /*
1295 ** Try to find a perfect hash function.
1296 ** Return the successful initializer for the initial hash.
1297 ** Return 0 if no perfect hash could be found.
1298 */
1299 static int findhash(bstuff **tabb, ub4 *alen, ub4 *blen, ub8 *salt,
1300                     ub4 *scramble, ub4 smax, key *keys, ub4 nkeys)
1301 // bstuff  **tabb;           /* output, tab[] of the perfect hash, length *blen */
1302 // ub4      *alen;                 /* output, 0..alen-1 is range for a of (a,b) */
1303 // ub4      *blen;                 /* output, 0..blen-1 is range for b of (a,b) */
1304 // ub4      *salt;                         /* output, initializes initial hash */
1305 // ub4      *scramble;                      /* input, hash = a^scramble[tab[b]] */
1306 // ub4      smax;                           /* input, scramble[i] in 0..smax-1 */
1307 // key      *keys;                                       /* input, keys to hash */
1308 // ub4       nkeys;                       /* input, number of keys being hashed */
1309 {
1310   ub4 bad_initkey;                       /* how many times did initkey fail? */
1311   ub4 bad_perfect;                       /* how many times did perfect fail? */
1312   ub4 si;                        /* trial initializer for initial hash */
1313   ub4 maxalen;
1314   hstuff *tabh;                       /* table of keys indexed by hash value */
1315   qstuff *tabq;    /* table of stuff indexed by queue value, used by augment */
1316
1317   /* guess initial values for alen and blen */
1318   initalen(alen, blen, smax, nkeys);
1319
1320   scrambleinit(scramble, smax);
1321
1322   maxalen = smax;
1323
1324   /* allocate working memory */
1325   *tabb = new bstuff[*blen];
1326   tabq  = new qstuff[*blen+1];
1327   tabh  = new hstuff[smax];
1328
1329   /* Actually find the perfect hash */
1330   *salt = 0;
1331   bad_initkey = 0;
1332   bad_perfect = 0;
1333   for (si=1; ; ++si)
1334   {
1335     ub4 rslinit;
1336     /* Try to find distinct (A,B) for all keys */
1337     *salt = si * 0x9e3779b97f4a7c13LL; /* golden ratio (arbitrary value) */
1338     initnorm(keys, nkeys, *alen, *blen, smax, *salt);
1339     rslinit = inittab(*tabb, *blen, keys, nkeys, FALSE);
1340     if (rslinit == 0)
1341     {
1342       /* didn't find distinct (a,b) */
1343       if (++bad_initkey >= RETRY_INITKEY)
1344       {
1345         /* Try to put more bits in (A,B) to make distinct (A,B) more likely */
1346         if (*alen < maxalen)
1347         {
1348           *alen *= 2;
1349         }
1350         else if (*blen < smax)
1351         {
1352           *blen *= 2;
1353           delete[] tabq;
1354           delete[] *tabb;
1355           *tabb  = new bstuff[*blen];
1356           tabq  = new qstuff[*blen+1];
1357         }
1358         bad_initkey = 0;
1359         bad_perfect = 0;
1360       }
1361       continue;                             /* two keys have same (a,b) pair */
1362     }
1363
1364     /* Given distinct (A,B) for all keys, build a perfect hash */
1365     if (!perfect(*tabb, tabh, tabq, *blen, smax, scramble, nkeys))
1366     {
1367       if (++bad_perfect >= RETRY_PERFECT)
1368       {
1369         if (*blen < smax)
1370         {
1371           *blen *= 2;
1372           delete[] *tabb;
1373           delete[] tabq;
1374           *tabb  = new bstuff[*blen];
1375           tabq  = new qstuff[*blen+1];
1376           --si;               /* we know this salt got distinct (A,B) */
1377         }
1378         else
1379         {
1380           return 0;
1381         }
1382         bad_perfect = 0;
1383       }
1384       continue;
1385     }
1386
1387     break;
1388   }
1389
1390   /* free working memory */
1391   delete[] tabh;
1392   delete[] tabq;
1393
1394   return 1;
1395 }
1396
1397 /*
1398 ------------------------------------------------------------------------------
1399 Input/output type routines
1400 ------------------------------------------------------------------------------
1401 */
1402
1403 /* get the list of keys */
1404 static void getkeys(key **keys, ub4 *nkeys, const string_map& strings)
1405 {
1406   key *buf = new key[strings.size()];
1407   size_t i;
1408   string_map::const_iterator s;
1409   for (i = 0, s = strings.begin(); s != strings.end(); ++s, ++i) {
1410     key *mykey = buf+i;
1411     mykey->name_k = (ub1 *)s->first;
1412     mykey->len_k  = (ub4)strlen(s->first);
1413   }
1414   *keys = buf;
1415   *nkeys = strings.size();
1416 }
1417
1418
1419 static perfect_hash
1420 make_perfect(const string_map& strings)
1421 {
1422   ub4       nkeys;                                         /* number of keys */
1423   key      *keys;                                    /* head of list of keys */
1424   bstuff   *tab;                                       /* table indexed by b */
1425   ub4       smax;            /* scramble[] values in 0..smax-1, a power of 2 */
1426   ub4       alen;                            /* a in 0..alen-1, a power of 2 */
1427   ub4       blen;                            /* b in 0..blen-1, a power of 2 */
1428   ub8       salt;                       /* a parameter to the hash function */
1429   ub4       scramble[SCRAMBLE_LEN];           /* used in final hash function */
1430   int ok;
1431   int i;
1432   perfect_hash result;
1433
1434   /* read in the list of keywords */
1435   getkeys(&keys, &nkeys, strings);
1436
1437   /* find the hash */
1438   smax = ((ub4)1<<log2u(nkeys));
1439   ok = findhash(&tab, &alen, &blen, &salt,
1440                 scramble, smax, keys, nkeys);
1441   if (!ok) {
1442       smax = 2 * ((ub4)1<<log2u(nkeys));
1443       ok = findhash(&tab, &alen, &blen, &salt,
1444                     scramble, smax, keys, nkeys);
1445   }
1446   if (!ok) {
1447       bzero(&result, sizeof(result));
1448   } else {
1449       /* build the tables */
1450       result.capacity = smax;
1451       result.occupied = nkeys;
1452       result.shift = UB8BITS - log2u(alen);
1453       result.mask = blen - 1;
1454       result.salt = salt;
1455
1456       result.tab = new uint8_t[blen];
1457       for (i = 0; i < blen; i++) {
1458           result.tab[i] = tab[i].val_b;
1459       }
1460       for (i = 0; i < 256; i++) {
1461           result.scramble[i] = scramble[i];
1462       }
1463   }
1464
1465   delete[] keys;
1466   delete[] tab;
1467
1468   return result;
1469 }
1470
1471 // SELOPT_WRITE
1472 #endif
1473
1474 // namespace objc_selopt
1475 };
1476
1477 #undef S32
1478 #undef S64
1479
1480 #endif