include/objc-shared-cache.h

   1 /*
   2  * Copyright (c) 2008 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. Please obtain a copy of the License at
  10  * http://www.opensource.apple.com/apsl/ and read it before using this
  11  * file.
  12  *
  13  * The Original Code and all software distributed under the License are
  14  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  15  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  16  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  18  * Please see the License for the specific language governing rights and
  19  * limitations under the License.
  20  *
  21  * @APPLE_LICENSE_HEADER_END@
  22  */
  23
  24 /*
  25 Portions derived from:
  26
  27 --------------------------------------------------------------------
  28 lookup8.c, by Bob Jenkins, January 4 1997, Public Domain.
  29 hash(), hash2(), hash3, and mix() are externally useful functions.
  30 Routines to test the hash are included if SELF_TEST is defined.
  31 You can use this free for any purpose.  It has no warranty.
  32 --------------------------------------------------------------------
  33
  34 ------------------------------------------------------------------------------
  35 perfect.c: code to generate code for a hash for perfect hashing.
  36 (c) Bob Jenkins, September 1996, December 1999
  37 You may use this code in any way you wish, and it is free.  No warranty.
  38 I hereby place this in the public domain.
  39 Source is http://burtleburtle.net/bob/c/perfect.c
  40 ------------------------------------------------------------------------------
  41 */
  42
  43 /*
  44  * objc-selopt.h
  45  * Interface between libobjc and dyld
  46  * for selector uniquing in the dyld shared cache.
  47  *
  48  * When building the shared cache, dyld locates all selectors and selector
  49  * references in the cached images. It builds a perfect hash table out of
  50  * them and writes the table into the shared cache copy of libobjc.
  51  * libobjc then uses that table as the builtin selector list.
  52  *
  53  * Versioning
  54  * The table has a version number. dyld and objc can both ignore the table
  55  * if the other used the wrong version number.
  56  *
  57  * Completeness
  58  * Not all libraries are in the shared cache. Libraries that are in the
  59  * shared cache and were optimized are specially marked. Libraries on
  60  * disk never include those marks.
  61  *
  62  * Coherency
  63  * Libraries optimized in the shared cache can be replaced by unoptimized
  64  * copies from disk when loaded. The copy from disk is not marked and will
  65  * be fixed up by libobjc. The shared cache copy is still mapped into the
  66  * process, so the table can point to cstring data in that library's part
  67  * of the shared cache without trouble.
  68  *
  69  * Atomicity
  70  * dyld writes the table itself last. If dyld marks some metadata as
  71  * updated but then fails to write a table for some reason, libobjc
  72  * fixes up all metadata as if it were not marked.
  73  */
  74
  75 #ifndef _OBJC_SELOPT_H
  76 #define _OBJC_SELOPT_H
  77
  78 /*
  79   DO NOT INCLUDE ANY objc HEADERS HERE
  80   dyld USES THIS FILE AND CANNOT SEE THEM
  81 */
  82 #include <stdint.h>
  83 #include <stdlib.h>
  84 #ifdef SELOPT_WRITE
  85 #include <unordered_map>
  86 #endif
  87 /*
  88   DO NOT INCLUDE ANY objc HEADERS HERE
  89   dyld USES THIS FILE AND CANNOT SEE THEM
  90 */
  91
  92 #ifndef STATIC_ASSERT
  93 #   define STATIC_ASSERT(x) _STATIC_ASSERT2(x, __LINE__)
  94 #   define _STATIC_ASSERT2(x, line) _STATIC_ASSERT3(x, line)
  95 #   define _STATIC_ASSERT3(x, line)                                     \
  96         typedef struct {                                                \
  97             int _static_assert[(x) ? 0 : -1];                           \
  98         } _static_assert_ ## line __attribute__((unavailable))
  99 #endif
 100
 101 #define SELOPT_DEBUG 0
 102
 103 #define S32(x) x = little_endian ? OSSwapHostToLittleInt32(x) : OSSwapHostToBigInt32(x)
 104 #define S64(x) x = little_endian ? OSSwapHostToLittleInt64(x) : OSSwapHostToBigInt64(x)
 105
 106 namespace objc_opt {
 107
 108 typedef int32_t objc_stringhash_offset_t;
 109 typedef uint8_t objc_stringhash_check_t;
 110
 111 static uint64_t lookup8( uint8_t *k, size_t length, uint64_t level);
 112
 113 #ifdef SELOPT_WRITE
 114
 115 // Perfect hash code is at the end of this file.
 116
 117 struct __attribute__((packed)) perfect_hash {
 118     uint32_t capacity;
 119     uint32_t occupied;
 120     uint32_t shift;
 121     uint32_t mask;
 122     uint64_t salt;
 123
 124     uint32_t scramble[256];
 125     uint8_t *tab;  // count == mask+1; free with delete[]
 126
 127     perfect_hash() : tab(0) { }
 128
 129     ~perfect_hash() { if (tab) delete[] tab; }
 130 };
 131
 132 struct eqstr {
 133     bool operator()(const char* s1, const char* s2) const {
 134         return strcmp(s1, s2) == 0;
 135     }
 136 };
 137
 138 struct hashstr {
 139     size_t operator()(const char *s) const {
 140         return (size_t)lookup8((uint8_t *)s, strlen(s), 0);
 141     }
 142 };
 143
 144 // cstring => cstring's vmaddress
 145 // (used for selector names and class names)
 146 typedef std::unordered_map<const char *, uint64_t, hashstr, eqstr> string_map;
 147
 148 // protocol name => protocol vmaddress
 149 typedef std::unordered_map<const char *, uint64_t, hashstr, eqstr> protocol_map;
 150
 151 // class name => (class vmaddress, header_info vmaddress)
 152 typedef std::unordered_multimap<const char *, std::pair<uint64_t, uint64_t>, hashstr, eqstr> class_map;
 153
 154 static perfect_hash make_perfect(const string_map& strings);
 155
 156 #endif
 157
 158
 159 // Precomputed perfect hash table of strings.
 160 // Base class for precomputed selector table and class table.
 161 // Edit objc-sel-table.s if you change this structure.
 162 struct __attribute__((packed)) objc_stringhash_t {
 163     uint32_t capacity;
 164     uint32_t occupied;
 165     uint32_t shift;
 166     uint32_t mask;
 167     uint32_t unused1;  // was zero
 168     uint32_t unused2;  // alignment pad
 169     uint64_t salt;
 170
 171     uint32_t scramble[256];
 172     uint8_t tab[0];                   /* tab[mask+1] (always power-of-2) */
 173     // uint8_t checkbytes[capacity];  /* check byte for each string */
 174     // int32_t offsets[capacity];     /* offsets from &capacity to cstrings */
 175
 176     objc_stringhash_check_t *checkbytes() { return (objc_stringhash_check_t *)&tab[mask+1]; }
 177     const objc_stringhash_check_t *checkbytes() const { return (const objc_stringhash_check_t *)&tab[mask+1]; }
 178
 179     objc_stringhash_offset_t *offsets() { return (objc_stringhash_offset_t *)&checkbytes()[capacity]; }
 180     const objc_stringhash_offset_t *offsets() const { return (const objc_stringhash_offset_t *)&checkbytes()[capacity]; }
 181
 182     uint32_t hash(const char *key, size_t keylen) const
 183     {
 184         uint64_t val = lookup8((uint8_t*)key, keylen, salt);
 185         uint32_t index = (uint32_t)(val>>shift) ^ scramble[tab[val&mask]];
 186         return index;
 187     }
 188
 189     uint32_t hash(const char *key) const
 190     {
 191         return hash(key, strlen(key));
 192     }
 193
 194     // The check bytes areused to reject strings that aren't in the table
 195     // without paging in the table's cstring data. This checkbyte calculation
 196     // catches 4785/4815 rejects when launching Safari; a perfect checkbyte
 197     // would catch 4796/4815.
 198     objc_stringhash_check_t checkbyte(const char *key, size_t keylen) const
 199     {
 200         return
 201             ((key[0] & 0x7) << 5)
 202             |
 203             ((uint8_t)keylen & 0x1f);
 204     }
 205
 206     objc_stringhash_check_t checkbyte(const char *key) const
 207     {
 208         return checkbyte(key, strlen(key));
 209     }
 210
 211
 212 #define INDEX_NOT_FOUND (~(uint32_t)0)
 213
 214     uint32_t getIndex(const char *key) const
 215     {
 216         size_t keylen = strlen(key);
 217         uint32_t h = hash(key, keylen);
 218
 219         // Use check byte to reject without paging in the table's cstrings
 220         objc_stringhash_check_t h_check = checkbytes()[h];
 221         objc_stringhash_check_t key_check = checkbyte(key, keylen);
 222         bool check_fail = (h_check != key_check);
 223 #if ! SELOPT_DEBUG
 224         if (check_fail) return INDEX_NOT_FOUND;
 225 #endif
 226
 227         objc_stringhash_offset_t offset = offsets()[h];
 228         if (offset == 0) return INDEX_NOT_FOUND;
 229         const char *result = (const char *)this + offset;
 230         if (0 != strcmp(key, result)) return INDEX_NOT_FOUND;
 231
 232 #if SELOPT_DEBUG
 233         if (check_fail) abort();
 234 #endif
 235
 236         return h;
 237     }
 238
 239 #ifdef SELOPT_WRITE
 240
 241     size_t size()
 242     {
 243         return sizeof(objc_stringhash_t)
 244             + mask+1
 245             + capacity * sizeof(objc_stringhash_check_t)
 246             + capacity * sizeof(objc_stringhash_offset_t);
 247     }
 248
 249     void byteswap(bool little_endian)
 250     {
 251         // tab and checkbytes are arrays of bytes, no swap needed
 252         for (uint32_t i = 0; i < 256; i++) {
 253             S32(scramble[i]);
 254         }
 255         objc_stringhash_offset_t *o = offsets();
 256         for (uint32_t i = 0; i < capacity; i++) {
 257             S32(o[i]);
 258         }
 259
 260         S32(capacity);
 261         S32(occupied);
 262         S32(shift);
 263         S32(mask);
 264         S64(salt);
 265     }
 266
 267     const char *write(uint64_t base, size_t remaining, string_map& strings)
 268     {
 269         if (sizeof(objc_stringhash_t) > remaining) {
 270             return "selector section too small (metadata not optimized)";
 271         }
 272
 273         if (strings.size() == 0) {
 274             bzero(this, sizeof(objc_stringhash_t));
 275             return NULL;
 276         }
 277
 278         perfect_hash phash = make_perfect(strings);
 279         if (phash.capacity == 0) {
 280             return "perfect hash failed (metadata not optimized)";
 281         }
 282
 283         // Set header
 284         capacity = phash.capacity;
 285         occupied = phash.occupied;
 286         shift = phash.shift;
 287         mask = phash.mask;
 288         unused1 = 0;
 289         unused2 = 0;
 290         salt = phash.salt;
 291
 292         if (size() > remaining) {
 293             return "selector section too small (metadata not optimized)";
 294         }
 295
 296         // Set hash data
 297         for (uint32_t i = 0; i < 256; i++) {
 298             scramble[i] = phash.scramble[i];
 299         }
 300         for (uint32_t i = 0; i < phash.mask+1; i++) {
 301             tab[i] = phash.tab[i];
 302         }
 303
 304         // Set offsets to 0
 305         for (uint32_t i = 0; i < phash.capacity; i++) {
 306             offsets()[i] = 0;
 307         }
 308         // Set checkbytes to 0
 309         for (uint32_t i = 0; i < phash.capacity; i++) {
 310             checkbytes()[i] = 0;
 311         }
 312
 313         // Set real string offsets and checkbytes
 314 #       define SHIFT (64 - 8*sizeof(objc_stringhash_offset_t))
 315         string_map::const_iterator s;
 316         for (s = strings.begin(); s != strings.end(); ++s) {
 317             int64_t offset = s->second - base;
 318             if ((offset<<SHIFT)>>SHIFT != offset) {
 319                 return "selector offset too big (metadata not optimized)";
 320             }
 321
 322             uint32_t h = hash(s->first);
 323             offsets()[h] = (objc_stringhash_offset_t)offset;
 324             checkbytes()[h] = checkbyte(s->first);
 325         }
 326 #       undef SHIFT
 327
 328         return NULL;
 329     }
 330
 331 // SELOPT_WRITE
 332 #endif
 333 };
 334
 335
 336 // Precomputed selector table.
 337 // Edit objc-sel-table.s if you change this structure.
 338 struct objc_selopt_t : objc_stringhash_t {
 339     const char *get(const char *key) const
 340     {
 341         uint32_t h = getIndex(key);
 342         if (h == INDEX_NOT_FOUND) return NULL;
 343
 344         return (const char *)this + offsets()[h];
 345     }
 346 };
 347
 348 // Precomputed class list.
 349 // Edit objc-sel-table.s if you change these structures.
 350
 351 struct objc_classheader_t {
 352     objc_stringhash_offset_t clsOffset;
 353     objc_stringhash_offset_t hiOffset;
 354
 355     // For duplicate class names:
 356     // clsOffset = count<<1 | 1
 357     // duplicated classes are duplicateOffsets[hiOffset..hiOffset+count-1]
 358     bool isDuplicate() const { return clsOffset & 1; }
 359     uint32_t duplicateCount() const { return clsOffset >> 1; }
 360     uint32_t duplicateIndex() const { return hiOffset; }
 361 };
 362
 363
 364 struct objc_clsopt_t : objc_stringhash_t {
 365     // ...objc_stringhash_t fields...
 366     // objc_classheader_t classOffsets[capacity]; /* offsets from &capacity to class_t and header_info */
 367     // uint32_t duplicateCount;
 368     // objc_classheader_t duplicateOffsets[duplicatedClasses];
 369
 370     objc_classheader_t *classOffsets() { return (objc_classheader_t *)&offsets()[capacity]; }
 371     const objc_classheader_t *classOffsets() const { return (const objc_classheader_t *)&offsets()[capacity]; }
 372
 373     uint32_t& duplicateCount() { return *(uint32_t *)&classOffsets()[capacity]; }
 374     const uint32_t& duplicateCount() const { return *(const uint32_t *)&classOffsets()[capacity]; }
 375
 376     objc_classheader_t *duplicateOffsets() { return (objc_classheader_t *)(&duplicateCount()+1); }
 377     const objc_classheader_t *duplicateOffsets() const { return (const objc_classheader_t *)(&duplicateCount()+1); }
 378
 379     // 0/NULL/NULL: not found
 380     // 1/ptr/ptr: found exactly one
 381     // n/NULL/NULL:  found N - use getClassesAndHeaders() instead
 382     uint32_t getClassAndHeader(const char *key, void*& cls, void*& hi) const
 383     {
 384         uint32_t h = getIndex(key);
 385         if (h == INDEX_NOT_FOUND) {
 386             cls = NULL;
 387             hi = NULL;
 388             return 0;
 389         }
 390
 391         const objc_classheader_t& clshi = classOffsets()[h];
 392         if (! clshi.isDuplicate()) {
 393             // class appears in exactly one header
 394             cls = (void *)((const char *)this + clshi.clsOffset);
 395             hi  = (void *)((const char *)this + clshi.hiOffset);
 396             return 1;
 397         }
 398         else {
 399             // class appears in more than one header - use getClassesAndHeaders
 400             cls = NULL;
 401             hi = NULL;
 402             return clshi.duplicateCount();
 403         }
 404     }
 405
 406     void getClassesAndHeaders(const char *key, void **cls, void **hi) const
 407     {
 408         uint32_t h = getIndex(key);
 409         if (h == INDEX_NOT_FOUND) return;
 410
 411         const objc_classheader_t& clshi = classOffsets()[h];
 412         if (! clshi.isDuplicate()) {
 413             // class appears in exactly one header
 414             cls[0] = (void *)((const char *)this + clshi.clsOffset);
 415             hi[0]  = (void *)((const char *)this + clshi.hiOffset);
 416         }
 417         else {
 418             // class appears in more than one header
 419             uint32_t count = clshi.duplicateCount();
 420             const objc_classheader_t *list =
 421                 &duplicateOffsets()[clshi.duplicateIndex()];
 422             for (uint32_t i = 0; i < count; i++) {
 423                 cls[i] = (void *)((const char *)this + list[i].clsOffset);
 424                 hi[i]  = (void *)((const char *)this + list[i].hiOffset);
 425             }
 426         }
 427     }
 428
 429 #ifdef SELOPT_WRITE
 430
 431     size_t size()
 432     {
 433         return
 434             objc_stringhash_t::size()
 435             + capacity * sizeof(objc_classheader_t)
 436             + sizeof(duplicateCount())
 437             + duplicateCount() * sizeof(objc_classheader_t);
 438     }
 439
 440     void byteswap(bool little_endian)
 441     {
 442         objc_classheader_t *o;
 443
 444         o = classOffsets();
 445         for (uint32_t i = 0; i < capacity; i++) {
 446             S32(o[i].clsOffset);
 447             S32(o[i].hiOffset);
 448         }
 449
 450         o = duplicateOffsets();
 451         for (uint32_t i = 0; i < duplicateCount(); i++) {
 452             S32(o[i].clsOffset);
 453             S32(o[i].hiOffset);
 454         }
 455
 456         S32(duplicateCount());
 457
 458         objc_stringhash_t::byteswap(little_endian);
 459     }
 460
 461     const char *write(uint64_t base, size_t remaining,
 462                       string_map& strings, class_map& classes, bool verbose)
 463     {
 464         const char *err;
 465         err = objc_stringhash_t::write(base, remaining, strings);
 466         if (err) return err;
 467
 468         if (size() > remaining) {
 469             return "selector section too small (metadata not optimized)";
 470         }
 471
 472         // Set class offsets to 0
 473         for (uint32_t i = 0; i < capacity; i++) {
 474             classOffsets()[i].clsOffset = 0;
 475             classOffsets()[i].hiOffset = 0;
 476         }
 477
 478         // Set real class offsets
 479 #       define SHIFT (64 - 8*sizeof(objc_stringhash_offset_t))
 480         class_map::const_iterator c;
 481         for (c = classes.begin(); c != classes.end(); ++c) {
 482             uint32_t h = getIndex(c->first);
 483             if (h == INDEX_NOT_FOUND) {
 484                 return "class list busted (metadata not optimized)";
 485             }
 486
 487             if (classOffsets()[h].clsOffset != 0) {
 488                 // already did this class
 489                 continue;
 490             }
 491
 492             uint32_t count = (uint32_t)classes.count(c->first);
 493             if (count == 1) {
 494                 // only one class with this name
 495
 496                 int64_t coff = c->second.first - base;
 497                 int64_t hoff = c->second.second - base;
 498                 if ((coff<<SHIFT)>>SHIFT != coff) {
 499                     return "class offset too big (metadata not optimized)";
 500                 }
 501                 if ((hoff<<SHIFT)>>SHIFT != hoff) {
 502                     return "header offset too big (metadata not optimized)";
 503                 }
 504
 505                 classOffsets()[h].clsOffset = (objc_stringhash_offset_t)coff;
 506                 classOffsets()[h].hiOffset  = (objc_stringhash_offset_t)hoff;
 507             }
 508             else {
 509                 // class name has duplicates - write them all now
 510                 if (verbose) {
 511                     fprintf(stderr, "update_dyld_shared_cache: %u duplicates of Objective-C class %s\n", count, c->first);
 512                 }
 513
 514                 uint32_t dest = duplicateCount();
 515                 duplicateCount() += count;
 516                 if (size() > remaining) {
 517                     return "selector section too small (metadata not optimized)";
 518                 }
 519
 520                 // classOffsets() instead contains count and array index
 521                 classOffsets()[h].clsOffset = count*2 + 1;
 522                 classOffsets()[h].hiOffset = dest;
 523
 524                 std::pair<class_map::const_iterator, class_map::const_iterator>
 525                     duplicates = classes.equal_range(c->first);
 526                 class_map::const_iterator dup;
 527                 for (dup = duplicates.first; dup != duplicates.second; ++dup) {
 528                     int64_t coff = dup->second.first - base;
 529                     int64_t hoff = dup->second.second - base;
 530                     if ((coff<<SHIFT)>>SHIFT != coff) {
 531                         return "class offset too big (metadata not optimized)";
 532                     }
 533                     if ((hoff<<SHIFT)>>SHIFT != hoff) {
 534                         return "header offset too big (metadata not optimized)";
 535                     }
 536
 537                     duplicateOffsets()[dest].clsOffset = (objc_stringhash_offset_t)coff;
 538                     duplicateOffsets()[dest].hiOffset  = (objc_stringhash_offset_t)hoff;
 539                     dest++;
 540                 }
 541             }
 542         }
 543 #       undef SHIFT
 544
 545         return NULL;
 546     }
 547
 548 // SELOPT_WRITE
 549 #endif
 550 };
 551
 552
 553
 554 struct objc_protocolopt_t : objc_stringhash_t {
 555     // ...objc_stringhash_t fields...
 556     // uint32_t protocolOffsets[capacity]; /* offsets from &capacity to protocol_t */
 557
 558     objc_stringhash_offset_t *protocolOffsets() { return (objc_stringhash_offset_t *)&offsets()[capacity]; }
 559     const objc_stringhash_offset_t *protocolOffsets() const { return (const objc_stringhash_offset_t *)&offsets()[capacity]; }
 560
 561     void* getProtocol(const char *key) const
 562     {
 563         uint32_t h = getIndex(key);
 564         if (h == INDEX_NOT_FOUND) {
 565             return NULL;
 566         }
 567
 568         return (void *)((const char *)this + protocolOffsets()[h]);
 569     }
 570
 571 #ifdef SELOPT_WRITE
 572
 573     size_t size()
 574     {
 575         return
 576             objc_stringhash_t::size() + capacity * sizeof(objc_stringhash_offset_t);
 577     }
 578
 579     void byteswap(bool little_endian)
 580     {
 581         objc_stringhash_offset_t *o;
 582
 583         o = protocolOffsets();
 584         for (objc_stringhash_offset_t i = 0; i < (int)capacity; i++) {
 585             S32(o[i]);
 586         }
 587
 588         objc_stringhash_t::byteswap(little_endian);
 589     }
 590
 591     const char *write(uint64_t base, size_t remaining,
 592                       string_map& strings, protocol_map& protocols,
 593                       bool verbose)
 594     {
 595         const char *err;
 596         err = objc_stringhash_t::write(base, remaining, strings);
 597         if (err) return err;
 598
 599         if (size() > remaining) {
 600             return "selector section too small (metadata not optimized)";
 601         }
 602
 603         // Set protocol offsets to 0
 604         for (uint32_t i = 0; i < capacity; i++) {
 605             protocolOffsets()[i] = 0;
 606         }
 607
 608         // Set real protocol offsets
 609 #       define SHIFT (64 - 8*sizeof(objc_stringhash_offset_t))
 610         protocol_map::const_iterator c;
 611         for (c = protocols.begin(); c != protocols.end(); ++c) {
 612             uint32_t h = getIndex(c->first);
 613             if (h == INDEX_NOT_FOUND) {
 614                 return "protocol list busted (metadata not optimized)";
 615             }
 616
 617             int64_t offset = c->second - base;
 618             if ((offset<<SHIFT)>>SHIFT != offset) {
 619                 return "protocol offset too big (metadata not optimized)";
 620             }
 621
 622             protocolOffsets()[h] = (objc_stringhash_offset_t)offset;
 623         }
 624 #       undef SHIFT
 625
 626         return NULL;
 627     }
 628
 629 // SELOPT_WRITE
 630 #endif
 631 };
 632
 633
 634 // Precomputed image list.
 635 struct objc_headeropt_ro_t;
 636
 637 // Precomputed image list.
 638 struct objc_headeropt_rw_t;
 639
 640 // Precomputed class list.
 641 struct objc_clsopt_t;
 642
 643 // Edit objc-sel-table.s if you change this value.
 644 // lldb and Symbolication read these structures. Inform them of any changes.
 645 enum { VERSION = 15 };
 646
 647 // Values for objc_opt_t::flags
 648 enum : uint32_t {
 649     IsProduction = (1 << 0),               // never set in development cache
 650     NoMissingWeakSuperclasses = (1 << 1),  // never set in development cache
 651 };
 652
 653 // Top-level optimization structure.
 654 // Edit objc-sel-table.s if you change this structure.
 655 struct alignas(alignof(void*)) objc_opt_t {
 656     uint32_t version;
 657     uint32_t flags;
 658     int32_t selopt_offset;
 659     int32_t headeropt_ro_offset;
 660     int32_t clsopt_offset;
 661     int32_t protocolopt_offset;
 662     int32_t headeropt_rw_offset;
 663
 664     const objc_selopt_t* selopt() const {
 665         if (selopt_offset == 0) return NULL;
 666         return (objc_selopt_t *)((uint8_t *)this + selopt_offset);
 667     }
 668     objc_selopt_t* selopt() {
 669         if (selopt_offset == 0) return NULL;
 670         return (objc_selopt_t *)((uint8_t *)this + selopt_offset);
 671     }
 672
 673     struct objc_headeropt_ro_t* headeropt_ro() const {
 674         if (headeropt_ro_offset == 0) return NULL;
 675         return (struct objc_headeropt_ro_t *)((uint8_t *)this + headeropt_ro_offset);
 676     }
 677
 678     struct objc_clsopt_t* clsopt() const {
 679         if (clsopt_offset == 0) return NULL;
 680         return (objc_clsopt_t *)((uint8_t *)this + clsopt_offset);
 681     }
 682
 683     struct objc_protocolopt_t* protocolopt() const {
 684         if (protocolopt_offset == 0) return NULL;
 685         return (objc_protocolopt_t *)((uint8_t *)this + protocolopt_offset);
 686     }
 687
 688     struct objc_headeropt_rw_t* headeropt_rw() const {
 689         if (headeropt_rw_offset == 0) return NULL;
 690         return (struct objc_headeropt_rw_t *)((uint8_t *)this + headeropt_rw_offset);
 691     }
 692 };
 693
 694 // sizeof(objc_opt_t) must be pointer-aligned
 695 STATIC_ASSERT(sizeof(objc_opt_t) % sizeof(void*) == 0);
 696
 697
 698 // List of offsets in libobjc that the shared cache optimization needs to use.
 699 template <typename T>
 700 struct objc_opt_pointerlist_tt {
 701     T protocolClass;
 702 };
 703 typedef struct objc_opt_pointerlist_tt<uintptr_t> objc_opt_pointerlist_t;
 704
 705
 706 /*
 707 --------------------------------------------------------------------
 708 mix -- mix 3 64-bit values reversibly.
 709 mix() takes 48 machine instructions, but only 24 cycles on a superscalar
 710   machine (like Intel's new MMX architecture).  It requires 4 64-bit
 711   registers for 4::2 parallelism.
 712 All 1-bit deltas, all 2-bit deltas, all deltas composed of top bits of
 713   (a,b,c), and all deltas of bottom bits were tested.  All deltas were
 714   tested both on random keys and on keys that were nearly all zero.
 715   These deltas all cause every bit of c to change between 1/3 and 2/3
 716   of the time (well, only 113/400 to 287/400 of the time for some
 717   2-bit delta).  These deltas all cause at least 80 bits to change
 718   among (a,b,c) when the mix is run either forward or backward (yes it
 719   is reversible).
 720 This implies that a hash using mix64 has no funnels.  There may be
 721   characteristics with 3-bit deltas or bigger, I didn't test for
 722   those.
 723 --------------------------------------------------------------------
 724 */
 725 #define mix64(a,b,c) \
 726 { \
 727   a -= b; a -= c; a ^= (c>>43); \
 728   b -= c; b -= a; b ^= (a<<9); \
 729   c -= a; c -= b; c ^= (b>>8); \
 730   a -= b; a -= c; a ^= (c>>38); \
 731   b -= c; b -= a; b ^= (a<<23); \
 732   c -= a; c -= b; c ^= (b>>5); \
 733   a -= b; a -= c; a ^= (c>>35); \
 734   b -= c; b -= a; b ^= (a<<49); \
 735   c -= a; c -= b; c ^= (b>>11); \
 736   a -= b; a -= c; a ^= (c>>12); \
 737   b -= c; b -= a; b ^= (a<<18); \
 738   c -= a; c -= b; c ^= (b>>22); \
 739 }
 740
 741 /*
 742 --------------------------------------------------------------------
 743 hash() -- hash a variable-length key into a 64-bit value
 744   k     : the key (the unaligned variable-length array of bytes)
 745   len   : the length of the key, counting by bytes
 746   level : can be any 8-byte value
 747 Returns a 64-bit value.  Every bit of the key affects every bit of
 748 the return value.  No funnels.  Every 1-bit and 2-bit delta achieves
 749 avalanche.  About 41+5len instructions.
 750
 751 The best hash table sizes are powers of 2.  There is no need to do
 752 mod a prime (mod is sooo slow!).  If you need less than 64 bits,
 753 use a bitmask.  For example, if you need only 10 bits, do
 754   h = (h & hashmask(10));
 755 In which case, the hash table should have hashsize(10) elements.
 756
 757 If you are hashing n strings (uint8_t **)k, do it like this:
 758   for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
 759
 760 By Bob Jenkins, Jan 4 1997.  bob_jenkins@burtleburtle.net.  You may
 761 use this code any way you wish, private, educational, or commercial,
 762 but I would appreciate if you give me credit.
 763
 764 See http://burtleburtle.net/bob/hash/evahash.html
 765 Use for hash table lookup, or anything where one collision in 2^^64
 766 is acceptable.  Do NOT use for cryptographic purposes.
 767 --------------------------------------------------------------------
 768 */
 769
 770 static uint64_t lookup8( uint8_t *k, size_t length, uint64_t level)
 771 // uint8_t *k;        /* the key */
 772 // uint64_t  length;   /* the length of the key */
 773 // uint64_t  level;    /* the previous hash, or an arbitrary value */
 774 {
 775   uint64_t a,b,c;
 776   size_t len;
 777
 778   /* Set up the internal state */
 779   len = length;
 780   a = b = level;                         /* the previous hash value */
 781   c = 0x9e3779b97f4a7c13LL; /* the golden ratio; an arbitrary value */
 782
 783   /*---------------------------------------- handle most of the key */
 784   while (len >= 24)
 785   {
 786     a += (k[0]        +((uint64_t)k[ 1]<< 8)+((uint64_t)k[ 2]<<16)+((uint64_t)k[ 3]<<24)
 787      +((uint64_t)k[4 ]<<32)+((uint64_t)k[ 5]<<40)+((uint64_t)k[ 6]<<48)+((uint64_t)k[ 7]<<56));
 788     b += (k[8]        +((uint64_t)k[ 9]<< 8)+((uint64_t)k[10]<<16)+((uint64_t)k[11]<<24)
 789      +((uint64_t)k[12]<<32)+((uint64_t)k[13]<<40)+((uint64_t)k[14]<<48)+((uint64_t)k[15]<<56));
 790     c += (k[16]       +((uint64_t)k[17]<< 8)+((uint64_t)k[18]<<16)+((uint64_t)k[19]<<24)
 791      +((uint64_t)k[20]<<32)+((uint64_t)k[21]<<40)+((uint64_t)k[22]<<48)+((uint64_t)k[23]<<56));
 792     mix64(a,b,c);
 793     k += 24; len -= 24;
 794   }
 795
 796   /*------------------------------------- handle the last 23 bytes */
 797   c += length;
 798   switch(len)              /* all the case statements fall through */
 799   {
 800   case 23: c+=((uint64_t)k[22]<<56);
 801   case 22: c+=((uint64_t)k[21]<<48);
 802   case 21: c+=((uint64_t)k[20]<<40);
 803   case 20: c+=((uint64_t)k[19]<<32);
 804   case 19: c+=((uint64_t)k[18]<<24);
 805   case 18: c+=((uint64_t)k[17]<<16);
 806   case 17: c+=((uint64_t)k[16]<<8);
 807     /* the first byte of c is reserved for the length */
 808   case 16: b+=((uint64_t)k[15]<<56);
 809   case 15: b+=((uint64_t)k[14]<<48);
 810   case 14: b+=((uint64_t)k[13]<<40);
 811   case 13: b+=((uint64_t)k[12]<<32);
 812   case 12: b+=((uint64_t)k[11]<<24);
 813   case 11: b+=((uint64_t)k[10]<<16);
 814   case 10: b+=((uint64_t)k[ 9]<<8);
 815   case  9: b+=((uint64_t)k[ 8]);
 816   case  8: a+=((uint64_t)k[ 7]<<56);
 817   case  7: a+=((uint64_t)k[ 6]<<48);
 818   case  6: a+=((uint64_t)k[ 5]<<40);
 819   case  5: a+=((uint64_t)k[ 4]<<32);
 820   case  4: a+=((uint64_t)k[ 3]<<24);
 821   case  3: a+=((uint64_t)k[ 2]<<16);
 822   case  2: a+=((uint64_t)k[ 1]<<8);
 823   case  1: a+=((uint64_t)k[ 0]);
 824     /* case 0: nothing left to add */
 825   }
 826   mix64(a,b,c);
 827   /*-------------------------------------------- report the result */
 828   return c;
 829 }
 830
 831
 832 #ifdef SELOPT_WRITE
 833
 834 /*
 835 ------------------------------------------------------------------------------
 836 This generates a minimal perfect hash function.  That means, given a
 837 set of n keys, this determines a hash function that maps each of
 838 those keys into a value in 0..n-1 with no collisions.
 839
 840 The perfect hash function first uses a normal hash function on the key
 841 to determine (a,b) such that the pair (a,b) is distinct for all
 842 keys, then it computes a^scramble[tab[b]] to get the final perfect hash.
 843 tab[] is an array of 1-byte values and scramble[] is a 256-term array of
 844 2-byte or 4-byte values.  If there are n keys, the length of tab[] is a
 845 power of two between n/3 and n.
 846
 847 I found the idea of computing distinct (a,b) values in "Practical minimal
 848 perfect hash functions for large databases", Fox, Heath, Chen, and Daoud,
 849 Communications of the ACM, January 1992.  They found the idea in Chichelli
 850 (CACM Jan 1980).  Beyond that, our methods differ.
 851
 852 The key is hashed to a pair (a,b) where a in 0..*alen*-1 and b in
 853 0..*blen*-1.  A fast hash function determines both a and b
 854 simultaneously.  Any decent hash function is likely to produce
 855 hashes so that (a,b) is distinct for all pairs.  I try the hash
 856 using different values of *salt* until all pairs are distinct.
 857
 858 The final hash is (a XOR scramble[tab[b]]).  *scramble* is a
 859 predetermined mapping of 0..255 into 0..smax-1.  *tab* is an
 860 array that we fill in in such a way as to make the hash perfect.
 861
 862 First we fill in all values of *tab* that are used by more than one
 863 key.  We try all possible values for each position until one works.
 864
 865 This leaves m unmapped keys and m values that something could hash to.
 866 If you treat unmapped keys as lefthand nodes and unused hash values
 867 as righthand nodes, and draw a line connecting each key to each hash
 868 value it could map to, you get a bipartite graph.  We attempt to
 869 find a perfect matching in this graph.  If we succeed, we have
 870 determined a perfect hash for the whole set of keys.
 871
 872 *scramble* is used because (a^tab[i]) clusters keys around *a*.
 873 ------------------------------------------------------------------------------
 874 */
 875
 876 typedef uint64_t  ub8;
 877 #define UB8MAXVAL 0xffffffffffffffffLL
 878 #define UB8BITS 64
 879 typedef uint32_t  ub4;
 880 #define UB4MAXVAL 0xffffffff
 881 #define UB4BITS 32
 882 typedef uint16_t  ub2;
 883 #define UB2MAXVAL 0xffff
 884 #define UB2BITS 16
 885 typedef uint8_t ub1;
 886 #define UB1MAXVAL 0xff
 887 #define UB1BITS 8
 888
 889 #define TRUE  1
 890 #define FALSE 0
 891
 892 #define SCRAMBLE_LEN 256 // ((ub4)1<<16)                    /* length of *scramble* */
 893 #define RETRY_INITKEY 2048  /* number of times to try to find distinct (a,b) */
 894 #define RETRY_PERFECT 4     /* number of times to try to make a perfect hash */
 895
 896
 897 /* representation of a key */
 898 struct key
 899 {
 900   ub1        *name_k;                                      /* the actual key */
 901   ub4         len_k;                         /* the length of the actual key */
 902   ub4         hash_k;                 /* the initial hash value for this key */
 903 /* beyond this point is mapping-dependent */
 904   ub4         a_k;                            /* a, of the key maps to (a,b) */
 905   ub4         b_k;                            /* b, of the key maps to (a,b) */
 906   struct key *nextb_k;                               /* next key with this b */
 907 };
 908 typedef  struct key  key;
 909
 910 /* things indexed by b of original (a,b) pair */
 911 struct bstuff
 912 {
 913   ub2  val_b;                                        /* hash=a^tabb[b].val_b */
 914   key *list_b;                   /* tabb[i].list_b is list of keys with b==i */
 915   ub4  listlen_b;                                        /* length of list_b */
 916   ub4  water_b;           /* high watermark of who has visited this map node */
 917 };
 918 typedef  struct bstuff  bstuff;
 919
 920 /* things indexed by final hash value */
 921 struct hstuff
 922 {
 923   key *key_h;                   /* tabh[i].key_h is the key with a hash of i */
 924 };
 925 typedef  struct hstuff hstuff;
 926
 927 /* things indexed by queue position */
 928 struct qstuff
 929 {
 930   bstuff *b_q;                        /* b that currently occupies this hash */
 931   ub4     parent_q;     /* queue position of parent that could use this hash */
 932   ub2     newval_q;      /* what to change parent tab[b] to to use this hash */
 933   ub2     oldval_q;                              /* original value of tab[b] */
 934 };
 935 typedef  struct qstuff  qstuff;
 936
 937
 938 /*
 939 ------------------------------------------------------------------------------
 940 Find the mapping that will produce a perfect hash
 941 ------------------------------------------------------------------------------
 942 */
 943
 944 /* return the ceiling of the log (base 2) of val */
 945 static ub4  log2u(ub4 val)
 946 {
 947   ub4 i;
 948   for (i=0; ((ub4)1<<i) < val; ++i)
 949     ;
 950   return i;
 951 }
 952
 953 /* compute p(x), where p is a permutation of 0..(1<<nbits)-1 */
 954 /* permute(0)=0.  This is intended and useful. */
 955 static ub4  permute(ub4 x, ub4 nbits)
 956 // ub4 x;                                       /* input, a value in some range */
 957 // ub4 nbits;                                 /* input, number of bits in range */
 958 {
 959   int i;
 960   int mask   = ((ub4)1<<nbits)-1;                                /* all ones */
 961   int const2 = 1+nbits/2;
 962   int const3 = 1+nbits/3;
 963   int const4 = 1+nbits/4;
 964   int const5 = 1+nbits/5;
 965   for (i=0; i<20; ++i)
 966   {
 967     x = (x+(x<<const2)) & mask;
 968     x = (x^(x>>const3));
 969     x = (x+(x<<const4)) & mask;
 970     x = (x^(x>>const5));
 971   }
 972   return x;
 973 }
 974
 975 /* initialize scramble[] with distinct random values in 0..smax-1 */
 976 static void scrambleinit(ub4 *scramble, ub4 smax)
 977 // ub4      *scramble;                            /* hash is a^scramble[tab[b]] */
 978 // ub4       smax;                    /* scramble values should be in 0..smax-1 */
 979 {
 980   ub4 i;
 981
 982   /* fill scramble[] with distinct random integers in 0..smax-1 */
 983   for (i=0; i<SCRAMBLE_LEN; ++i)
 984   {
 985     scramble[i] = permute(i, log2u(smax));
 986   }
 987 }
 988
 989
 990 /*
 991  * put keys in tabb according to key->b_k
 992  * check if the initial hash might work
 993  */
 994 static int inittab(bstuff *tabb, ub4 blen, key *keys, ub4 nkeys, int complete)
 995 // bstuff   *tabb;                     /* output, list of keys with b for (a,b) */
 996 // ub4       blen;                                            /* length of tabb */
 997 // key      *keys;                               /* list of keys already hashed */
 998 // int       complete;        /* TRUE means to complete init despite collisions */
 999 {
1000   int  nocollision = TRUE;
1001   ub4 i;
1002
1003   memset((void *)tabb, 0, (size_t)(sizeof(bstuff)*blen));
1004
1005   /* Two keys with the same (a,b) guarantees a collision */
1006   for (i = 0; i < nkeys; i++) {
1007     key *mykey = keys+i;
1008     key *otherkey;
1009
1010     for (otherkey=tabb[mykey->b_k].list_b;
1011          otherkey;
1012          otherkey=otherkey->nextb_k)
1013     {
1014       if (mykey->a_k == otherkey->a_k)
1015       {
1016         nocollision = FALSE;
1017         if (!complete)
1018           return FALSE;
1019       }
1020     }
1021     ++tabb[mykey->b_k].listlen_b;
1022     mykey->nextb_k = tabb[mykey->b_k].list_b;
1023     tabb[mykey->b_k].list_b = mykey;
1024   }
1025
1026   /* no two keys have the same (a,b) pair */
1027   return nocollision;
1028 }
1029
1030
1031 /* Do the initial hash for normal mode (use lookup and checksum) */
1032 static void initnorm(key *keys, ub4 nkeys, ub4 alen, ub4 blen, ub4 smax, ub8 salt)
1033 // key      *keys;                                          /* list of all keys */
1034 // ub4       alen;                    /* (a,b) has a in 0..alen-1, a power of 2 */
1035 // ub4       blen;                    /* (a,b) has b in 0..blen-1, a power of 2 */
1036 // ub4       smax;                   /* maximum range of computable hash values */
1037 // ub4       salt;                     /* used to initialize the hash function */
1038 // gencode  *final;                          /* output, code for the final hash */
1039 {
1040   ub4 loga = log2u(alen);                            /* log based 2 of blen */
1041   dispatch_apply(nkeys, DISPATCH_APPLY_AUTO, ^(size_t index) {
1042     ub4 i = (ub4)index;
1043     key *mykey = keys+i;
1044     ub8 hash = lookup8(mykey->name_k, mykey->len_k, salt);
1045     mykey->a_k = (loga > 0) ? (ub4)(hash >> (UB8BITS-loga)) : 0;
1046     mykey->b_k = (blen > 1) ? (hash & (blen-1)) : 0;
1047   });
1048 }
1049
1050
1051 /* Try to apply an augmenting list */
1052 static int apply(bstuff *tabb, hstuff *tabh, qstuff *tabq, ub4 blen, ub4 *scramble, ub4 tail, int rollback)
1053 // bstuff *tabb;
1054 // hstuff *tabh;
1055 // qstuff *tabq;
1056 // ub4     blen;
1057 // ub4    *scramble;
1058 // ub4     tail;
1059 // int     rollback;          /* FALSE applies augmenting path, TRUE rolls back */
1060 {
1061   ub4     hash;
1062   key    *mykey;
1063   bstuff *pb;
1064   ub4     child;
1065   ub4     parent;
1066   ub4     stabb;                                         /* scramble[tab[b]] */
1067
1068   /* walk from child to parent */
1069   for (child=tail-1; child; child=parent)
1070   {
1071     parent = tabq[child].parent_q;                    /* find child's parent */
1072     pb     = tabq[parent].b_q;             /* find parent's list of siblings */
1073
1074     /* erase old hash values */
1075     stabb = scramble[pb->val_b];
1076     for (mykey=pb->list_b; mykey; mykey=mykey->nextb_k)
1077     {
1078       hash = mykey->a_k^stabb;
1079       if (mykey == tabh[hash].key_h)
1080       {                            /* erase hash for all of child's siblings */
1081         tabh[hash].key_h = (key *)0;
1082       }
1083     }
1084
1085     /* change pb->val_b, which will change the hashes of all parent siblings */
1086     pb->val_b = (rollback ? tabq[child].oldval_q : tabq[child].newval_q);
1087
1088     /* set new hash values */
1089     stabb = scramble[pb->val_b];
1090     for (mykey=pb->list_b; mykey; mykey=mykey->nextb_k)
1091     {
1092       hash = mykey->a_k^stabb;
1093       if (rollback)
1094       {
1095         if (parent == 0) continue;                  /* root never had a hash */
1096       }
1097       else if (tabh[hash].key_h)
1098       {
1099         /* very rare: roll back any changes */
1100         apply(tabb, tabh, tabq, blen, scramble, tail, TRUE);
1101         return FALSE;                                  /* failure, collision */
1102       }
1103       tabh[hash].key_h = mykey;
1104     }
1105   }
1106   return TRUE;
1107 }
1108
1109
1110 /*
1111 -------------------------------------------------------------------------------
1112 augment(): Add item to the mapping.
1113
1114 Construct a spanning tree of *b*s with *item* as root, where each
1115 parent can have all its hashes changed (by some new val_b) with
1116 at most one collision, and each child is the b of that collision.
1117
1118 I got this from Tarjan's "Data Structures and Network Algorithms".  The
1119 path from *item* to a *b* that can be remapped with no collision is
1120 an "augmenting path".  Change values of tab[b] along the path so that
1121 the unmapped key gets mapped and the unused hash value gets used.
1122
1123 Assuming 1 key per b, if m out of n hash values are still unused,
1124 you should expect the transitive closure to cover n/m nodes before
1125 an unused node is found.  Sum(i=1..n)(n/i) is about nlogn, so expect
1126 this approach to take about nlogn time to map all single-key b's.
1127 -------------------------------------------------------------------------------
1128 */
1129 static int augment(bstuff *tabb, hstuff *tabh, qstuff *tabq, ub4 blen, ub4 *scramble, ub4 smax, bstuff *item, ub4 nkeys,
1130                    ub4 highwater)
1131 // bstuff   *tabb;                                        /* stuff indexed by b */
1132 // hstuff   *tabh;  /* which key is associated with which hash, indexed by hash */
1133 // qstuff   *tabq;            /* queue of *b* values, this is the spanning tree */
1134 // ub4       blen;                                            /* length of tabb */
1135 // ub4      *scramble;                      /* final hash is a^scramble[tab[b]] */
1136 // ub4       smax;                                 /* highest value in scramble */
1137 // bstuff   *item;                           /* &tabb[b] for the b to be mapped */
1138 // ub4       nkeys;                         /* final hash must be in 0..nkeys-1 */
1139 // ub4       highwater;        /* a value higher than any now in tabb[].water_b */
1140 {
1141   ub4  q;                      /* current position walking through the queue */
1142   ub4  tail;              /* tail of the queue.  0 is the head of the queue. */
1143   ub4  limit=UB1MAXVAL+1;
1144   ub4  highhash = smax;
1145
1146   /* initialize the root of the spanning tree */
1147   tabq[0].b_q = item;
1148   tail = 1;
1149
1150   /* construct the spanning tree by walking the queue, add children to tail */
1151   for (q=0; q<tail; ++q)
1152   {
1153     bstuff *myb = tabq[q].b_q;                        /* the b for this node */
1154     ub4     i;                              /* possible value for myb->val_b */
1155
1156     if (q == 1)
1157       break;                                  /* don't do transitive closure */
1158
1159     for (i=0; i<limit; ++i)
1160     {
1161       bstuff *childb = (bstuff *)0;             /* the b that this i maps to */
1162       key    *mykey;                       /* for walking through myb's keys */
1163
1164       for (mykey = myb->list_b; mykey; mykey=mykey->nextb_k)
1165       {
1166         key    *childkey;
1167         ub4 hash = mykey->a_k^scramble[i];
1168
1169         if (hash >= highhash) break;                        /* out of bounds */
1170         childkey = tabh[hash].key_h;
1171
1172         if (childkey)
1173         {
1174           bstuff *hitb = &tabb[childkey->b_k];
1175
1176           if (childb)
1177           {
1178             if (childb != hitb) break;            /* hit at most one child b */
1179           }
1180           else
1181           {
1182             childb = hitb;                        /* remember this as childb */
1183             if (childb->water_b == highwater) break;     /* already explored */
1184           }
1185         }
1186       }
1187       if (mykey) continue;             /* myb with i has multiple collisions */
1188
1189       /* add childb to the queue of reachable things */
1190       if (childb) childb->water_b = highwater;
1191       tabq[tail].b_q      = childb;
1192       tabq[tail].newval_q = i;     /* how to make parent (myb) use this hash */
1193       tabq[tail].oldval_q = myb->val_b;            /* need this for rollback */
1194       tabq[tail].parent_q = q;
1195       ++tail;
1196
1197       if (!childb)
1198       {                                  /* found an *i* with no collisions? */
1199         /* try to apply the augmenting path */
1200         if (apply(tabb, tabh, tabq, blen, scramble, tail, FALSE))
1201           return TRUE;        /* success, item was added to the perfect hash */
1202
1203         --tail;                    /* don't know how to handle such a child! */
1204       }
1205     }
1206   }
1207   return FALSE;
1208 }
1209
1210
1211 /* find a mapping that makes this a perfect hash */
1212 static int perfect(bstuff *tabb, hstuff *tabh, qstuff *tabq, ub4 blen, ub4 smax, ub4 *scramble, ub4 nkeys)
1213 {
1214   ub4 maxkeys;                           /* maximum number of keys for any b */
1215   ub4 i, j;
1216
1217 #if SELOPT_DEBUG
1218   fprintf(stderr, "           blen %d smax %d nkeys %d\n", blen, smax, nkeys);
1219 #endif
1220
1221   /* clear any state from previous attempts */
1222   memset((void *)tabh, 0, sizeof(hstuff)*smax);
1223   memset((void *)tabq, 0, sizeof(qstuff)*(blen+1));
1224
1225   for (maxkeys=0,i=0; i<blen; ++i)
1226     if (tabb[i].listlen_b > maxkeys)
1227       maxkeys = tabb[i].listlen_b;
1228
1229   /* In descending order by number of keys, map all *b*s */
1230   for (j=maxkeys; j>0; --j)
1231     for (i=0; i<blen; ++i)
1232       if (tabb[i].listlen_b == j)
1233         if (!augment(tabb, tabh, tabq, blen, scramble, smax, &tabb[i], nkeys,
1234                      i+1))
1235         {
1236           return FALSE;
1237         }
1238
1239   /* Success!  We found a perfect hash of all keys into 0..nkeys-1. */
1240   return TRUE;
1241 }
1242
1243
1244 /* guess initial values for alen and blen */
1245 static void initalen(ub4 *alen, ub4 *blen, ub4 smax, ub4 nkeys)
1246 // ub4      *alen;                                      /* output, initial alen */
1247 // ub4      *blen;                                      /* output, initial blen */
1248 // ub4      smax;    /* input, power of two greater or equal to max hash value */
1249 // ub4       nkeys;                              /* number of keys being hashed */
1250 {
1251   /*
1252    * Find initial *alen, *blen
1253    * Initial alen and blen values were found empirically.  Some factors:
1254    *
1255    * If smax<256 there is no scramble, so tab[b] needs to cover 0..smax-1.
1256    *
1257    * alen and blen must be powers of 2 because the values in 0..alen-1 and
1258    * 0..blen-1 are produced by applying a bitmask to the initial hash function.
1259    *
1260    * alen must be less than smax, in fact less than nkeys, because otherwise
1261    * there would often be no i such that a^scramble[i] is in 0..nkeys-1 for
1262    * all the *a*s associated with a given *b*, so there would be no legal
1263    * value to assign to tab[b].  This only matters when we're doing a minimal
1264    * perfect hash.
1265    *
1266    * It takes around 800 trials to find distinct (a,b) with nkey=smax*(5/8)
1267    * and alen*blen = smax*smax/32.
1268    *
1269    * Values of blen less than smax/4 never work, and smax/2 always works.
1270    *
1271    * We want blen as small as possible because it is the number of bytes in
1272    * the huge array we must create for the perfect hash.
1273    *
1274    * When nkey <= smax*(5/8), blen=smax/4 works much more often with
1275    * alen=smax/8 than with alen=smax/4.  Above smax*(5/8), blen=smax/4
1276    * doesn't seem to care whether alen=smax/8 or alen=smax/4.  I think it
1277    * has something to do with 5/8 = 1/8 * 5.  For example examine 80000,
1278    * 85000, and 90000 keys with different values of alen.  This only matters
1279    * if we're doing a minimal perfect hash.
1280    *
1281    * When alen*blen <= 1<<UB4BITS, the initial hash must produce one integer.
1282    * Bigger than that it must produce two integers, which increases the
1283    * cost of the hash per character hashed.
1284    */
1285   *alen = smax;                     /* no reason to restrict alen to smax/2 */
1286   *blen = ((nkeys <= smax*0.6) ? smax/16 :
1287            (nkeys <= smax*0.8) ? smax/8 : smax/4);
1288
1289   if (*alen < 1) *alen = 1;
1290   if (*blen < 1) *blen = 1;
1291
1292 #if SELOPT_DEBUG
1293   fprintf(stderr, "alen %d blen %d smax %d nkeys %d\n", *alen, *blen, smax, nkeys);
1294 #endif
1295 }
1296
1297 /*
1298 ** Try to find a perfect hash function.
1299 ** Return the successful initializer for the initial hash.
1300 ** Return 0 if no perfect hash could be found.
1301 */
1302 static int findhash(bstuff **tabb, ub4 *alen, ub4 *blen, ub8 *salt,
1303                     ub4 *scramble, ub4 smax, key *keys, ub4 nkeys)
1304 // bstuff  **tabb;           /* output, tab[] of the perfect hash, length *blen */
1305 // ub4      *alen;                 /* output, 0..alen-1 is range for a of (a,b) */
1306 // ub4      *blen;                 /* output, 0..blen-1 is range for b of (a,b) */
1307 // ub4      *salt;                         /* output, initializes initial hash */
1308 // ub4      *scramble;                      /* input, hash = a^scramble[tab[b]] */
1309 // ub4      smax;                           /* input, scramble[i] in 0..smax-1 */
1310 // key      *keys;                                       /* input, keys to hash */
1311 // ub4       nkeys;                       /* input, number of keys being hashed */
1312 {
1313   ub4 bad_initkey;                       /* how many times did initkey fail? */
1314   ub4 bad_perfect;                       /* how many times did perfect fail? */
1315   ub4 si;                        /* trial initializer for initial hash */
1316   ub4 maxalen;
1317   hstuff *tabh;                       /* table of keys indexed by hash value */
1318   qstuff *tabq;    /* table of stuff indexed by queue value, used by augment */
1319
1320   /* guess initial values for alen and blen */
1321   initalen(alen, blen, smax, nkeys);
1322
1323   scrambleinit(scramble, smax);
1324
1325   maxalen = smax;
1326
1327   /* allocate working memory */
1328   *tabb = new bstuff[*blen];
1329   tabq  = new qstuff[*blen+1];
1330   tabh  = new hstuff[smax];
1331
1332   /* Actually find the perfect hash */
1333   *salt = 0;
1334   bad_initkey = 0;
1335   bad_perfect = 0;
1336   for (si=1; ; ++si)
1337   {
1338     ub4 rslinit;
1339     /* Try to find distinct (A,B) for all keys */
1340     *salt = si * 0x9e3779b97f4a7c13LL; /* golden ratio (arbitrary value) */
1341     initnorm(keys, nkeys, *alen, *blen, smax, *salt);
1342     rslinit = inittab(*tabb, *blen, keys, nkeys, FALSE);
1343     if (rslinit == 0)
1344     {
1345       /* didn't find distinct (a,b) */
1346       if (++bad_initkey >= RETRY_INITKEY)
1347       {
1348         /* Try to put more bits in (A,B) to make distinct (A,B) more likely */
1349         if (*alen < maxalen)
1350         {
1351           *alen *= 2;
1352         }
1353         else if (*blen < smax)
1354         {
1355           *blen *= 2;
1356           delete[] tabq;
1357           delete[] *tabb;
1358           *tabb  = new bstuff[*blen];
1359           tabq  = new qstuff[*blen+1];
1360         }
1361         bad_initkey = 0;
1362         bad_perfect = 0;
1363       }
1364       continue;                             /* two keys have same (a,b) pair */
1365     }
1366
1367     /* Given distinct (A,B) for all keys, build a perfect hash */
1368     if (!perfect(*tabb, tabh, tabq, *blen, smax, scramble, nkeys))
1369     {
1370       if (++bad_perfect >= RETRY_PERFECT)
1371       {
1372         if (*blen < smax)
1373         {
1374           *blen *= 2;
1375           delete[] *tabb;
1376           delete[] tabq;
1377           *tabb  = new bstuff[*blen];
1378           tabq  = new qstuff[*blen+1];
1379           --si;               /* we know this salt got distinct (A,B) */
1380         }
1381         else
1382         {
1383           return 0;
1384         }
1385         bad_perfect = 0;
1386       }
1387       continue;
1388     }
1389
1390     break;
1391   }
1392
1393   /* free working memory */
1394   delete[] tabh;
1395   delete[] tabq;
1396
1397   return 1;
1398 }
1399
1400 /*
1401 ------------------------------------------------------------------------------
1402 Input/output type routines
1403 ------------------------------------------------------------------------------
1404 */
1405
1406 /* get the list of keys */
1407 static void getkeys(key **keys, ub4 *nkeys, const string_map& strings)
1408 {
1409   key *buf = new key[strings.size()];
1410   size_t i;
1411   string_map::const_iterator s;
1412   for (i = 0, s = strings.begin(); s != strings.end(); ++s, ++i) {
1413     key *mykey = buf+i;
1414     mykey->name_k = (ub1 *)s->first;
1415     mykey->len_k  = (ub4)strlen(s->first);
1416   }
1417   *keys = buf;
1418   *nkeys = (ub4)strings.size();
1419 }
1420
1421
1422 static perfect_hash
1423 make_perfect(const string_map& strings)
1424 {
1425   ub4       nkeys;                                         /* number of keys */
1426   key      *keys;                                    /* head of list of keys */
1427   bstuff   *tab;                                       /* table indexed by b */
1428   ub4       smax;            /* scramble[] values in 0..smax-1, a power of 2 */
1429   ub4       alen;                            /* a in 0..alen-1, a power of 2 */
1430   ub4       blen;                            /* b in 0..blen-1, a power of 2 */
1431   ub8       salt;                       /* a parameter to the hash function */
1432   ub4       scramble[SCRAMBLE_LEN];           /* used in final hash function */
1433   int ok;
1434   uint32_t i;
1435   perfect_hash result;
1436
1437   /* read in the list of keywords */
1438   getkeys(&keys, &nkeys, strings);
1439
1440   /* find the hash */
1441   smax = ((ub4)1<<log2u(nkeys));
1442   ok = findhash(&tab, &alen, &blen, &salt,
1443                 scramble, smax, keys, nkeys);
1444   if (!ok) {
1445       smax = 2 * ((ub4)1<<log2u(nkeys));
1446       ok = findhash(&tab, &alen, &blen, &salt,
1447                     scramble, smax, keys, nkeys);
1448   }
1449   if (!ok) {
1450       bzero(&result, sizeof(result));
1451   } else {
1452       /* build the tables */
1453       result.capacity = smax;
1454       result.occupied = nkeys;
1455       result.shift = UB8BITS - log2u(alen);
1456       result.mask = blen - 1;
1457       result.salt = salt;
1458
1459       result.tab = new uint8_t[blen];
1460       for (i = 0; i < blen; i++) {
1461           result.tab[i] = tab[i].val_b;
1462       }
1463       for (i = 0; i < 256; i++) {
1464           result.scramble[i] = scramble[i];
1465       }
1466   }
1467
1468   delete[] keys;
1469   delete[] tab;
1470
1471   return result;
1472 }
1473
1474 // SELOPT_WRITE
1475 #endif
1476
1477 // namespace objc_selopt
1478 };
1479
1480 #undef S32
1481 #undef S64
1482
1483 #endif