X-Git-Url: https://git.saurik.com/apple/dyld.git/blobdiff_plain/ee50caae846913bb8e5e48b51a2dd974b8f7a0e1..832b6fce7c321434378950ecd081b6c34cc3a24f:/include/objc-shared-cache.h diff --git a/include/objc-shared-cache.h b/include/objc-shared-cache.h new file mode 100644 index 0000000..b7db57d --- /dev/null +++ b/include/objc-shared-cache.h @@ -0,0 +1,1363 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +/* +Portions derived from: + +-------------------------------------------------------------------- +lookup8.c, by Bob Jenkins, January 4 1997, Public Domain. +hash(), hash2(), hash3, and mix() are externally useful functions. +Routines to test the hash are included if SELF_TEST is defined. +You can use this free for any purpose. It has no warranty. +-------------------------------------------------------------------- + +------------------------------------------------------------------------------ +perfect.c: code to generate code for a hash for perfect hashing. +(c) Bob Jenkins, September 1996, December 1999 +You may use this code in any way you wish, and it is free. No warranty. +I hereby place this in the public domain. +Source is http://burtleburtle.net/bob/c/perfect.c +------------------------------------------------------------------------------ +*/ + +/* + * objc-selopt.h + * Interface between libobjc and dyld + * for selector uniquing in the dyld shared cache. + * + * When building the shared cache, dyld locates all selectors and selector + * references in the cached images. It builds a perfect hash table out of + * them and writes the table into the shared cache copy of libobjc. + * libobjc then uses that table as the builtin selector list. + * + * Versioning + * The table has a version number. dyld and objc can both ignore the table + * if the other used the wrong version number. + * + * Completeness + * Not all libraries are in the shared cache. Libraries that are in the + * shared cache and were optimized are specially marked. Libraries on + * disk never include those marks. + * + * Coherency + * Libraries optimized in the shared cache can be replaced by unoptimized + * copies from disk when loaded. The copy from disk is not marked and will + * be fixed up by libobjc. The shared cache copy is still mapped into the + * process, so the table can point to cstring data in that library's part + * of the shared cache without trouble. + * + * Atomicity + * dyld writes the table itself last. If dyld marks some metadata as + * updated but then fails to write a table for some reason, libobjc + * fixes up all metadata as if it were not marked. + */ + +#ifndef _OBJC_SELOPT_H +#define _OBJC_SELOPT_H + +/* + DO NOT INCLUDE ANY objc HEADERS HERE + dyld USES THIS FILE AND CANNOT SEE THEM +*/ +#include +#include +#ifdef SELOPT_WRITE +#include +#endif +/* + DO NOT INCLUDE ANY objc HEADERS HERE + dyld USES THIS FILE AND CANNOT SEE THEM +*/ + +#ifndef STATIC_ASSERT +# define STATIC_ASSERT(x) _STATIC_ASSERT2(x, __LINE__) +# define _STATIC_ASSERT2(x, line) _STATIC_ASSERT3(x, line) +# define _STATIC_ASSERT3(x, line) \ + typedef struct { \ + int _static_assert[(x) ? 0 : -1]; \ + } _static_assert_ ## line __attribute__((unavailable)) +#endif + +#define SELOPT_DEBUG 0 + +#define S32(x) x = little_endian ? OSSwapHostToLittleInt32(x) : OSSwapHostToBigInt32(x) +#define S64(x) x = little_endian ? OSSwapHostToLittleInt64(x) : OSSwapHostToBigInt64(x) + +namespace objc_opt { + +typedef int32_t objc_stringhash_offset_t; +typedef uint8_t objc_stringhash_check_t; + +#ifdef SELOPT_WRITE + +// Perfect hash code is at the end of this file. + +struct perfect_hash { + uint32_t capacity; + uint32_t occupied; + uint32_t shift; + uint32_t mask; + uint64_t salt; + + uint32_t scramble[256]; + uint8_t *tab; // count == mask+1; free with delete[] + + perfect_hash() : tab(0) { } + + ~perfect_hash() { if (tab) delete[] tab; } +}; + +struct eqstr { + bool operator()(const char* s1, const char* s2) const { + return strcmp(s1, s2) == 0; + } +}; + +// cstring => cstring's vmaddress +// (used for selector names and class names) +typedef __gnu_cxx::hash_map, eqstr> string_map; + +// class name => (class vmaddress, header_info vmaddress) +typedef __gnu_cxx::hash_multimap, __gnu_cxx::hash, eqstr> class_map; + +static perfect_hash make_perfect(const string_map& strings); + +#endif + +static uint64_t lookup8( uint8_t *k, size_t length, uint64_t level); + +// Precomputed perfect hash table of strings. +// Base class for precomputed selector table and class table. +// Edit objc-sel-table.s and OPT_INITIALIZER if you change this structure. +struct objc_stringhash_t { + uint32_t capacity; + uint32_t occupied; + uint32_t shift; + uint32_t mask; + uint32_t zero; + uint32_t unused; // alignment pad + uint64_t salt; + + uint32_t scramble[256]; + uint8_t tab[0]; /* tab[mask+1] (always power-of-2) */ + // uint8_t checkbytes[capacity]; /* check byte for each string */ + // int32_t offsets[capacity]; /* offsets from &capacity to cstrings */ + + objc_stringhash_check_t *checkbytes() { return (objc_stringhash_check_t *)&tab[mask+1]; } + const objc_stringhash_check_t *checkbytes() const { return (const objc_stringhash_check_t *)&tab[mask+1]; } + + objc_stringhash_offset_t *offsets() { return (objc_stringhash_offset_t *)&checkbytes()[capacity]; } + const objc_stringhash_offset_t *offsets() const { return (const objc_stringhash_offset_t *)&checkbytes()[capacity]; } + + uint32_t hash(const char *key) const + { + uint64_t val = lookup8((uint8_t*)key, strlen(key), salt); + uint32_t index = (uint32_t)(val>>shift) ^ scramble[tab[val&mask]]; + return index; + } + + // The check bytes areused to reject strings that aren't in the table + // without paging in the table's cstring data. This checkbyte calculation + // catches 4785/4815 rejects when launching Safari; a perfect checkbyte + // would catch 4796/4815. + objc_stringhash_check_t checkbyte(const char *key) const + { + return + ((key[0] & 0x7) << 5) + | + (strlen(key) & 0x1f); + } + +#define INDEX_NOT_FOUND (~(uint32_t)0) + + uint32_t getIndex(const char *key) const + { + uint32_t h = hash(key); + + // Use check byte to reject without paging in the table's cstrings + objc_stringhash_check_t h_check = checkbytes()[h]; + objc_stringhash_check_t key_check = checkbyte(key); + bool check_fail = (h_check != key_check); +#if ! SELOPT_DEBUG + if (check_fail) return INDEX_NOT_FOUND; +#endif + + const char *result = (const char *)this + offsets()[h]; + if (0 != strcmp(key, result)) return INDEX_NOT_FOUND; + +#if SELOPT_DEBUG + if (check_fail) abort(); +#endif + + return h; + } + +#ifdef SELOPT_WRITE + + size_t size() + { + return sizeof(objc_stringhash_t) + + mask+1 + + capacity * sizeof(objc_stringhash_check_t) + + capacity * sizeof(objc_stringhash_offset_t); + } + + void byteswap(bool little_endian) + { + // tab and checkbytes are arrays of bytes, no swap needed + for (uint32_t i = 0; i < 256; i++) { + S32(scramble[i]); + } + objc_stringhash_offset_t *o = offsets(); + for (uint32_t i = 0; i < capacity; i++) { + S32(o[i]); + } + + S32(capacity); + S32(occupied); + S32(shift); + S32(mask); + S32(zero); + S64(salt); + } + + const char *write(uint64_t base, size_t remaining, string_map& strings) + { + if (sizeof(objc_stringhash_t) > remaining) { + return "selector section too small (metadata not optimized)"; + } + + if (strings.size() == 0) { + bzero(this, sizeof(objc_stringhash_t)); + return NULL; + } + + perfect_hash phash = make_perfect(strings); + if (phash.capacity == 0) { + return "perfect hash failed (metadata not optimized)"; + } + + // Set header + capacity = phash.capacity; + occupied = phash.occupied; + shift = phash.shift; + mask = phash.mask; + zero = 0; + unused = 0; + salt = phash.salt; + + if (size() > remaining) { + return "selector section too small (metadata not optimized)"; + } + + // Set hash data + for (uint32_t i = 0; i < 256; i++) { + scramble[i] = phash.scramble[i]; + } + for (uint32_t i = 0; i < phash.mask+1; i++) { + tab[i] = phash.tab[i]; + } + + // Set offsets to "" + for (uint32_t i = 0; i < phash.capacity; i++) { + offsets()[i] = + (objc_stringhash_offset_t)offsetof(objc_stringhash_t, zero); + } + // Set checkbytes to 0 + for (uint32_t i = 0; i < phash.capacity; i++) { + checkbytes()[i] = 0; + } + + // Set real string offsets and checkbytes +# define SHIFT (64 - 8*sizeof(objc_stringhash_offset_t)) + string_map::const_iterator s; + for (s = strings.begin(); s != strings.end(); ++s) { + int64_t offset = s->second - base; + if ((offset<>SHIFT != offset) { + return "selector offset too big (metadata not optimized)"; + } + + uint32_t h = hash(s->first); + offsets()[h] = (objc_stringhash_offset_t)offset; + checkbytes()[h] = checkbyte(s->first); + } +# undef SHIFT + + return NULL; + } + +// SELOPT_WRITE +#endif +}; + + +// Precomputed selector table. +// Edit objc-sel-table.s and OPT_INITIALIZER if you change this structure. +struct objc_selopt_t : objc_stringhash_t { + const char *get(const char *key) const + { + uint32_t h = getIndex(key); + if (h == INDEX_NOT_FOUND) return NULL; + + return (const char *)this + offsets()[h]; + } +}; + +// Precomputed class list. +// Edit objc-sel-table.s and OPT_INITIALIZER if you change these structures. + +struct objc_classheader_t { + objc_stringhash_offset_t clsOffset; + objc_stringhash_offset_t hiOffset; + + // For duplicate class names: + // clsOffset = count<<1 | 1 + // duplicated classes are duplicateOffsets[hiOffset..hiOffset+count-1] + bool isDuplicate() const { return clsOffset & 1; } + uint32_t duplicateCount() const { return clsOffset >> 1; } + uint32_t duplicateIndex() const { return hiOffset; } +}; + + +struct objc_clsopt_t : objc_stringhash_t { + // ...objc_stringhash_t fields... + // objc_classheader_t classOffsets[capacity]; /* offsets from &capacity to class_t and header_info */ + // uint32_t duplicateCount; + // objc_classheader_t duplicateOffsets[duplicatedClasses]; + + objc_classheader_t *classOffsets() { return (objc_classheader_t *)&offsets()[capacity]; } + const objc_classheader_t *classOffsets() const { return (const objc_classheader_t *)&offsets()[capacity]; } + + uint32_t& duplicateCount() { return *(uint32_t *)&classOffsets()[capacity]; } + const uint32_t& duplicateCount() const { return *(const uint32_t *)&classOffsets()[capacity]; } + + objc_classheader_t *duplicateOffsets() { return (objc_classheader_t *)(&duplicateCount()+1); } + const objc_classheader_t *duplicateOffsets() const { return (const objc_classheader_t *)(&duplicateCount()+1); } + + // 0/NULL/NULL: not found + // 1/ptr/ptr: found exactly one + // n/NULL/NULL: found N - use getClassesAndHeaders() instead + uint32_t getClassAndHeader(const char *key, void*& cls, void*& hi) const + { + uint32_t h = getIndex(key); + if (h == INDEX_NOT_FOUND) { + cls = NULL; + hi = NULL; + return 0; + } + + const objc_classheader_t& clshi = classOffsets()[h]; + if (! clshi.isDuplicate()) { + // class appears in exactly one header + cls = (void *)((const char *)this + clshi.clsOffset); + hi = (void *)((const char *)this + clshi.hiOffset); + return 1; + } + else { + // class appears in more than one header - use getClassesAndHeaders + cls = NULL; + hi = NULL; + return clshi.duplicateCount(); + } + } + + void getClassesAndHeaders(const char *key, void **cls, void **hi) const + { + uint32_t h = getIndex(key); + if (h == INDEX_NOT_FOUND) return; + + const objc_classheader_t& clshi = classOffsets()[h]; + if (! clshi.isDuplicate()) { + // class appears in exactly one header + cls[0] = (void *)((const char *)this + clshi.clsOffset); + hi[0] = (void *)((const char *)this + clshi.hiOffset); + } + else { + // class appears in more than one header + uint32_t count = clshi.duplicateCount(); + const objc_classheader_t *list = + &duplicateOffsets()[clshi.duplicateIndex()]; + for (uint32_t i = 0; i < count; i++) { + cls[i] = (void *)((const char *)this + list[i].clsOffset); + hi[i] = (void *)((const char *)this + list[i].hiOffset); + } + } + } + +#ifdef SELOPT_WRITE + + size_t size() + { + return + objc_stringhash_t::size() + + capacity * sizeof(objc_classheader_t) + + sizeof(duplicateCount()) + + duplicateCount() * sizeof(objc_classheader_t); + } + + void byteswap(bool little_endian) + { + objc_classheader_t *o; + + o = classOffsets(); + for (uint32_t i = 0; i < capacity; i++) { + S32(o[i].clsOffset); + S32(o[i].hiOffset); + } + + o = duplicateOffsets(); + for (uint32_t i = 0; i < duplicateCount(); i++) { + S32(o[i].clsOffset); + S32(o[i].hiOffset); + } + + S32(duplicateCount()); + + objc_stringhash_t::byteswap(little_endian); + } + + const char *write(uint64_t base, size_t remaining, + string_map& strings, class_map& classes, bool verbose) + { + const char *err; + err = objc_stringhash_t::write(base, remaining, strings); + if (err) return err; + + if (size() > remaining) { + return "selector section too small (metadata not optimized)"; + } + + // Set class offsets to &zero + objc_stringhash_offset_t zeroOffset = + (objc_stringhash_offset_t)offsetof(objc_stringhash_t, zero); + for (uint32_t i = 0; i < capacity; i++) { + classOffsets()[i].clsOffset = zeroOffset; + classOffsets()[i].hiOffset = zeroOffset; + } + + // Set real class offsets +# define SHIFT (64 - 8*sizeof(objc_stringhash_offset_t)) + class_map::const_iterator c; + for (c = classes.begin(); c != classes.end(); ++c) { + uint32_t h = getIndex(c->first); + if (h == INDEX_NOT_FOUND) { + return "class list busted (metadata not optimized)"; + } + + if (classOffsets()[h].clsOffset != zeroOffset) { + // already did this class + continue; + } + + uint32_t count = classes.count(c->first); + if (count == 1) { + // only one class with this name + + int64_t coff = c->second.first - base; + int64_t hoff = c->second.second - base; + if ((coff<>SHIFT != coff) { + return "class offset too big (metadata not optimized)"; + } + if ((hoff<>SHIFT != hoff) { + return "header offset too big (metadata not optimized)"; + } + + classOffsets()[h].clsOffset = (objc_stringhash_offset_t)coff; + classOffsets()[h].hiOffset = (objc_stringhash_offset_t)hoff; + } + else { + // class name has duplicates - write them all now + if (verbose) { + fprintf(stderr, "update_dyld_shared_cache: %u duplicates of Objective-C class %s\n", count, c->first); + } + + uint32_t dest = duplicateCount(); + duplicateCount() += count; + if (size() > remaining) { + return "selector section too small (metadata not optimized)"; + } + + // classOffsets() instead contains count and array index + classOffsets()[h].clsOffset = count*2 + 1; + classOffsets()[h].hiOffset = dest; + + std::pair + duplicates = classes.equal_range(c->first); + class_map::const_iterator dup; + for (dup = duplicates.first; dup != duplicates.second; ++dup) { + int64_t coff = dup->second.first - base; + int64_t hoff = dup->second.second - base; + if ((coff<>SHIFT != coff) { + return "class offset too big (metadata not optimized)"; + } + if ((hoff<>SHIFT != hoff) { + return "header offset too big (metadata not optimized)"; + } + + duplicateOffsets()[dest].clsOffset = (objc_stringhash_offset_t)coff; + duplicateOffsets()[dest].hiOffset = (objc_stringhash_offset_t)hoff; + dest++; + } + } + } +# undef SHIFT + + return NULL; + } + +// SELOPT_WRITE +#endif +}; + +// Precomputed image list. +struct objc_headeropt_t; + +// Precomputed class list. +struct objc_clsopt_t; + +// Edit objc-sel-table.s if you change this value. +enum { VERSION = 12 }; + +// Top-level optimization structure. +// Edit objc-sel-table.s and OPT_INITIALIZER if you change this structure. +struct objc_opt_t { + uint32_t version; + int32_t selopt_offset; + int32_t headeropt_offset; + int32_t clsopt_offset; + + const objc_selopt_t* selopt() const { + if (selopt_offset == 0) return NULL; + return (objc_selopt_t *)((uint8_t *)this + selopt_offset); + } + objc_selopt_t* selopt() { + if (selopt_offset == 0) return NULL; + return (objc_selopt_t *)((uint8_t *)this + selopt_offset); + } + + struct objc_headeropt_t* headeropt() const { + if (headeropt_offset == 0) return NULL; + return (struct objc_headeropt_t *)((uint8_t *)this + headeropt_offset); + } + + struct objc_clsopt_t* clsopt() const { + if (clsopt_offset == 0) return NULL; + return (objc_clsopt_t *)((uint8_t *)this + clsopt_offset); + } +}; + +// sizeof(objc_opt_t) must be pointer-aligned +STATIC_ASSERT(sizeof(objc_opt_t) % sizeof(void*) == 0); + +// Initializer for empty opt of type uint32_t[]. +#define X8(x) x, x, x, x, x, x, x, x +#define X64(x) X8(x), X8(x), X8(x), X8(x), X8(x), X8(x), X8(x), X8(x) +#define X256(x) X64(x), X64(x), X64(x), X64(x) +#define OPT_INITIALIZER { \ + /* objc_opt_t */ \ + objc_opt::VERSION, 16, 0, 0, \ + /* objc_selopt_t */ \ + 4, 4, 63, 3, 0, 0, 0,0, X256(0), 0, 0, 16, 16, 16, 16 \ + /* no objc_headeropt_t */ \ + /* no objc_clsopt_t */ \ +} + + +/* +-------------------------------------------------------------------- +mix -- mix 3 64-bit values reversibly. +mix() takes 48 machine instructions, but only 24 cycles on a superscalar + machine (like Intel's new MMX architecture). It requires 4 64-bit + registers for 4::2 parallelism. +All 1-bit deltas, all 2-bit deltas, all deltas composed of top bits of + (a,b,c), and all deltas of bottom bits were tested. All deltas were + tested both on random keys and on keys that were nearly all zero. + These deltas all cause every bit of c to change between 1/3 and 2/3 + of the time (well, only 113/400 to 287/400 of the time for some + 2-bit delta). These deltas all cause at least 80 bits to change + among (a,b,c) when the mix is run either forward or backward (yes it + is reversible). +This implies that a hash using mix64 has no funnels. There may be + characteristics with 3-bit deltas or bigger, I didn't test for + those. +-------------------------------------------------------------------- +*/ +#define mix64(a,b,c) \ +{ \ + a -= b; a -= c; a ^= (c>>43); \ + b -= c; b -= a; b ^= (a<<9); \ + c -= a; c -= b; c ^= (b>>8); \ + a -= b; a -= c; a ^= (c>>38); \ + b -= c; b -= a; b ^= (a<<23); \ + c -= a; c -= b; c ^= (b>>5); \ + a -= b; a -= c; a ^= (c>>35); \ + b -= c; b -= a; b ^= (a<<49); \ + c -= a; c -= b; c ^= (b>>11); \ + a -= b; a -= c; a ^= (c>>12); \ + b -= c; b -= a; b ^= (a<<18); \ + c -= a; c -= b; c ^= (b>>22); \ +} + +/* +-------------------------------------------------------------------- +hash() -- hash a variable-length key into a 64-bit value + k : the key (the unaligned variable-length array of bytes) + len : the length of the key, counting by bytes + level : can be any 8-byte value +Returns a 64-bit value. Every bit of the key affects every bit of +the return value. No funnels. Every 1-bit and 2-bit delta achieves +avalanche. About 41+5len instructions. + +The best hash table sizes are powers of 2. There is no need to do +mod a prime (mod is sooo slow!). If you need less than 64 bits, +use a bitmask. For example, if you need only 10 bits, do + h = (h & hashmask(10)); +In which case, the hash table should have hashsize(10) elements. + +If you are hashing n strings (uint8_t **)k, do it like this: + for (i=0, h=0; i= 24) + { + a += (k[0] +((uint64_t)k[ 1]<< 8)+((uint64_t)k[ 2]<<16)+((uint64_t)k[ 3]<<24) + +((uint64_t)k[4 ]<<32)+((uint64_t)k[ 5]<<40)+((uint64_t)k[ 6]<<48)+((uint64_t)k[ 7]<<56)); + b += (k[8] +((uint64_t)k[ 9]<< 8)+((uint64_t)k[10]<<16)+((uint64_t)k[11]<<24) + +((uint64_t)k[12]<<32)+((uint64_t)k[13]<<40)+((uint64_t)k[14]<<48)+((uint64_t)k[15]<<56)); + c += (k[16] +((uint64_t)k[17]<< 8)+((uint64_t)k[18]<<16)+((uint64_t)k[19]<<24) + +((uint64_t)k[20]<<32)+((uint64_t)k[21]<<40)+((uint64_t)k[22]<<48)+((uint64_t)k[23]<<56)); + mix64(a,b,c); + k += 24; len -= 24; + } + + /*------------------------------------- handle the last 23 bytes */ + c += length; + switch(len) /* all the case statements fall through */ + { + case 23: c+=((uint64_t)k[22]<<56); + case 22: c+=((uint64_t)k[21]<<48); + case 21: c+=((uint64_t)k[20]<<40); + case 20: c+=((uint64_t)k[19]<<32); + case 19: c+=((uint64_t)k[18]<<24); + case 18: c+=((uint64_t)k[17]<<16); + case 17: c+=((uint64_t)k[16]<<8); + /* the first byte of c is reserved for the length */ + case 16: b+=((uint64_t)k[15]<<56); + case 15: b+=((uint64_t)k[14]<<48); + case 14: b+=((uint64_t)k[13]<<40); + case 13: b+=((uint64_t)k[12]<<32); + case 12: b+=((uint64_t)k[11]<<24); + case 11: b+=((uint64_t)k[10]<<16); + case 10: b+=((uint64_t)k[ 9]<<8); + case 9: b+=((uint64_t)k[ 8]); + case 8: a+=((uint64_t)k[ 7]<<56); + case 7: a+=((uint64_t)k[ 6]<<48); + case 6: a+=((uint64_t)k[ 5]<<40); + case 5: a+=((uint64_t)k[ 4]<<32); + case 4: a+=((uint64_t)k[ 3]<<24); + case 3: a+=((uint64_t)k[ 2]<<16); + case 2: a+=((uint64_t)k[ 1]<<8); + case 1: a+=((uint64_t)k[ 0]); + /* case 0: nothing left to add */ + } + mix64(a,b,c); + /*-------------------------------------------- report the result */ + return c; +} + + +#ifdef SELOPT_WRITE + +/* +------------------------------------------------------------------------------ +This generates a minimal perfect hash function. That means, given a +set of n keys, this determines a hash function that maps each of +those keys into a value in 0..n-1 with no collisions. + +The perfect hash function first uses a normal hash function on the key +to determine (a,b) such that the pair (a,b) is distinct for all +keys, then it computes a^scramble[tab[b]] to get the final perfect hash. +tab[] is an array of 1-byte values and scramble[] is a 256-term array of +2-byte or 4-byte values. If there are n keys, the length of tab[] is a +power of two between n/3 and n. + +I found the idea of computing distinct (a,b) values in "Practical minimal +perfect hash functions for large databases", Fox, Heath, Chen, and Daoud, +Communications of the ACM, January 1992. They found the idea in Chichelli +(CACM Jan 1980). Beyond that, our methods differ. + +The key is hashed to a pair (a,b) where a in 0..*alen*-1 and b in +0..*blen*-1. A fast hash function determines both a and b +simultaneously. Any decent hash function is likely to produce +hashes so that (a,b) is distinct for all pairs. I try the hash +using different values of *salt* until all pairs are distinct. + +The final hash is (a XOR scramble[tab[b]]). *scramble* is a +predetermined mapping of 0..255 into 0..smax-1. *tab* is an +array that we fill in in such a way as to make the hash perfect. + +First we fill in all values of *tab* that are used by more than one +key. We try all possible values for each position until one works. + +This leaves m unmapped keys and m values that something could hash to. +If you treat unmapped keys as lefthand nodes and unused hash values +as righthand nodes, and draw a line connecting each key to each hash +value it could map to, you get a bipartite graph. We attempt to +find a perfect matching in this graph. If we succeed, we have +determined a perfect hash for the whole set of keys. + +*scramble* is used because (a^tab[i]) clusters keys around *a*. +------------------------------------------------------------------------------ +*/ + +typedef uint64_t ub8; +#define UB8MAXVAL 0xffffffffffffffffLL +#define UB8BITS 64 +typedef uint32_t ub4; +#define UB4MAXVAL 0xffffffff +#define UB4BITS 32 +typedef uint16_t ub2; +#define UB2MAXVAL 0xffff +#define UB2BITS 16 +typedef uint8_t ub1; +#define UB1MAXVAL 0xff +#define UB1BITS 8 + +#define TRUE 1 +#define FALSE 0 + +#define SCRAMBLE_LEN 256 // ((ub4)1<<16) /* length of *scramble* */ +#define RETRY_INITKEY 2048 /* number of times to try to find distinct (a,b) */ +#define RETRY_PERFECT 4 /* number of times to try to make a perfect hash */ + + +/* representation of a key */ +struct key +{ + ub1 *name_k; /* the actual key */ + ub4 len_k; /* the length of the actual key */ + ub4 hash_k; /* the initial hash value for this key */ +/* beyond this point is mapping-dependent */ + ub4 a_k; /* a, of the key maps to (a,b) */ + ub4 b_k; /* b, of the key maps to (a,b) */ + struct key *nextb_k; /* next key with this b */ +}; +typedef struct key key; + +/* things indexed by b of original (a,b) pair */ +struct bstuff +{ + ub2 val_b; /* hash=a^tabb[b].val_b */ + key *list_b; /* tabb[i].list_b is list of keys with b==i */ + ub4 listlen_b; /* length of list_b */ + ub4 water_b; /* high watermark of who has visited this map node */ +}; +typedef struct bstuff bstuff; + +/* things indexed by final hash value */ +struct hstuff +{ + key *key_h; /* tabh[i].key_h is the key with a hash of i */ +}; +typedef struct hstuff hstuff; + +/* things indexed by queue position */ +struct qstuff +{ + bstuff *b_q; /* b that currently occupies this hash */ + ub4 parent_q; /* queue position of parent that could use this hash */ + ub2 newval_q; /* what to change parent tab[b] to to use this hash */ + ub2 oldval_q; /* original value of tab[b] */ +}; +typedef struct qstuff qstuff; + + +/* +------------------------------------------------------------------------------ +Find the mapping that will produce a perfect hash +------------------------------------------------------------------------------ +*/ + +/* return the ceiling of the log (base 2) of val */ +static ub4 log2u(ub4 val) +{ + ub4 i; + for (i=0; ((ub4)1<>const3)); + x = (x+(x<>const5)); + } + return x; +} + +/* initialize scramble[] with distinct random values in 0..smax-1 */ +static void scrambleinit(ub4 *scramble, ub4 smax) +// ub4 *scramble; /* hash is a^scramble[tab[b]] */ +// ub4 smax; /* scramble values should be in 0..smax-1 */ +{ + ub4 i; + + /* fill scramble[] with distinct random integers in 0..smax-1 */ + for (i=0; ib_k + * check if the initial hash might work + */ +static int inittab(bstuff *tabb, ub4 blen, key *keys, ub4 nkeys, int complete) +// bstuff *tabb; /* output, list of keys with b for (a,b) */ +// ub4 blen; /* length of tabb */ +// key *keys; /* list of keys already hashed */ +// int complete; /* TRUE means to complete init despite collisions */ +{ + int nocollision = TRUE; + ub4 i; + + memset((void *)tabb, 0, (size_t)(sizeof(bstuff)*blen)); + + /* Two keys with the same (a,b) guarantees a collision */ + for (i = 0; i < nkeys; i++) { + key *mykey = keys+i; + key *otherkey; + + for (otherkey=tabb[mykey->b_k].list_b; + otherkey; + otherkey=otherkey->nextb_k) + { + if (mykey->a_k == otherkey->a_k) + { + nocollision = FALSE; + if (!complete) + return FALSE; + } + } + ++tabb[mykey->b_k].listlen_b; + mykey->nextb_k = tabb[mykey->b_k].list_b; + tabb[mykey->b_k].list_b = mykey; + } + + /* no two keys have the same (a,b) pair */ + return nocollision; +} + + +/* Do the initial hash for normal mode (use lookup and checksum) */ +static void initnorm(key *keys, ub4 nkeys, ub4 alen, ub4 blen, ub4 smax, ub8 salt) +// key *keys; /* list of all keys */ +// ub4 alen; /* (a,b) has a in 0..alen-1, a power of 2 */ +// ub4 blen; /* (a,b) has b in 0..blen-1, a power of 2 */ +// ub4 smax; /* maximum range of computable hash values */ +// ub4 salt; /* used to initialize the hash function */ +// gencode *final; /* output, code for the final hash */ +{ + ub4 loga = log2u(alen); /* log based 2 of blen */ + ub4 i; + for (i = 0; i < nkeys; i++) { + key *mykey = keys+i; + ub8 hash = lookup8(mykey->name_k, mykey->len_k, salt); + mykey->a_k = (loga > 0) ? hash>>(UB8BITS-loga) : 0; + mykey->b_k = (blen > 1) ? hash&(blen-1) : 0; + } +} + + +/* Try to apply an augmenting list */ +static int apply(bstuff *tabb, hstuff *tabh, qstuff *tabq, ub4 blen, ub4 *scramble, ub4 tail, int rollback) +// bstuff *tabb; +// hstuff *tabh; +// qstuff *tabq; +// ub4 blen; +// ub4 *scramble; +// ub4 tail; +// int rollback; /* FALSE applies augmenting path, TRUE rolls back */ +{ + ub4 hash; + key *mykey; + bstuff *pb; + ub4 child; + ub4 parent; + ub4 stabb; /* scramble[tab[b]] */ + + /* walk from child to parent */ + for (child=tail-1; child; child=parent) + { + parent = tabq[child].parent_q; /* find child's parent */ + pb = tabq[parent].b_q; /* find parent's list of siblings */ + + /* erase old hash values */ + stabb = scramble[pb->val_b]; + for (mykey=pb->list_b; mykey; mykey=mykey->nextb_k) + { + hash = mykey->a_k^stabb; + if (mykey == tabh[hash].key_h) + { /* erase hash for all of child's siblings */ + tabh[hash].key_h = (key *)0; + } + } + + /* change pb->val_b, which will change the hashes of all parent siblings */ + pb->val_b = (rollback ? tabq[child].oldval_q : tabq[child].newval_q); + + /* set new hash values */ + stabb = scramble[pb->val_b]; + for (mykey=pb->list_b; mykey; mykey=mykey->nextb_k) + { + hash = mykey->a_k^stabb; + if (rollback) + { + if (parent == 0) continue; /* root never had a hash */ + } + else if (tabh[hash].key_h) + { + /* very rare: roll back any changes */ + apply(tabb, tabh, tabq, blen, scramble, tail, TRUE); + return FALSE; /* failure, collision */ + } + tabh[hash].key_h = mykey; + } + } + return TRUE; +} + + +/* +------------------------------------------------------------------------------- +augment(): Add item to the mapping. + +Construct a spanning tree of *b*s with *item* as root, where each +parent can have all its hashes changed (by some new val_b) with +at most one collision, and each child is the b of that collision. + +I got this from Tarjan's "Data Structures and Network Algorithms". The +path from *item* to a *b* that can be remapped with no collision is +an "augmenting path". Change values of tab[b] along the path so that +the unmapped key gets mapped and the unused hash value gets used. + +Assuming 1 key per b, if m out of n hash values are still unused, +you should expect the transitive closure to cover n/m nodes before +an unused node is found. Sum(i=1..n)(n/i) is about nlogn, so expect +this approach to take about nlogn time to map all single-key b's. +------------------------------------------------------------------------------- +*/ +static int augment(bstuff *tabb, hstuff *tabh, qstuff *tabq, ub4 blen, ub4 *scramble, ub4 smax, bstuff *item, ub4 nkeys, + ub4 highwater) +// bstuff *tabb; /* stuff indexed by b */ +// hstuff *tabh; /* which key is associated with which hash, indexed by hash */ +// qstuff *tabq; /* queue of *b* values, this is the spanning tree */ +// ub4 blen; /* length of tabb */ +// ub4 *scramble; /* final hash is a^scramble[tab[b]] */ +// ub4 smax; /* highest value in scramble */ +// bstuff *item; /* &tabb[b] for the b to be mapped */ +// ub4 nkeys; /* final hash must be in 0..nkeys-1 */ +// ub4 highwater; /* a value higher than any now in tabb[].water_b */ +{ + ub4 q; /* current position walking through the queue */ + ub4 tail; /* tail of the queue. 0 is the head of the queue. */ + ub4 limit=UB1MAXVAL+1; + ub4 highhash = smax; + + /* initialize the root of the spanning tree */ + tabq[0].b_q = item; + tail = 1; + + /* construct the spanning tree by walking the queue, add children to tail */ + for (q=0; qval_b */ + + if (q == 1) + break; /* don't do transitive closure */ + + for (i=0; ilist_b; mykey; mykey=mykey->nextb_k) + { + key *childkey; + ub4 hash = mykey->a_k^scramble[i]; + + if (hash >= highhash) break; /* out of bounds */ + childkey = tabh[hash].key_h; + + if (childkey) + { + bstuff *hitb = &tabb[childkey->b_k]; + + if (childb) + { + if (childb != hitb) break; /* hit at most one child b */ + } + else + { + childb = hitb; /* remember this as childb */ + if (childb->water_b == highwater) break; /* already explored */ + } + } + } + if (mykey) continue; /* myb with i has multiple collisions */ + + /* add childb to the queue of reachable things */ + if (childb) childb->water_b = highwater; + tabq[tail].b_q = childb; + tabq[tail].newval_q = i; /* how to make parent (myb) use this hash */ + tabq[tail].oldval_q = myb->val_b; /* need this for rollback */ + tabq[tail].parent_q = q; + ++tail; + + if (!childb) + { /* found an *i* with no collisions? */ + /* try to apply the augmenting path */ + if (apply(tabb, tabh, tabq, blen, scramble, tail, FALSE)) + return TRUE; /* success, item was added to the perfect hash */ + + --tail; /* don't know how to handle such a child! */ + } + } + } + return FALSE; +} + + +/* find a mapping that makes this a perfect hash */ +static int perfect(bstuff *tabb, hstuff *tabh, qstuff *tabq, ub4 blen, ub4 smax, ub4 *scramble, ub4 nkeys) +{ + ub4 maxkeys; /* maximum number of keys for any b */ + ub4 i, j; + +#if SELOPT_DEBUG + fprintf(stderr, " blen %d smax %d nkeys %d\n", blen, smax, nkeys); +#endif + + /* clear any state from previous attempts */ + memset((void *)tabh, 0, sizeof(hstuff)*smax); + memset((void *)tabq, 0, sizeof(qstuff)*(blen+1)); + + for (maxkeys=0,i=0; i maxkeys) + maxkeys = tabb[i].listlen_b; + + /* In descending order by number of keys, map all *b*s */ + for (j=maxkeys; j>0; --j) + for (i=0; i= RETRY_INITKEY) + { + /* Try to put more bits in (A,B) to make distinct (A,B) more likely */ + if (*alen < maxalen) + { + *alen *= 2; + } + else if (*blen < smax) + { + *blen *= 2; + delete[] tabq; + delete[] *tabb; + *tabb = new bstuff[*blen]; + tabq = new qstuff[*blen+1]; + } + bad_initkey = 0; + bad_perfect = 0; + } + continue; /* two keys have same (a,b) pair */ + } + + /* Given distinct (A,B) for all keys, build a perfect hash */ + if (!perfect(*tabb, tabh, tabq, *blen, smax, scramble, nkeys)) + { + if (++bad_perfect >= RETRY_PERFECT) + { + if (*blen < smax) + { + *blen *= 2; + delete[] *tabb; + delete[] tabq; + *tabb = new bstuff[*blen]; + tabq = new qstuff[*blen+1]; + --si; /* we know this salt got distinct (A,B) */ + } + else + { + return 0; + } + bad_perfect = 0; + } + continue; + } + + break; + } + + /* free working memory */ + delete[] tabh; + delete[] tabq; + + return 1; +} + +/* +------------------------------------------------------------------------------ +Input/output type routines +------------------------------------------------------------------------------ +*/ + +/* get the list of keys */ +static void getkeys(key **keys, ub4 *nkeys, const string_map& strings) +{ + key *buf = new key[strings.size()]; + size_t i; + string_map::const_iterator s; + for (i = 0, s = strings.begin(); s != strings.end(); ++s, ++i) { + key *mykey = buf+i; + mykey->name_k = (ub1 *)s->first; + mykey->len_k = (ub4)strlen(s->first); + } + *keys = buf; + *nkeys = strings.size(); +} + + +static perfect_hash +make_perfect(const string_map& strings) +{ + ub4 nkeys; /* number of keys */ + key *keys; /* head of list of keys */ + bstuff *tab; /* table indexed by b */ + ub4 smax; /* scramble[] values in 0..smax-1, a power of 2 */ + ub4 alen; /* a in 0..alen-1, a power of 2 */ + ub4 blen; /* b in 0..blen-1, a power of 2 */ + ub8 salt; /* a parameter to the hash function */ + ub4 scramble[SCRAMBLE_LEN]; /* used in final hash function */ + int ok; + int i; + perfect_hash result; + + /* read in the list of keywords */ + getkeys(&keys, &nkeys, strings); + + /* find the hash */ + smax = ((ub4)1<