]> git.saurik.com Git - apple/objc4.git/blob - runtime/objc-selopt.h
objc4-437.3.tar.gz
[apple/objc4.git] / runtime / objc-selopt.h
1 /*
2 * Copyright (c) 2008 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /*
25 Portions derived from:
26
27 --------------------------------------------------------------------
28 lookup8.c, by Bob Jenkins, January 4 1997, Public Domain.
29 hash(), hash2(), hash3, and mix() are externally useful functions.
30 Routines to test the hash are included if SELF_TEST is defined.
31 You can use this free for any purpose. It has no warranty.
32 --------------------------------------------------------------------
33
34 ------------------------------------------------------------------------------
35 perfect.c: code to generate code for a hash for perfect hashing.
36 (c) Bob Jenkins, September 1996, December 1999
37 You may use this code in any way you wish, and it is free. No warranty.
38 I hereby place this in the public domain.
39 Source is http://burtleburtle.net/bob/c/perfect.c
40 ------------------------------------------------------------------------------
41 */
42
43 /*
44 * objc-selopt.h
45 * Interface between libobjc and dyld
46 * for selector uniquing in the dyld shared cache.
47 *
48 * When building the shared cache, dyld locates all selectors and selector
49 * references in the cached images. It builds a perfect hash table out of
50 * them and writes the table into the shared cache copy of libobjc.
51 * libobjc then uses that table as the builtin selector list.
52 *
53 * Versioning
54 * The table has a version number. dyld and objc can both ignore the table
55 * if the other used the wrong version number.
56 *
57 * Completeness
58 * Not all libraries are in the shared cache. Libraries that are in the
59 * shared cache and were optimized are specially marked. Libraries on
60 * disk never include those marks.
61 *
62 * Coherency
63 * Libraries optimized in the shared cache can be replaced by unoptimized
64 * copies from disk when loaded. The copy from disk is not marked and will
65 * be fixed up by libobjc. The shared cache copy is still mapped into the
66 * process, so the table can point to cstring data in that library's part
67 * of the shared cache without trouble.
68 *
69 * Atomicity
70 * dyld writes the table itself last. If dyld marks some metadata as
71 * updated but then fails to write a table for some reason, libobjc
72 * fixes up all metadata as if it were not marked.
73 */
74
75 #ifndef _OBJC_SELOPT_H
76 #define _OBJC_SELOPT_H
77
78 /*
79 DO NOT INCLUDE ANY objc HEADERS HERE
80 dyld USES THIS FILE AND CANNOT SEE THEM
81 */
82 #include <stdint.h>
83 #include <stdlib.h>
84 #include <ext/hash_map>
85 /*
86 DO NOT INCLUDE ANY objc HEADERS HERE
87 dyld USES THIS FILE AND CANNOT SEE THEM
88 */
89
90 // #define SELOPT_DEBUG
91
92 namespace objc_selopt {
93
94 typedef int32_t objc_selopt_offset_t;
95
96 #ifdef SELOPT_WRITE
97
98 // Perfect hash code is at the end of this file.
99
100 struct perfect_hash {
101 uint32_t capacity;
102 uint32_t occupied;
103 uint32_t shift;
104 uint32_t mask;
105 uint64_t salt;
106
107 uint32_t scramble[256];
108 uint8_t *tab; // count == mask+1; free with delete[]
109
110 perfect_hash() : tab(0) { }
111
112 ~perfect_hash() { if (tab) delete[] tab; }
113 };
114
115 struct eqstr {
116 bool operator()(const char* s1, const char* s2) const {
117 return strcmp(s1, s2) == 0;
118 }
119 };
120
121 typedef __gnu_cxx::hash_map<const char *, uint64_t, __gnu_cxx::hash<const char *>, eqstr> string_map;
122
123 static perfect_hash make_perfect(const string_map& strings);
124
125 #endif
126
127 static uint64_t lookup8( uint8_t *k, size_t length, uint64_t level);
128
129 enum { VERSION = 3 };
130
131 struct objc_selopt_t {
132
133 uint32_t version; /* this is version 3: external cstrings */
134 uint32_t capacity;
135 uint32_t occupied;
136 uint32_t shift;
137 uint32_t mask;
138 uint32_t zero;
139 uint64_t salt;
140 uint64_t base;
141
142 uint32_t scramble[256];
143 uint8_t tab[0]; /* tab[mask+1] (always power-of-2) */
144 // int32_t offsets[capacity]; /* offsets from &version to cstrings */
145
146 objc_selopt_offset_t *offsets() { return (objc_selopt_offset_t *)&tab[mask+1]; }
147 const objc_selopt_offset_t *offsets() const { return (const objc_selopt_offset_t *)&tab[mask+1]; }
148
149 uint32_t hash(const char *key) const
150 {
151 uint64_t val = lookup8((uint8_t*)key, strlen(key), salt);
152 uint32_t index = (uint32_t)(val>>shift) ^ scramble[tab[val&mask]];
153 return index;
154 }
155
156 const char *get(const char *key) const
157 {
158 const char *result = (const char *)this + offsets()[hash(key)];
159 if (0 == strcmp(key, result)) return result;
160 else return NULL;
161 }
162
163 #ifdef SELOPT_WRITE
164 void set(const char *key, objc_selopt_offset_t value)
165 {
166 offsets()[hash(key)] = value;
167 }
168 #endif
169 };
170
171 // Initializer for empty table of type uint32_t[].
172 #define X8(x) x, x, x, x, x, x, x, x
173 #define X64(x) X8(x), X8(x), X8(x), X8(x), X8(x), X8(x), X8(x), X8(x)
174 #define X256(x) X64(x), X64(x), X64(x), X64(x)
175 #define SELOPT_INITIALIZER \
176 { objc_selopt::VERSION, 4, 4, 63, 3, 0, 0,0, 0,0, X256(0), 0, 20, 20, 20, 20 };
177
178
179 /*
180 --------------------------------------------------------------------
181 mix -- mix 3 64-bit values reversibly.
182 mix() takes 48 machine instructions, but only 24 cycles on a superscalar
183 machine (like Intel's new MMX architecture). It requires 4 64-bit
184 registers for 4::2 parallelism.
185 All 1-bit deltas, all 2-bit deltas, all deltas composed of top bits of
186 (a,b,c), and all deltas of bottom bits were tested. All deltas were
187 tested both on random keys and on keys that were nearly all zero.
188 These deltas all cause every bit of c to change between 1/3 and 2/3
189 of the time (well, only 113/400 to 287/400 of the time for some
190 2-bit delta). These deltas all cause at least 80 bits to change
191 among (a,b,c) when the mix is run either forward or backward (yes it
192 is reversible).
193 This implies that a hash using mix64 has no funnels. There may be
194 characteristics with 3-bit deltas or bigger, I didn't test for
195 those.
196 --------------------------------------------------------------------
197 */
198 #define mix64(a,b,c) \
199 { \
200 a -= b; a -= c; a ^= (c>>43); \
201 b -= c; b -= a; b ^= (a<<9); \
202 c -= a; c -= b; c ^= (b>>8); \
203 a -= b; a -= c; a ^= (c>>38); \
204 b -= c; b -= a; b ^= (a<<23); \
205 c -= a; c -= b; c ^= (b>>5); \
206 a -= b; a -= c; a ^= (c>>35); \
207 b -= c; b -= a; b ^= (a<<49); \
208 c -= a; c -= b; c ^= (b>>11); \
209 a -= b; a -= c; a ^= (c>>12); \
210 b -= c; b -= a; b ^= (a<<18); \
211 c -= a; c -= b; c ^= (b>>22); \
212 }
213
214 /*
215 --------------------------------------------------------------------
216 hash() -- hash a variable-length key into a 64-bit value
217 k : the key (the unaligned variable-length array of bytes)
218 len : the length of the key, counting by bytes
219 level : can be any 8-byte value
220 Returns a 64-bit value. Every bit of the key affects every bit of
221 the return value. No funnels. Every 1-bit and 2-bit delta achieves
222 avalanche. About 41+5len instructions.
223
224 The best hash table sizes are powers of 2. There is no need to do
225 mod a prime (mod is sooo slow!). If you need less than 64 bits,
226 use a bitmask. For example, if you need only 10 bits, do
227 h = (h & hashmask(10));
228 In which case, the hash table should have hashsize(10) elements.
229
230 If you are hashing n strings (uint8_t **)k, do it like this:
231 for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
232
233 By Bob Jenkins, Jan 4 1997. bob_jenkins@burtleburtle.net. You may
234 use this code any way you wish, private, educational, or commercial,
235 but I would appreciate if you give me credit.
236
237 See http://burtleburtle.net/bob/hash/evahash.html
238 Use for hash table lookup, or anything where one collision in 2^^64
239 is acceptable. Do NOT use for cryptographic purposes.
240 --------------------------------------------------------------------
241 */
242
243 static uint64_t lookup8( uint8_t *k, size_t length, uint64_t level)
244 // uint8_t *k; /* the key */
245 // uint64_t length; /* the length of the key */
246 // uint64_t level; /* the previous hash, or an arbitrary value */
247 {
248 uint64_t a,b,c;
249 size_t len;
250
251 /* Set up the internal state */
252 len = length;
253 a = b = level; /* the previous hash value */
254 c = 0x9e3779b97f4a7c13LL; /* the golden ratio; an arbitrary value */
255
256 /*---------------------------------------- handle most of the key */
257 while (len >= 24)
258 {
259 a += (k[0] +((uint64_t)k[ 1]<< 8)+((uint64_t)k[ 2]<<16)+((uint64_t)k[ 3]<<24)
260 +((uint64_t)k[4 ]<<32)+((uint64_t)k[ 5]<<40)+((uint64_t)k[ 6]<<48)+((uint64_t)k[ 7]<<56));
261 b += (k[8] +((uint64_t)k[ 9]<< 8)+((uint64_t)k[10]<<16)+((uint64_t)k[11]<<24)
262 +((uint64_t)k[12]<<32)+((uint64_t)k[13]<<40)+((uint64_t)k[14]<<48)+((uint64_t)k[15]<<56));
263 c += (k[16] +((uint64_t)k[17]<< 8)+((uint64_t)k[18]<<16)+((uint64_t)k[19]<<24)
264 +((uint64_t)k[20]<<32)+((uint64_t)k[21]<<40)+((uint64_t)k[22]<<48)+((uint64_t)k[23]<<56));
265 mix64(a,b,c);
266 k += 24; len -= 24;
267 }
268
269 /*------------------------------------- handle the last 23 bytes */
270 c += length;
271 switch(len) /* all the case statements fall through */
272 {
273 case 23: c+=((uint64_t)k[22]<<56);
274 case 22: c+=((uint64_t)k[21]<<48);
275 case 21: c+=((uint64_t)k[20]<<40);
276 case 20: c+=((uint64_t)k[19]<<32);
277 case 19: c+=((uint64_t)k[18]<<24);
278 case 18: c+=((uint64_t)k[17]<<16);
279 case 17: c+=((uint64_t)k[16]<<8);
280 /* the first byte of c is reserved for the length */
281 case 16: b+=((uint64_t)k[15]<<56);
282 case 15: b+=((uint64_t)k[14]<<48);
283 case 14: b+=((uint64_t)k[13]<<40);
284 case 13: b+=((uint64_t)k[12]<<32);
285 case 12: b+=((uint64_t)k[11]<<24);
286 case 11: b+=((uint64_t)k[10]<<16);
287 case 10: b+=((uint64_t)k[ 9]<<8);
288 case 9: b+=((uint64_t)k[ 8]);
289 case 8: a+=((uint64_t)k[ 7]<<56);
290 case 7: a+=((uint64_t)k[ 6]<<48);
291 case 6: a+=((uint64_t)k[ 5]<<40);
292 case 5: a+=((uint64_t)k[ 4]<<32);
293 case 4: a+=((uint64_t)k[ 3]<<24);
294 case 3: a+=((uint64_t)k[ 2]<<16);
295 case 2: a+=((uint64_t)k[ 1]<<8);
296 case 1: a+=((uint64_t)k[ 0]);
297 /* case 0: nothing left to add */
298 }
299 mix64(a,b,c);
300 /*-------------------------------------------- report the result */
301 return c;
302 }
303
304
305 #ifdef SELOPT_WRITE
306
307 static const char *
308 write_selopt(void *dst, uint64_t base, size_t dstSize,
309 string_map& strings, bool little_endian,
310 size_t *outSize)
311 {
312 if (strings.size() == 0) return false;
313
314 perfect_hash phash = make_perfect(strings);
315 if (phash.capacity == 0) {
316 return "perfect hash failed (selectors not optimized)";
317 }
318
319 size_t size = sizeof(objc_selopt_t) + (phash.mask+1) + phash.capacity * sizeof(objc_selopt_offset_t);
320 if (size > dstSize) {
321 return "selector section too small (selectors not optimized)";
322 }
323
324 uint8_t *buf = new uint8_t[size];
325 objc_selopt_t *selopt = (objc_selopt_t *)buf;
326
327 // Set header
328 selopt->version = VERSION;
329 selopt->capacity = phash.capacity;
330 selopt->occupied = phash.occupied;
331 selopt->shift = phash.shift;
332 selopt->mask = phash.mask;
333 selopt->zero = 0;
334 selopt->salt = phash.salt;
335 selopt->base = base;
336
337 // Set hash data
338 for (uint32_t i = 0; i < 256; i++) {
339 selopt->scramble[i] = phash.scramble[i];
340 }
341 for (uint32_t i = 0; i < phash.mask+1; i++) {
342 selopt->tab[i] = phash.tab[i];
343 }
344
345 // Set offsets to ""
346 for (uint32_t i = 0; i < phash.capacity; i++) {
347 selopt->offsets()[i] =
348 (objc_selopt_offset_t)offsetof(objc_selopt_t, zero);
349 }
350
351 // Set real string offsets
352 # define SHIFT (64 - 8*sizeof(objc_selopt_offset_t))
353 string_map::const_iterator s;
354 for (s = strings.begin(); s != strings.end(); ++s) {
355 int64_t offset = s->second - base;
356 if ((offset<<SHIFT)>>SHIFT != offset) {
357 delete[] buf;
358 return "selector offset too big (selectors not optimized)";
359 }
360 selopt->set(s->first, (objc_selopt_offset_t)offset);
361 }
362 # undef SHIFT;
363
364 // Byte-swap everything
365 #define S32(x) x = little_endian ? OSSwapHostToLittleInt32(x) : OSSwapHostToBigInt32(x)
366 #define S64(x) x = little_endian ? OSSwapHostToLittleInt64(x) : OSSwapHostToBigInt64(x)
367 for (uint32_t i = 0; i < 256; i++) {
368 S32(selopt->scramble[i]);
369 }
370 // tab is array of bytes, no swap needed
371 for (uint32_t i = 0; i < phash.capacity; i++) {
372 S32(selopt->offsets()[i]);
373 }
374
375 S32(selopt->version);
376 S32(selopt->capacity);
377 S32(selopt->occupied);
378 S32(selopt->shift);
379 S32(selopt->mask);
380 S32(selopt->zero);
381 S64(selopt->salt);
382 S64(selopt->base);
383 #undef S32
384 #undef S64
385
386 memcpy(dst, selopt, size);
387 if (outSize) *outSize = size;
388
389 delete[] buf;
390 return NULL;
391 }
392
393
394 /*
395 ------------------------------------------------------------------------------
396 This generates a minimal perfect hash function. That means, given a
397 set of n keys, this determines a hash function that maps each of
398 those keys into a value in 0..n-1 with no collisions.
399
400 The perfect hash function first uses a normal hash function on the key
401 to determine (a,b) such that the pair (a,b) is distinct for all
402 keys, then it computes a^scramble[tab[b]] to get the final perfect hash.
403 tab[] is an array of 1-byte values and scramble[] is a 256-term array of
404 2-byte or 4-byte values. If there are n keys, the length of tab[] is a
405 power of two between n/3 and n.
406
407 I found the idea of computing distinct (a,b) values in "Practical minimal
408 perfect hash functions for large databases", Fox, Heath, Chen, and Daoud,
409 Communications of the ACM, January 1992. They found the idea in Chichelli
410 (CACM Jan 1980). Beyond that, our methods differ.
411
412 The key is hashed to a pair (a,b) where a in 0..*alen*-1 and b in
413 0..*blen*-1. A fast hash function determines both a and b
414 simultaneously. Any decent hash function is likely to produce
415 hashes so that (a,b) is distinct for all pairs. I try the hash
416 using different values of *salt* until all pairs are distinct.
417
418 The final hash is (a XOR scramble[tab[b]]). *scramble* is a
419 predetermined mapping of 0..255 into 0..smax-1. *tab* is an
420 array that we fill in in such a way as to make the hash perfect.
421
422 First we fill in all values of *tab* that are used by more than one
423 key. We try all possible values for each position until one works.
424
425 This leaves m unmapped keys and m values that something could hash to.
426 If you treat unmapped keys as lefthand nodes and unused hash values
427 as righthand nodes, and draw a line connecting each key to each hash
428 value it could map to, you get a bipartite graph. We attempt to
429 find a perfect matching in this graph. If we succeed, we have
430 determined a perfect hash for the whole set of keys.
431
432 *scramble* is used because (a^tab[i]) clusters keys around *a*.
433 ------------------------------------------------------------------------------
434 */
435
436 typedef uint64_t ub8;
437 #define UB8MAXVAL 0xffffffffffffffffLL
438 #define UB8BITS 64
439 typedef uint32_t ub4;
440 #define UB4MAXVAL 0xffffffff
441 #define UB4BITS 32
442 typedef uint16_t ub2;
443 #define UB2MAXVAL 0xffff
444 #define UB2BITS 16
445 typedef uint8_t ub1;
446 #define UB1MAXVAL 0xff
447 #define UB1BITS 8
448
449 #define TRUE 1
450 #define FALSE 0
451
452 #define SCRAMBLE_LEN 256 // ((ub4)1<<16) /* length of *scramble* */
453 #define RETRY_INITKEY 2048 /* number of times to try to find distinct (a,b) */
454 #define RETRY_PERFECT 4 /* number of times to try to make a perfect hash */
455
456
457 /* representation of a key */
458 struct key
459 {
460 ub1 *name_k; /* the actual key */
461 ub4 len_k; /* the length of the actual key */
462 ub4 hash_k; /* the initial hash value for this key */
463 /* beyond this point is mapping-dependent */
464 ub4 a_k; /* a, of the key maps to (a,b) */
465 ub4 b_k; /* b, of the key maps to (a,b) */
466 struct key *nextb_k; /* next key with this b */
467 };
468 typedef struct key key;
469
470 /* things indexed by b of original (a,b) pair */
471 struct bstuff
472 {
473 ub2 val_b; /* hash=a^tabb[b].val_b */
474 key *list_b; /* tabb[i].list_b is list of keys with b==i */
475 ub4 listlen_b; /* length of list_b */
476 ub4 water_b; /* high watermark of who has visited this map node */
477 };
478 typedef struct bstuff bstuff;
479
480 /* things indexed by final hash value */
481 struct hstuff
482 {
483 key *key_h; /* tabh[i].key_h is the key with a hash of i */
484 };
485 typedef struct hstuff hstuff;
486
487 /* things indexed by queue position */
488 struct qstuff
489 {
490 bstuff *b_q; /* b that currently occupies this hash */
491 ub4 parent_q; /* queue position of parent that could use this hash */
492 ub2 newval_q; /* what to change parent tab[b] to to use this hash */
493 ub2 oldval_q; /* original value of tab[b] */
494 };
495 typedef struct qstuff qstuff;
496
497
498 /*
499 ------------------------------------------------------------------------------
500 Find the mapping that will produce a perfect hash
501 ------------------------------------------------------------------------------
502 */
503
504 /* return the ceiling of the log (base 2) of val */
505 static ub4 log2u(ub4 val)
506 {
507 ub4 i;
508 for (i=0; ((ub4)1<<i) < val; ++i)
509 ;
510 return i;
511 }
512
513 /* compute p(x), where p is a permutation of 0..(1<<nbits)-1 */
514 /* permute(0)=0. This is intended and useful. */
515 static ub4 permute(ub4 x, ub4 nbits)
516 // ub4 x; /* input, a value in some range */
517 // ub4 nbits; /* input, number of bits in range */
518 {
519 int i;
520 int mask = ((ub4)1<<nbits)-1; /* all ones */
521 int const2 = 1+nbits/2;
522 int const3 = 1+nbits/3;
523 int const4 = 1+nbits/4;
524 int const5 = 1+nbits/5;
525 for (i=0; i<20; ++i)
526 {
527 x = (x+(x<<const2)) & mask;
528 x = (x^(x>>const3));
529 x = (x+(x<<const4)) & mask;
530 x = (x^(x>>const5));
531 }
532 return x;
533 }
534
535 /* initialize scramble[] with distinct random values in 0..smax-1 */
536 static void scrambleinit(ub4 *scramble, ub4 smax)
537 // ub4 *scramble; /* hash is a^scramble[tab[b]] */
538 // ub4 smax; /* scramble values should be in 0..smax-1 */
539 {
540 ub4 i;
541
542 /* fill scramble[] with distinct random integers in 0..smax-1 */
543 for (i=0; i<SCRAMBLE_LEN; ++i)
544 {
545 scramble[i] = permute(i, log2u(smax));
546 }
547 }
548
549
550 /*
551 * put keys in tabb according to key->b_k
552 * check if the initial hash might work
553 */
554 static int inittab(bstuff *tabb, ub4 blen, key *keys, ub4 nkeys, int complete)
555 // bstuff *tabb; /* output, list of keys with b for (a,b) */
556 // ub4 blen; /* length of tabb */
557 // key *keys; /* list of keys already hashed */
558 // int complete; /* TRUE means to complete init despite collisions */
559 {
560 int nocollision = TRUE;
561 ub4 i;
562
563 memset((void *)tabb, 0, (size_t)(sizeof(bstuff)*blen));
564
565 /* Two keys with the same (a,b) guarantees a collision */
566 for (i = 0; i < nkeys; i++) {
567 key *mykey = keys+i;
568 key *otherkey;
569
570 for (otherkey=tabb[mykey->b_k].list_b;
571 otherkey;
572 otherkey=otherkey->nextb_k)
573 {
574 if (mykey->a_k == otherkey->a_k)
575 {
576 nocollision = FALSE;
577 if (!complete)
578 return FALSE;
579 }
580 }
581 ++tabb[mykey->b_k].listlen_b;
582 mykey->nextb_k = tabb[mykey->b_k].list_b;
583 tabb[mykey->b_k].list_b = mykey;
584 }
585
586 /* no two keys have the same (a,b) pair */
587 return nocollision;
588 }
589
590
591 /* Do the initial hash for normal mode (use lookup and checksum) */
592 static void initnorm(key *keys, ub4 nkeys, ub4 alen, ub4 blen, ub4 smax, ub8 salt)
593 // key *keys; /* list of all keys */
594 // ub4 alen; /* (a,b) has a in 0..alen-1, a power of 2 */
595 // ub4 blen; /* (a,b) has b in 0..blen-1, a power of 2 */
596 // ub4 smax; /* maximum range of computable hash values */
597 // ub4 salt; /* used to initialize the hash function */
598 // gencode *final; /* output, code for the final hash */
599 {
600 ub4 loga = log2u(alen); /* log based 2 of blen */
601 ub4 i;
602 for (i = 0; i < nkeys; i++) {
603 key *mykey = keys+i;
604 ub8 hash = lookup8(mykey->name_k, mykey->len_k, salt);
605 mykey->a_k = (loga > 0) ? hash>>(UB8BITS-loga) : 0;
606 mykey->b_k = (blen > 1) ? hash&(blen-1) : 0;
607 }
608 }
609
610
611 /* Try to apply an augmenting list */
612 static int apply(bstuff *tabb, hstuff *tabh, qstuff *tabq, ub4 blen, ub4 *scramble, ub4 tail, int rollback)
613 // bstuff *tabb;
614 // hstuff *tabh;
615 // qstuff *tabq;
616 // ub4 blen;
617 // ub4 *scramble;
618 // ub4 tail;
619 // int rollback; /* FALSE applies augmenting path, TRUE rolls back */
620 {
621 ub4 hash;
622 key *mykey;
623 bstuff *pb;
624 ub4 child;
625 ub4 parent;
626 ub4 stabb; /* scramble[tab[b]] */
627
628 /* walk from child to parent */
629 for (child=tail-1; child; child=parent)
630 {
631 parent = tabq[child].parent_q; /* find child's parent */
632 pb = tabq[parent].b_q; /* find parent's list of siblings */
633
634 /* erase old hash values */
635 stabb = scramble[pb->val_b];
636 for (mykey=pb->list_b; mykey; mykey=mykey->nextb_k)
637 {
638 hash = mykey->a_k^stabb;
639 if (mykey == tabh[hash].key_h)
640 { /* erase hash for all of child's siblings */
641 tabh[hash].key_h = (key *)0;
642 }
643 }
644
645 /* change pb->val_b, which will change the hashes of all parent siblings */
646 pb->val_b = (rollback ? tabq[child].oldval_q : tabq[child].newval_q);
647
648 /* set new hash values */
649 stabb = scramble[pb->val_b];
650 for (mykey=pb->list_b; mykey; mykey=mykey->nextb_k)
651 {
652 hash = mykey->a_k^stabb;
653 if (rollback)
654 {
655 if (parent == 0) continue; /* root never had a hash */
656 }
657 else if (tabh[hash].key_h)
658 {
659 /* very rare: roll back any changes */
660 apply(tabb, tabh, tabq, blen, scramble, tail, TRUE);
661 return FALSE; /* failure, collision */
662 }
663 tabh[hash].key_h = mykey;
664 }
665 }
666 return TRUE;
667 }
668
669
670 /*
671 -------------------------------------------------------------------------------
672 augment(): Add item to the mapping.
673
674 Construct a spanning tree of *b*s with *item* as root, where each
675 parent can have all its hashes changed (by some new val_b) with
676 at most one collision, and each child is the b of that collision.
677
678 I got this from Tarjan's "Data Structures and Network Algorithms". The
679 path from *item* to a *b* that can be remapped with no collision is
680 an "augmenting path". Change values of tab[b] along the path so that
681 the unmapped key gets mapped and the unused hash value gets used.
682
683 Assuming 1 key per b, if m out of n hash values are still unused,
684 you should expect the transitive closure to cover n/m nodes before
685 an unused node is found. Sum(i=1..n)(n/i) is about nlogn, so expect
686 this approach to take about nlogn time to map all single-key b's.
687 -------------------------------------------------------------------------------
688 */
689 static int augment(bstuff *tabb, hstuff *tabh, qstuff *tabq, ub4 blen, ub4 *scramble, ub4 smax, bstuff *item, ub4 nkeys,
690 ub4 highwater)
691 // bstuff *tabb; /* stuff indexed by b */
692 // hstuff *tabh; /* which key is associated with which hash, indexed by hash */
693 // qstuff *tabq; /* queue of *b* values, this is the spanning tree */
694 // ub4 blen; /* length of tabb */
695 // ub4 *scramble; /* final hash is a^scramble[tab[b]] */
696 // ub4 smax; /* highest value in scramble */
697 // bstuff *item; /* &tabb[b] for the b to be mapped */
698 // ub4 nkeys; /* final hash must be in 0..nkeys-1 */
699 // ub4 highwater; /* a value higher than any now in tabb[].water_b */
700 {
701 ub4 q; /* current position walking through the queue */
702 ub4 tail; /* tail of the queue. 0 is the head of the queue. */
703 ub4 limit=UB1MAXVAL+1;
704 ub4 highhash = smax;
705
706 /* initialize the root of the spanning tree */
707 tabq[0].b_q = item;
708 tail = 1;
709
710 /* construct the spanning tree by walking the queue, add children to tail */
711 for (q=0; q<tail; ++q)
712 {
713 bstuff *myb = tabq[q].b_q; /* the b for this node */
714 ub4 i; /* possible value for myb->val_b */
715
716 if (q == 1)
717 break; /* don't do transitive closure */
718
719 for (i=0; i<limit; ++i)
720 {
721 bstuff *childb = (bstuff *)0; /* the b that this i maps to */
722 key *mykey; /* for walking through myb's keys */
723
724 for (mykey = myb->list_b; mykey; mykey=mykey->nextb_k)
725 {
726 key *childkey;
727 ub4 hash = mykey->a_k^scramble[i];
728
729 if (hash >= highhash) break; /* out of bounds */
730 childkey = tabh[hash].key_h;
731
732 if (childkey)
733 {
734 bstuff *hitb = &tabb[childkey->b_k];
735
736 if (childb)
737 {
738 if (childb != hitb) break; /* hit at most one child b */
739 }
740 else
741 {
742 childb = hitb; /* remember this as childb */
743 if (childb->water_b == highwater) break; /* already explored */
744 }
745 }
746 }
747 if (mykey) continue; /* myb with i has multiple collisions */
748
749 /* add childb to the queue of reachable things */
750 if (childb) childb->water_b = highwater;
751 tabq[tail].b_q = childb;
752 tabq[tail].newval_q = i; /* how to make parent (myb) use this hash */
753 tabq[tail].oldval_q = myb->val_b; /* need this for rollback */
754 tabq[tail].parent_q = q;
755 ++tail;
756
757 if (!childb)
758 { /* found an *i* with no collisions? */
759 /* try to apply the augmenting path */
760 if (apply(tabb, tabh, tabq, blen, scramble, tail, FALSE))
761 return TRUE; /* success, item was added to the perfect hash */
762
763 --tail; /* don't know how to handle such a child! */
764 }
765 }
766 }
767 return FALSE;
768 }
769
770
771 /* find a mapping that makes this a perfect hash */
772 static int perfect(bstuff *tabb, hstuff *tabh, qstuff *tabq, ub4 blen, ub4 smax, ub4 *scramble, ub4 nkeys)
773 {
774 ub4 maxkeys; /* maximum number of keys for any b */
775 ub4 i, j;
776
777 #ifdef SELOPT_DEBUG
778 fprintf(stderr, " blen %d smax %d nkeys %d\n", blen, smax, nkeys);
779 #endif
780
781 /* clear any state from previous attempts */
782 memset((void *)tabh, 0, sizeof(hstuff)*smax);
783 memset((void *)tabq, 0, sizeof(qstuff)*(blen+1));
784
785 for (maxkeys=0,i=0; i<blen; ++i)
786 if (tabb[i].listlen_b > maxkeys)
787 maxkeys = tabb[i].listlen_b;
788
789 /* In descending order by number of keys, map all *b*s */
790 for (j=maxkeys; j>0; --j)
791 for (i=0; i<blen; ++i)
792 if (tabb[i].listlen_b == j)
793 if (!augment(tabb, tabh, tabq, blen, scramble, smax, &tabb[i], nkeys,
794 i+1))
795 {
796 return FALSE;
797 }
798
799 /* Success! We found a perfect hash of all keys into 0..nkeys-1. */
800 return TRUE;
801 }
802
803
804 /* guess initial values for alen and blen */
805 static void initalen(ub4 *alen, ub4 *blen, ub4 smax, ub4 nkeys)
806 // ub4 *alen; /* output, initial alen */
807 // ub4 *blen; /* output, initial blen */
808 // ub4 smax; /* input, power of two greater or equal to max hash value */
809 // ub4 nkeys; /* number of keys being hashed */
810 {
811 /*
812 * Find initial *alen, *blen
813 * Initial alen and blen values were found empirically. Some factors:
814 *
815 * If smax<256 there is no scramble, so tab[b] needs to cover 0..smax-1.
816 *
817 * alen and blen must be powers of 2 because the values in 0..alen-1 and
818 * 0..blen-1 are produced by applying a bitmask to the initial hash function.
819 *
820 * alen must be less than smax, in fact less than nkeys, because otherwise
821 * there would often be no i such that a^scramble[i] is in 0..nkeys-1 for
822 * all the *a*s associated with a given *b*, so there would be no legal
823 * value to assign to tab[b]. This only matters when we're doing a minimal
824 * perfect hash.
825 *
826 * It takes around 800 trials to find distinct (a,b) with nkey=smax*(5/8)
827 * and alen*blen = smax*smax/32.
828 *
829 * Values of blen less than smax/4 never work, and smax/2 always works.
830 *
831 * We want blen as small as possible because it is the number of bytes in
832 * the huge array we must create for the perfect hash.
833 *
834 * When nkey <= smax*(5/8), blen=smax/4 works much more often with
835 * alen=smax/8 than with alen=smax/4. Above smax*(5/8), blen=smax/4
836 * doesn't seem to care whether alen=smax/8 or alen=smax/4. I think it
837 * has something to do with 5/8 = 1/8 * 5. For example examine 80000,
838 * 85000, and 90000 keys with different values of alen. This only matters
839 * if we're doing a minimal perfect hash.
840 *
841 * When alen*blen <= 1<<UB4BITS, the initial hash must produce one integer.
842 * Bigger than that it must produce two integers, which increases the
843 * cost of the hash per character hashed.
844 */
845 *alen = smax; /* no reason to restrict alen to smax/2 */
846 *blen = ((nkeys <= smax*0.6) ? smax/16 :
847 (nkeys <= smax*0.8) ? smax/8 : smax/4);
848
849 if (*alen < 1) *alen = 1;
850 if (*blen < 1) *blen = 1;
851
852 #if SELOPT_DEBUG
853 fprintf(stderr, "alen %d blen %d smax %d nkeys %d\n", *alen, *blen, smax, nkeys);
854 #endif
855 }
856
857 /*
858 ** Try to find a perfect hash function.
859 ** Return the successful initializer for the initial hash.
860 ** Return 0 if no perfect hash could be found.
861 */
862 static int findhash(bstuff **tabb, ub4 *alen, ub4 *blen, ub8 *salt,
863 ub4 *scramble, ub4 smax, key *keys, ub4 nkeys)
864 // bstuff **tabb; /* output, tab[] of the perfect hash, length *blen */
865 // ub4 *alen; /* output, 0..alen-1 is range for a of (a,b) */
866 // ub4 *blen; /* output, 0..blen-1 is range for b of (a,b) */
867 // ub4 *salt; /* output, initializes initial hash */
868 // ub4 *scramble; /* input, hash = a^scramble[tab[b]] */
869 // ub4 smax; /* input, scramble[i] in 0..smax-1 */
870 // key *keys; /* input, keys to hash */
871 // ub4 nkeys; /* input, number of keys being hashed */
872 {
873 ub4 bad_initkey; /* how many times did initkey fail? */
874 ub4 bad_perfect; /* how many times did perfect fail? */
875 ub4 si; /* trial initializer for initial hash */
876 ub4 maxalen;
877 hstuff *tabh; /* table of keys indexed by hash value */
878 qstuff *tabq; /* table of stuff indexed by queue value, used by augment */
879
880 /* guess initial values for alen and blen */
881 initalen(alen, blen, smax, nkeys);
882
883 scrambleinit(scramble, smax);
884
885 maxalen = smax;
886
887 /* allocate working memory */
888 *tabb = new bstuff[*blen];
889 tabq = new qstuff[*blen+1];
890 tabh = new hstuff[smax];
891
892 /* Actually find the perfect hash */
893 *salt = 0;
894 bad_initkey = 0;
895 bad_perfect = 0;
896 for (si=1; ; ++si)
897 {
898 ub4 rslinit;
899 /* Try to find distinct (A,B) for all keys */
900 *salt = si * 0x9e3779b97f4a7c13LL; /* golden ratio (arbitrary value) */
901 initnorm(keys, nkeys, *alen, *blen, smax, *salt);
902 rslinit = inittab(*tabb, *blen, keys, nkeys, FALSE);
903 if (rslinit == 0)
904 {
905 /* didn't find distinct (a,b) */
906 if (++bad_initkey >= RETRY_INITKEY)
907 {
908 /* Try to put more bits in (A,B) to make distinct (A,B) more likely */
909 if (*alen < maxalen)
910 {
911 *alen *= 2;
912 }
913 else if (*blen < smax)
914 {
915 *blen *= 2;
916 delete[] tabq;
917 delete[] *tabb;
918 *tabb = new bstuff[*blen];
919 tabq = new qstuff[*blen+1];
920 }
921 bad_initkey = 0;
922 bad_perfect = 0;
923 }
924 continue; /* two keys have same (a,b) pair */
925 }
926
927 /* Given distinct (A,B) for all keys, build a perfect hash */
928 if (!perfect(*tabb, tabh, tabq, *blen, smax, scramble, nkeys))
929 {
930 if (++bad_perfect >= RETRY_PERFECT)
931 {
932 if (*blen < smax)
933 {
934 *blen *= 2;
935 delete[] *tabb;
936 delete[] tabq;
937 *tabb = new bstuff[*blen];
938 tabq = new qstuff[*blen+1];
939 --si; /* we know this salt got distinct (A,B) */
940 }
941 else
942 {
943 return 0;
944 }
945 bad_perfect = 0;
946 }
947 continue;
948 }
949
950 break;
951 }
952
953 /* free working memory */
954 delete[] tabh;
955 delete[] tabq;
956
957 return 1;
958 }
959
960 /*
961 ------------------------------------------------------------------------------
962 Input/output type routines
963 ------------------------------------------------------------------------------
964 */
965
966 /* get the list of keys */
967 static void getkeys(key **keys, ub4 *nkeys, const string_map& strings)
968 {
969 key *buf = new key[strings.size()];
970 size_t i;
971 string_map::const_iterator s;
972 for (i = 0, s = strings.begin(); s != strings.end(); ++s, ++i) {
973 key *mykey = buf+i;
974 mykey->name_k = (ub1 *)s->first;
975 mykey->len_k = (ub4)strlen(s->first);
976 }
977 *keys = buf;
978 *nkeys = strings.size();
979 }
980
981
982 static perfect_hash
983 make_perfect(const string_map& strings)
984 {
985 ub4 nkeys; /* number of keys */
986 key *keys; /* head of list of keys */
987 bstuff *tab; /* table indexed by b */
988 ub4 smax; /* scramble[] values in 0..smax-1, a power of 2 */
989 ub4 alen; /* a in 0..alen-1, a power of 2 */
990 ub4 blen; /* b in 0..blen-1, a power of 2 */
991 ub8 salt; /* a parameter to the hash function */
992 ub4 scramble[SCRAMBLE_LEN]; /* used in final hash function */
993 int ok;
994 int i;
995 perfect_hash result;
996
997 /* read in the list of keywords */
998 getkeys(&keys, &nkeys, strings);
999
1000 /* find the hash */
1001 smax = ((ub4)1<<log2u(nkeys));
1002 ok = findhash(&tab, &alen, &blen, &salt,
1003 scramble, smax, keys, nkeys);
1004 if (!ok) {
1005 smax = 2 * ((ub4)1<<log2u(nkeys));
1006 ok = findhash(&tab, &alen, &blen, &salt,
1007 scramble, smax, keys, nkeys);
1008 }
1009 if (!ok) {
1010 bzero(&result, sizeof(result));
1011 } else {
1012 /* build the tables */
1013 result.capacity = smax;
1014 result.occupied = nkeys;
1015 result.shift = UB8BITS - log2u(alen);
1016 result.mask = blen - 1;
1017 result.salt = salt;
1018
1019 result.tab = new uint8_t[blen];
1020 for (i = 0; i < blen; i++) {
1021 result.tab[i] = tab[i].val_b;
1022 }
1023 for (i = 0; i < 256; i++) {
1024 result.scramble[i] = scramble[i];
1025 }
1026 }
1027
1028 delete[] keys;
1029 delete[] tab;
1030
1031 return result;
1032 }
1033
1034 // SELOPT_WRITE
1035 #endif
1036
1037 // namespace objc_selopt
1038 };
1039
1040
1041 #endif