]>
Commit | Line | Data |
---|---|---|
832b6fce A |
1 | /* |
2 | * Copyright (c) 2008 Apple Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. Please obtain a copy of the License at | |
10 | * http://www.opensource.apple.com/apsl/ and read it before using this | |
11 | * file. | |
12 | * | |
13 | * The Original Code and all software distributed under the License are | |
14 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
15 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
16 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
17 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
18 | * Please see the License for the specific language governing rights and | |
19 | * limitations under the License. | |
20 | * | |
21 | * @APPLE_LICENSE_HEADER_END@ | |
22 | */ | |
23 | ||
24 | /* | |
25 | Portions derived from: | |
26 | ||
27 | -------------------------------------------------------------------- | |
28 | lookup8.c, by Bob Jenkins, January 4 1997, Public Domain. | |
29 | hash(), hash2(), hash3, and mix() are externally useful functions. | |
30 | Routines to test the hash are included if SELF_TEST is defined. | |
31 | You can use this free for any purpose. It has no warranty. | |
32 | -------------------------------------------------------------------- | |
33 | ||
34 | ------------------------------------------------------------------------------ | |
35 | perfect.c: code to generate code for a hash for perfect hashing. | |
36 | (c) Bob Jenkins, September 1996, December 1999 | |
37 | You may use this code in any way you wish, and it is free. No warranty. | |
38 | I hereby place this in the public domain. | |
39 | Source is http://burtleburtle.net/bob/c/perfect.c | |
40 | ------------------------------------------------------------------------------ | |
41 | */ | |
42 | ||
43 | /* | |
44 | * objc-selopt.h | |
45 | * Interface between libobjc and dyld | |
46 | * for selector uniquing in the dyld shared cache. | |
47 | * | |
48 | * When building the shared cache, dyld locates all selectors and selector | |
49 | * references in the cached images. It builds a perfect hash table out of | |
50 | * them and writes the table into the shared cache copy of libobjc. | |
51 | * libobjc then uses that table as the builtin selector list. | |
52 | * | |
53 | * Versioning | |
54 | * The table has a version number. dyld and objc can both ignore the table | |
55 | * if the other used the wrong version number. | |
56 | * | |
57 | * Completeness | |
58 | * Not all libraries are in the shared cache. Libraries that are in the | |
59 | * shared cache and were optimized are specially marked. Libraries on | |
60 | * disk never include those marks. | |
61 | * | |
62 | * Coherency | |
63 | * Libraries optimized in the shared cache can be replaced by unoptimized | |
64 | * copies from disk when loaded. The copy from disk is not marked and will | |
65 | * be fixed up by libobjc. The shared cache copy is still mapped into the | |
66 | * process, so the table can point to cstring data in that library's part | |
67 | * of the shared cache without trouble. | |
68 | * | |
69 | * Atomicity | |
70 | * dyld writes the table itself last. If dyld marks some metadata as | |
71 | * updated but then fails to write a table for some reason, libobjc | |
72 | * fixes up all metadata as if it were not marked. | |
73 | */ | |
74 | ||
75 | #ifndef _OBJC_SELOPT_H | |
76 | #define _OBJC_SELOPT_H | |
77 | ||
78 | /* | |
79 | DO NOT INCLUDE ANY objc HEADERS HERE | |
80 | dyld USES THIS FILE AND CANNOT SEE THEM | |
81 | */ | |
82 | #include <stdint.h> | |
83 | #include <stdlib.h> | |
84 | #ifdef SELOPT_WRITE | |
2fd3f4e8 | 85 | #include <unordered_map> |
832b6fce A |
86 | #endif |
87 | /* | |
88 | DO NOT INCLUDE ANY objc HEADERS HERE | |
89 | dyld USES THIS FILE AND CANNOT SEE THEM | |
90 | */ | |
91 | ||
92 | #ifndef STATIC_ASSERT | |
93 | # define STATIC_ASSERT(x) _STATIC_ASSERT2(x, __LINE__) | |
94 | # define _STATIC_ASSERT2(x, line) _STATIC_ASSERT3(x, line) | |
95 | # define _STATIC_ASSERT3(x, line) \ | |
96 | typedef struct { \ | |
97 | int _static_assert[(x) ? 0 : -1]; \ | |
98 | } _static_assert_ ## line __attribute__((unavailable)) | |
99 | #endif | |
100 | ||
101 | #define SELOPT_DEBUG 0 | |
102 | ||
103 | #define S32(x) x = little_endian ? OSSwapHostToLittleInt32(x) : OSSwapHostToBigInt32(x) | |
104 | #define S64(x) x = little_endian ? OSSwapHostToLittleInt64(x) : OSSwapHostToBigInt64(x) | |
105 | ||
106 | namespace objc_opt { | |
107 | ||
108 | typedef int32_t objc_stringhash_offset_t; | |
109 | typedef uint8_t objc_stringhash_check_t; | |
110 | ||
2fd3f4e8 A |
111 | static uint64_t lookup8( uint8_t *k, size_t length, uint64_t level); |
112 | ||
832b6fce A |
113 | #ifdef SELOPT_WRITE |
114 | ||
115 | // Perfect hash code is at the end of this file. | |
116 | ||
117 | struct perfect_hash { | |
118 | uint32_t capacity; | |
119 | uint32_t occupied; | |
120 | uint32_t shift; | |
121 | uint32_t mask; | |
122 | uint64_t salt; | |
123 | ||
124 | uint32_t scramble[256]; | |
125 | uint8_t *tab; // count == mask+1; free with delete[] | |
126 | ||
127 | perfect_hash() : tab(0) { } | |
128 | ||
129 | ~perfect_hash() { if (tab) delete[] tab; } | |
130 | }; | |
131 | ||
132 | struct eqstr { | |
133 | bool operator()(const char* s1, const char* s2) const { | |
134 | return strcmp(s1, s2) == 0; | |
135 | } | |
136 | }; | |
137 | ||
2fd3f4e8 A |
138 | struct hashstr { |
139 | size_t operator()(const char *s) const { | |
140 | return (size_t)lookup8((uint8_t *)s, strlen(s), 0); | |
141 | } | |
142 | }; | |
143 | ||
832b6fce A |
144 | // cstring => cstring's vmaddress |
145 | // (used for selector names and class names) | |
2fd3f4e8 | 146 | typedef std::unordered_map<const char *, uint64_t, hashstr, eqstr> string_map; |
832b6fce A |
147 | |
148 | // class name => (class vmaddress, header_info vmaddress) | |
2fd3f4e8 | 149 | typedef std::unordered_multimap<const char *, std::pair<uint64_t, uint64_t>, hashstr, eqstr> class_map; |
832b6fce A |
150 | |
151 | static perfect_hash make_perfect(const string_map& strings); | |
152 | ||
153 | #endif | |
154 | ||
832b6fce A |
155 | |
156 | // Precomputed perfect hash table of strings. | |
157 | // Base class for precomputed selector table and class table. | |
158 | // Edit objc-sel-table.s and OPT_INITIALIZER if you change this structure. | |
159 | struct objc_stringhash_t { | |
160 | uint32_t capacity; | |
161 | uint32_t occupied; | |
162 | uint32_t shift; | |
163 | uint32_t mask; | |
164 | uint32_t zero; | |
165 | uint32_t unused; // alignment pad | |
166 | uint64_t salt; | |
167 | ||
168 | uint32_t scramble[256]; | |
169 | uint8_t tab[0]; /* tab[mask+1] (always power-of-2) */ | |
170 | // uint8_t checkbytes[capacity]; /* check byte for each string */ | |
171 | // int32_t offsets[capacity]; /* offsets from &capacity to cstrings */ | |
172 | ||
173 | objc_stringhash_check_t *checkbytes() { return (objc_stringhash_check_t *)&tab[mask+1]; } | |
174 | const objc_stringhash_check_t *checkbytes() const { return (const objc_stringhash_check_t *)&tab[mask+1]; } | |
175 | ||
176 | objc_stringhash_offset_t *offsets() { return (objc_stringhash_offset_t *)&checkbytes()[capacity]; } | |
177 | const objc_stringhash_offset_t *offsets() const { return (const objc_stringhash_offset_t *)&checkbytes()[capacity]; } | |
178 | ||
2fd3f4e8 | 179 | uint32_t hash(const char *key, size_t keylen) const |
832b6fce | 180 | { |
2fd3f4e8 | 181 | uint64_t val = lookup8((uint8_t*)key, keylen, salt); |
832b6fce A |
182 | uint32_t index = (uint32_t)(val>>shift) ^ scramble[tab[val&mask]]; |
183 | return index; | |
184 | } | |
185 | ||
2fd3f4e8 A |
186 | uint32_t hash(const char *key) const |
187 | { | |
188 | return hash(key, strlen(key)); | |
189 | } | |
190 | ||
832b6fce A |
191 | // The check bytes areused to reject strings that aren't in the table |
192 | // without paging in the table's cstring data. This checkbyte calculation | |
193 | // catches 4785/4815 rejects when launching Safari; a perfect checkbyte | |
194 | // would catch 4796/4815. | |
2fd3f4e8 | 195 | objc_stringhash_check_t checkbyte(const char *key, size_t keylen) const |
832b6fce A |
196 | { |
197 | return | |
198 | ((key[0] & 0x7) << 5) | |
199 | | | |
2fd3f4e8 | 200 | ((uint8_t)keylen & 0x1f); |
832b6fce A |
201 | } |
202 | ||
2fd3f4e8 A |
203 | objc_stringhash_check_t checkbyte(const char *key) const |
204 | { | |
205 | return checkbyte(key, strlen(key)); | |
206 | } | |
207 | ||
208 | ||
832b6fce A |
209 | #define INDEX_NOT_FOUND (~(uint32_t)0) |
210 | ||
211 | uint32_t getIndex(const char *key) const | |
212 | { | |
2fd3f4e8 A |
213 | size_t keylen = strlen(key); |
214 | uint32_t h = hash(key, keylen); | |
832b6fce A |
215 | |
216 | // Use check byte to reject without paging in the table's cstrings | |
217 | objc_stringhash_check_t h_check = checkbytes()[h]; | |
2fd3f4e8 | 218 | objc_stringhash_check_t key_check = checkbyte(key, keylen); |
832b6fce A |
219 | bool check_fail = (h_check != key_check); |
220 | #if ! SELOPT_DEBUG | |
221 | if (check_fail) return INDEX_NOT_FOUND; | |
222 | #endif | |
223 | ||
2fd3f4e8 A |
224 | // fixme change &zero to 0 in the next version-breaking update |
225 | objc_stringhash_offset_t offset = offsets()[h]; | |
226 | if (offset == offsetof(objc_stringhash_t,zero)) return INDEX_NOT_FOUND; | |
227 | const char *result = (const char *)this + offset; | |
832b6fce A |
228 | if (0 != strcmp(key, result)) return INDEX_NOT_FOUND; |
229 | ||
230 | #if SELOPT_DEBUG | |
231 | if (check_fail) abort(); | |
232 | #endif | |
233 | ||
234 | return h; | |
235 | } | |
236 | ||
237 | #ifdef SELOPT_WRITE | |
238 | ||
239 | size_t size() | |
240 | { | |
241 | return sizeof(objc_stringhash_t) | |
242 | + mask+1 | |
243 | + capacity * sizeof(objc_stringhash_check_t) | |
244 | + capacity * sizeof(objc_stringhash_offset_t); | |
245 | } | |
246 | ||
247 | void byteswap(bool little_endian) | |
248 | { | |
249 | // tab and checkbytes are arrays of bytes, no swap needed | |
250 | for (uint32_t i = 0; i < 256; i++) { | |
251 | S32(scramble[i]); | |
252 | } | |
253 | objc_stringhash_offset_t *o = offsets(); | |
254 | for (uint32_t i = 0; i < capacity; i++) { | |
255 | S32(o[i]); | |
256 | } | |
257 | ||
258 | S32(capacity); | |
259 | S32(occupied); | |
260 | S32(shift); | |
261 | S32(mask); | |
262 | S32(zero); | |
263 | S64(salt); | |
264 | } | |
265 | ||
266 | const char *write(uint64_t base, size_t remaining, string_map& strings) | |
267 | { | |
268 | if (sizeof(objc_stringhash_t) > remaining) { | |
269 | return "selector section too small (metadata not optimized)"; | |
270 | } | |
271 | ||
272 | if (strings.size() == 0) { | |
273 | bzero(this, sizeof(objc_stringhash_t)); | |
274 | return NULL; | |
275 | } | |
276 | ||
277 | perfect_hash phash = make_perfect(strings); | |
278 | if (phash.capacity == 0) { | |
279 | return "perfect hash failed (metadata not optimized)"; | |
280 | } | |
281 | ||
282 | // Set header | |
283 | capacity = phash.capacity; | |
284 | occupied = phash.occupied; | |
285 | shift = phash.shift; | |
286 | mask = phash.mask; | |
287 | zero = 0; | |
288 | unused = 0; | |
289 | salt = phash.salt; | |
290 | ||
291 | if (size() > remaining) { | |
292 | return "selector section too small (metadata not optimized)"; | |
293 | } | |
294 | ||
295 | // Set hash data | |
296 | for (uint32_t i = 0; i < 256; i++) { | |
297 | scramble[i] = phash.scramble[i]; | |
298 | } | |
299 | for (uint32_t i = 0; i < phash.mask+1; i++) { | |
300 | tab[i] = phash.tab[i]; | |
301 | } | |
302 | ||
303 | // Set offsets to "" | |
304 | for (uint32_t i = 0; i < phash.capacity; i++) { | |
305 | offsets()[i] = | |
306 | (objc_stringhash_offset_t)offsetof(objc_stringhash_t, zero); | |
307 | } | |
308 | // Set checkbytes to 0 | |
309 | for (uint32_t i = 0; i < phash.capacity; i++) { | |
310 | checkbytes()[i] = 0; | |
311 | } | |
312 | ||
313 | // Set real string offsets and checkbytes | |
314 | # define SHIFT (64 - 8*sizeof(objc_stringhash_offset_t)) | |
315 | string_map::const_iterator s; | |
316 | for (s = strings.begin(); s != strings.end(); ++s) { | |
317 | int64_t offset = s->second - base; | |
318 | if ((offset<<SHIFT)>>SHIFT != offset) { | |
319 | return "selector offset too big (metadata not optimized)"; | |
320 | } | |
321 | ||
322 | uint32_t h = hash(s->first); | |
323 | offsets()[h] = (objc_stringhash_offset_t)offset; | |
324 | checkbytes()[h] = checkbyte(s->first); | |
325 | } | |
326 | # undef SHIFT | |
327 | ||
328 | return NULL; | |
329 | } | |
330 | ||
331 | // SELOPT_WRITE | |
332 | #endif | |
333 | }; | |
334 | ||
335 | ||
336 | // Precomputed selector table. | |
337 | // Edit objc-sel-table.s and OPT_INITIALIZER if you change this structure. | |
338 | struct objc_selopt_t : objc_stringhash_t { | |
339 | const char *get(const char *key) const | |
340 | { | |
341 | uint32_t h = getIndex(key); | |
342 | if (h == INDEX_NOT_FOUND) return NULL; | |
343 | ||
344 | return (const char *)this + offsets()[h]; | |
345 | } | |
346 | }; | |
347 | ||
348 | // Precomputed class list. | |
349 | // Edit objc-sel-table.s and OPT_INITIALIZER if you change these structures. | |
350 | ||
351 | struct objc_classheader_t { | |
352 | objc_stringhash_offset_t clsOffset; | |
353 | objc_stringhash_offset_t hiOffset; | |
354 | ||
355 | // For duplicate class names: | |
356 | // clsOffset = count<<1 | 1 | |
357 | // duplicated classes are duplicateOffsets[hiOffset..hiOffset+count-1] | |
358 | bool isDuplicate() const { return clsOffset & 1; } | |
359 | uint32_t duplicateCount() const { return clsOffset >> 1; } | |
360 | uint32_t duplicateIndex() const { return hiOffset; } | |
361 | }; | |
362 | ||
363 | ||
364 | struct objc_clsopt_t : objc_stringhash_t { | |
365 | // ...objc_stringhash_t fields... | |
366 | // objc_classheader_t classOffsets[capacity]; /* offsets from &capacity to class_t and header_info */ | |
367 | // uint32_t duplicateCount; | |
368 | // objc_classheader_t duplicateOffsets[duplicatedClasses]; | |
369 | ||
370 | objc_classheader_t *classOffsets() { return (objc_classheader_t *)&offsets()[capacity]; } | |
371 | const objc_classheader_t *classOffsets() const { return (const objc_classheader_t *)&offsets()[capacity]; } | |
372 | ||
373 | uint32_t& duplicateCount() { return *(uint32_t *)&classOffsets()[capacity]; } | |
374 | const uint32_t& duplicateCount() const { return *(const uint32_t *)&classOffsets()[capacity]; } | |
375 | ||
376 | objc_classheader_t *duplicateOffsets() { return (objc_classheader_t *)(&duplicateCount()+1); } | |
377 | const objc_classheader_t *duplicateOffsets() const { return (const objc_classheader_t *)(&duplicateCount()+1); } | |
378 | ||
379 | // 0/NULL/NULL: not found | |
380 | // 1/ptr/ptr: found exactly one | |
381 | // n/NULL/NULL: found N - use getClassesAndHeaders() instead | |
382 | uint32_t getClassAndHeader(const char *key, void*& cls, void*& hi) const | |
383 | { | |
384 | uint32_t h = getIndex(key); | |
385 | if (h == INDEX_NOT_FOUND) { | |
386 | cls = NULL; | |
387 | hi = NULL; | |
388 | return 0; | |
389 | } | |
390 | ||
391 | const objc_classheader_t& clshi = classOffsets()[h]; | |
392 | if (! clshi.isDuplicate()) { | |
393 | // class appears in exactly one header | |
394 | cls = (void *)((const char *)this + clshi.clsOffset); | |
395 | hi = (void *)((const char *)this + clshi.hiOffset); | |
396 | return 1; | |
397 | } | |
398 | else { | |
399 | // class appears in more than one header - use getClassesAndHeaders | |
400 | cls = NULL; | |
401 | hi = NULL; | |
402 | return clshi.duplicateCount(); | |
403 | } | |
404 | } | |
405 | ||
406 | void getClassesAndHeaders(const char *key, void **cls, void **hi) const | |
407 | { | |
408 | uint32_t h = getIndex(key); | |
409 | if (h == INDEX_NOT_FOUND) return; | |
410 | ||
411 | const objc_classheader_t& clshi = classOffsets()[h]; | |
412 | if (! clshi.isDuplicate()) { | |
413 | // class appears in exactly one header | |
414 | cls[0] = (void *)((const char *)this + clshi.clsOffset); | |
415 | hi[0] = (void *)((const char *)this + clshi.hiOffset); | |
416 | } | |
417 | else { | |
418 | // class appears in more than one header | |
419 | uint32_t count = clshi.duplicateCount(); | |
420 | const objc_classheader_t *list = | |
421 | &duplicateOffsets()[clshi.duplicateIndex()]; | |
422 | for (uint32_t i = 0; i < count; i++) { | |
423 | cls[i] = (void *)((const char *)this + list[i].clsOffset); | |
424 | hi[i] = (void *)((const char *)this + list[i].hiOffset); | |
425 | } | |
426 | } | |
427 | } | |
428 | ||
429 | #ifdef SELOPT_WRITE | |
430 | ||
431 | size_t size() | |
432 | { | |
433 | return | |
434 | objc_stringhash_t::size() | |
435 | + capacity * sizeof(objc_classheader_t) | |
436 | + sizeof(duplicateCount()) | |
437 | + duplicateCount() * sizeof(objc_classheader_t); | |
438 | } | |
439 | ||
440 | void byteswap(bool little_endian) | |
441 | { | |
442 | objc_classheader_t *o; | |
443 | ||
444 | o = classOffsets(); | |
445 | for (uint32_t i = 0; i < capacity; i++) { | |
446 | S32(o[i].clsOffset); | |
447 | S32(o[i].hiOffset); | |
448 | } | |
449 | ||
450 | o = duplicateOffsets(); | |
451 | for (uint32_t i = 0; i < duplicateCount(); i++) { | |
452 | S32(o[i].clsOffset); | |
453 | S32(o[i].hiOffset); | |
454 | } | |
455 | ||
456 | S32(duplicateCount()); | |
457 | ||
458 | objc_stringhash_t::byteswap(little_endian); | |
459 | } | |
460 | ||
461 | const char *write(uint64_t base, size_t remaining, | |
462 | string_map& strings, class_map& classes, bool verbose) | |
463 | { | |
464 | const char *err; | |
465 | err = objc_stringhash_t::write(base, remaining, strings); | |
466 | if (err) return err; | |
467 | ||
468 | if (size() > remaining) { | |
469 | return "selector section too small (metadata not optimized)"; | |
470 | } | |
471 | ||
472 | // Set class offsets to &zero | |
473 | objc_stringhash_offset_t zeroOffset = | |
474 | (objc_stringhash_offset_t)offsetof(objc_stringhash_t, zero); | |
475 | for (uint32_t i = 0; i < capacity; i++) { | |
476 | classOffsets()[i].clsOffset = zeroOffset; | |
477 | classOffsets()[i].hiOffset = zeroOffset; | |
478 | } | |
479 | ||
480 | // Set real class offsets | |
481 | # define SHIFT (64 - 8*sizeof(objc_stringhash_offset_t)) | |
482 | class_map::const_iterator c; | |
483 | for (c = classes.begin(); c != classes.end(); ++c) { | |
484 | uint32_t h = getIndex(c->first); | |
485 | if (h == INDEX_NOT_FOUND) { | |
486 | return "class list busted (metadata not optimized)"; | |
487 | } | |
488 | ||
489 | if (classOffsets()[h].clsOffset != zeroOffset) { | |
490 | // already did this class | |
491 | continue; | |
492 | } | |
493 | ||
494 | uint32_t count = classes.count(c->first); | |
495 | if (count == 1) { | |
496 | // only one class with this name | |
497 | ||
498 | int64_t coff = c->second.first - base; | |
499 | int64_t hoff = c->second.second - base; | |
500 | if ((coff<<SHIFT)>>SHIFT != coff) { | |
501 | return "class offset too big (metadata not optimized)"; | |
502 | } | |
503 | if ((hoff<<SHIFT)>>SHIFT != hoff) { | |
504 | return "header offset too big (metadata not optimized)"; | |
505 | } | |
506 | ||
507 | classOffsets()[h].clsOffset = (objc_stringhash_offset_t)coff; | |
508 | classOffsets()[h].hiOffset = (objc_stringhash_offset_t)hoff; | |
509 | } | |
510 | else { | |
511 | // class name has duplicates - write them all now | |
512 | if (verbose) { | |
513 | fprintf(stderr, "update_dyld_shared_cache: %u duplicates of Objective-C class %s\n", count, c->first); | |
514 | } | |
515 | ||
516 | uint32_t dest = duplicateCount(); | |
517 | duplicateCount() += count; | |
518 | if (size() > remaining) { | |
519 | return "selector section too small (metadata not optimized)"; | |
520 | } | |
521 | ||
522 | // classOffsets() instead contains count and array index | |
523 | classOffsets()[h].clsOffset = count*2 + 1; | |
524 | classOffsets()[h].hiOffset = dest; | |
525 | ||
526 | std::pair<class_map::const_iterator, class_map::const_iterator> | |
527 | duplicates = classes.equal_range(c->first); | |
528 | class_map::const_iterator dup; | |
529 | for (dup = duplicates.first; dup != duplicates.second; ++dup) { | |
530 | int64_t coff = dup->second.first - base; | |
531 | int64_t hoff = dup->second.second - base; | |
532 | if ((coff<<SHIFT)>>SHIFT != coff) { | |
533 | return "class offset too big (metadata not optimized)"; | |
534 | } | |
535 | if ((hoff<<SHIFT)>>SHIFT != hoff) { | |
536 | return "header offset too big (metadata not optimized)"; | |
537 | } | |
538 | ||
539 | duplicateOffsets()[dest].clsOffset = (objc_stringhash_offset_t)coff; | |
540 | duplicateOffsets()[dest].hiOffset = (objc_stringhash_offset_t)hoff; | |
541 | dest++; | |
542 | } | |
543 | } | |
544 | } | |
545 | # undef SHIFT | |
546 | ||
547 | return NULL; | |
548 | } | |
549 | ||
550 | // SELOPT_WRITE | |
551 | #endif | |
552 | }; | |
553 | ||
554 | // Precomputed image list. | |
555 | struct objc_headeropt_t; | |
556 | ||
557 | // Precomputed class list. | |
558 | struct objc_clsopt_t; | |
559 | ||
560 | // Edit objc-sel-table.s if you change this value. | |
561 | enum { VERSION = 12 }; | |
562 | ||
563 | // Top-level optimization structure. | |
564 | // Edit objc-sel-table.s and OPT_INITIALIZER if you change this structure. | |
565 | struct objc_opt_t { | |
566 | uint32_t version; | |
567 | int32_t selopt_offset; | |
568 | int32_t headeropt_offset; | |
569 | int32_t clsopt_offset; | |
570 | ||
571 | const objc_selopt_t* selopt() const { | |
572 | if (selopt_offset == 0) return NULL; | |
573 | return (objc_selopt_t *)((uint8_t *)this + selopt_offset); | |
574 | } | |
575 | objc_selopt_t* selopt() { | |
576 | if (selopt_offset == 0) return NULL; | |
577 | return (objc_selopt_t *)((uint8_t *)this + selopt_offset); | |
578 | } | |
579 | ||
580 | struct objc_headeropt_t* headeropt() const { | |
581 | if (headeropt_offset == 0) return NULL; | |
582 | return (struct objc_headeropt_t *)((uint8_t *)this + headeropt_offset); | |
583 | } | |
584 | ||
585 | struct objc_clsopt_t* clsopt() const { | |
586 | if (clsopt_offset == 0) return NULL; | |
587 | return (objc_clsopt_t *)((uint8_t *)this + clsopt_offset); | |
588 | } | |
589 | }; | |
590 | ||
591 | // sizeof(objc_opt_t) must be pointer-aligned | |
592 | STATIC_ASSERT(sizeof(objc_opt_t) % sizeof(void*) == 0); | |
593 | ||
594 | // Initializer for empty opt of type uint32_t[]. | |
595 | #define X8(x) x, x, x, x, x, x, x, x | |
596 | #define X64(x) X8(x), X8(x), X8(x), X8(x), X8(x), X8(x), X8(x), X8(x) | |
597 | #define X256(x) X64(x), X64(x), X64(x), X64(x) | |
598 | #define OPT_INITIALIZER { \ | |
599 | /* objc_opt_t */ \ | |
600 | objc_opt::VERSION, 16, 0, 0, \ | |
601 | /* objc_selopt_t */ \ | |
602 | 4, 4, 63, 3, 0, 0, 0,0, X256(0), 0, 0, 16, 16, 16, 16 \ | |
603 | /* no objc_headeropt_t */ \ | |
604 | /* no objc_clsopt_t */ \ | |
605 | } | |
606 | ||
607 | ||
608 | /* | |
609 | -------------------------------------------------------------------- | |
610 | mix -- mix 3 64-bit values reversibly. | |
611 | mix() takes 48 machine instructions, but only 24 cycles on a superscalar | |
612 | machine (like Intel's new MMX architecture). It requires 4 64-bit | |
613 | registers for 4::2 parallelism. | |
614 | All 1-bit deltas, all 2-bit deltas, all deltas composed of top bits of | |
615 | (a,b,c), and all deltas of bottom bits were tested. All deltas were | |
616 | tested both on random keys and on keys that were nearly all zero. | |
617 | These deltas all cause every bit of c to change between 1/3 and 2/3 | |
618 | of the time (well, only 113/400 to 287/400 of the time for some | |
619 | 2-bit delta). These deltas all cause at least 80 bits to change | |
620 | among (a,b,c) when the mix is run either forward or backward (yes it | |
621 | is reversible). | |
622 | This implies that a hash using mix64 has no funnels. There may be | |
623 | characteristics with 3-bit deltas or bigger, I didn't test for | |
624 | those. | |
625 | -------------------------------------------------------------------- | |
626 | */ | |
627 | #define mix64(a,b,c) \ | |
628 | { \ | |
629 | a -= b; a -= c; a ^= (c>>43); \ | |
630 | b -= c; b -= a; b ^= (a<<9); \ | |
631 | c -= a; c -= b; c ^= (b>>8); \ | |
632 | a -= b; a -= c; a ^= (c>>38); \ | |
633 | b -= c; b -= a; b ^= (a<<23); \ | |
634 | c -= a; c -= b; c ^= (b>>5); \ | |
635 | a -= b; a -= c; a ^= (c>>35); \ | |
636 | b -= c; b -= a; b ^= (a<<49); \ | |
637 | c -= a; c -= b; c ^= (b>>11); \ | |
638 | a -= b; a -= c; a ^= (c>>12); \ | |
639 | b -= c; b -= a; b ^= (a<<18); \ | |
640 | c -= a; c -= b; c ^= (b>>22); \ | |
641 | } | |
642 | ||
643 | /* | |
644 | -------------------------------------------------------------------- | |
645 | hash() -- hash a variable-length key into a 64-bit value | |
646 | k : the key (the unaligned variable-length array of bytes) | |
647 | len : the length of the key, counting by bytes | |
648 | level : can be any 8-byte value | |
649 | Returns a 64-bit value. Every bit of the key affects every bit of | |
650 | the return value. No funnels. Every 1-bit and 2-bit delta achieves | |
651 | avalanche. About 41+5len instructions. | |
652 | ||
653 | The best hash table sizes are powers of 2. There is no need to do | |
654 | mod a prime (mod is sooo slow!). If you need less than 64 bits, | |
655 | use a bitmask. For example, if you need only 10 bits, do | |
656 | h = (h & hashmask(10)); | |
657 | In which case, the hash table should have hashsize(10) elements. | |
658 | ||
659 | If you are hashing n strings (uint8_t **)k, do it like this: | |
660 | for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h); | |
661 | ||
662 | By Bob Jenkins, Jan 4 1997. bob_jenkins@burtleburtle.net. You may | |
663 | use this code any way you wish, private, educational, or commercial, | |
664 | but I would appreciate if you give me credit. | |
665 | ||
666 | See http://burtleburtle.net/bob/hash/evahash.html | |
667 | Use for hash table lookup, or anything where one collision in 2^^64 | |
668 | is acceptable. Do NOT use for cryptographic purposes. | |
669 | -------------------------------------------------------------------- | |
670 | */ | |
671 | ||
672 | static uint64_t lookup8( uint8_t *k, size_t length, uint64_t level) | |
673 | // uint8_t *k; /* the key */ | |
674 | // uint64_t length; /* the length of the key */ | |
675 | // uint64_t level; /* the previous hash, or an arbitrary value */ | |
676 | { | |
677 | uint64_t a,b,c; | |
678 | size_t len; | |
679 | ||
680 | /* Set up the internal state */ | |
681 | len = length; | |
682 | a = b = level; /* the previous hash value */ | |
683 | c = 0x9e3779b97f4a7c13LL; /* the golden ratio; an arbitrary value */ | |
684 | ||
685 | /*---------------------------------------- handle most of the key */ | |
686 | while (len >= 24) | |
687 | { | |
688 | a += (k[0] +((uint64_t)k[ 1]<< 8)+((uint64_t)k[ 2]<<16)+((uint64_t)k[ 3]<<24) | |
689 | +((uint64_t)k[4 ]<<32)+((uint64_t)k[ 5]<<40)+((uint64_t)k[ 6]<<48)+((uint64_t)k[ 7]<<56)); | |
690 | b += (k[8] +((uint64_t)k[ 9]<< 8)+((uint64_t)k[10]<<16)+((uint64_t)k[11]<<24) | |
691 | +((uint64_t)k[12]<<32)+((uint64_t)k[13]<<40)+((uint64_t)k[14]<<48)+((uint64_t)k[15]<<56)); | |
692 | c += (k[16] +((uint64_t)k[17]<< 8)+((uint64_t)k[18]<<16)+((uint64_t)k[19]<<24) | |
693 | +((uint64_t)k[20]<<32)+((uint64_t)k[21]<<40)+((uint64_t)k[22]<<48)+((uint64_t)k[23]<<56)); | |
694 | mix64(a,b,c); | |
695 | k += 24; len -= 24; | |
696 | } | |
697 | ||
698 | /*------------------------------------- handle the last 23 bytes */ | |
699 | c += length; | |
700 | switch(len) /* all the case statements fall through */ | |
701 | { | |
702 | case 23: c+=((uint64_t)k[22]<<56); | |
703 | case 22: c+=((uint64_t)k[21]<<48); | |
704 | case 21: c+=((uint64_t)k[20]<<40); | |
705 | case 20: c+=((uint64_t)k[19]<<32); | |
706 | case 19: c+=((uint64_t)k[18]<<24); | |
707 | case 18: c+=((uint64_t)k[17]<<16); | |
708 | case 17: c+=((uint64_t)k[16]<<8); | |
709 | /* the first byte of c is reserved for the length */ | |
710 | case 16: b+=((uint64_t)k[15]<<56); | |
711 | case 15: b+=((uint64_t)k[14]<<48); | |
712 | case 14: b+=((uint64_t)k[13]<<40); | |
713 | case 13: b+=((uint64_t)k[12]<<32); | |
714 | case 12: b+=((uint64_t)k[11]<<24); | |
715 | case 11: b+=((uint64_t)k[10]<<16); | |
716 | case 10: b+=((uint64_t)k[ 9]<<8); | |
717 | case 9: b+=((uint64_t)k[ 8]); | |
718 | case 8: a+=((uint64_t)k[ 7]<<56); | |
719 | case 7: a+=((uint64_t)k[ 6]<<48); | |
720 | case 6: a+=((uint64_t)k[ 5]<<40); | |
721 | case 5: a+=((uint64_t)k[ 4]<<32); | |
722 | case 4: a+=((uint64_t)k[ 3]<<24); | |
723 | case 3: a+=((uint64_t)k[ 2]<<16); | |
724 | case 2: a+=((uint64_t)k[ 1]<<8); | |
725 | case 1: a+=((uint64_t)k[ 0]); | |
726 | /* case 0: nothing left to add */ | |
727 | } | |
728 | mix64(a,b,c); | |
729 | /*-------------------------------------------- report the result */ | |
730 | return c; | |
731 | } | |
732 | ||
733 | ||
734 | #ifdef SELOPT_WRITE | |
735 | ||
736 | /* | |
737 | ------------------------------------------------------------------------------ | |
738 | This generates a minimal perfect hash function. That means, given a | |
739 | set of n keys, this determines a hash function that maps each of | |
740 | those keys into a value in 0..n-1 with no collisions. | |
741 | ||
742 | The perfect hash function first uses a normal hash function on the key | |
743 | to determine (a,b) such that the pair (a,b) is distinct for all | |
744 | keys, then it computes a^scramble[tab[b]] to get the final perfect hash. | |
745 | tab[] is an array of 1-byte values and scramble[] is a 256-term array of | |
746 | 2-byte or 4-byte values. If there are n keys, the length of tab[] is a | |
747 | power of two between n/3 and n. | |
748 | ||
749 | I found the idea of computing distinct (a,b) values in "Practical minimal | |
750 | perfect hash functions for large databases", Fox, Heath, Chen, and Daoud, | |
751 | Communications of the ACM, January 1992. They found the idea in Chichelli | |
752 | (CACM Jan 1980). Beyond that, our methods differ. | |
753 | ||
754 | The key is hashed to a pair (a,b) where a in 0..*alen*-1 and b in | |
755 | 0..*blen*-1. A fast hash function determines both a and b | |
756 | simultaneously. Any decent hash function is likely to produce | |
757 | hashes so that (a,b) is distinct for all pairs. I try the hash | |
758 | using different values of *salt* until all pairs are distinct. | |
759 | ||
760 | The final hash is (a XOR scramble[tab[b]]). *scramble* is a | |
761 | predetermined mapping of 0..255 into 0..smax-1. *tab* is an | |
762 | array that we fill in in such a way as to make the hash perfect. | |
763 | ||
764 | First we fill in all values of *tab* that are used by more than one | |
765 | key. We try all possible values for each position until one works. | |
766 | ||
767 | This leaves m unmapped keys and m values that something could hash to. | |
768 | If you treat unmapped keys as lefthand nodes and unused hash values | |
769 | as righthand nodes, and draw a line connecting each key to each hash | |
770 | value it could map to, you get a bipartite graph. We attempt to | |
771 | find a perfect matching in this graph. If we succeed, we have | |
772 | determined a perfect hash for the whole set of keys. | |
773 | ||
774 | *scramble* is used because (a^tab[i]) clusters keys around *a*. | |
775 | ------------------------------------------------------------------------------ | |
776 | */ | |
777 | ||
778 | typedef uint64_t ub8; | |
779 | #define UB8MAXVAL 0xffffffffffffffffLL | |
780 | #define UB8BITS 64 | |
781 | typedef uint32_t ub4; | |
782 | #define UB4MAXVAL 0xffffffff | |
783 | #define UB4BITS 32 | |
784 | typedef uint16_t ub2; | |
785 | #define UB2MAXVAL 0xffff | |
786 | #define UB2BITS 16 | |
787 | typedef uint8_t ub1; | |
788 | #define UB1MAXVAL 0xff | |
789 | #define UB1BITS 8 | |
790 | ||
791 | #define TRUE 1 | |
792 | #define FALSE 0 | |
793 | ||
794 | #define SCRAMBLE_LEN 256 // ((ub4)1<<16) /* length of *scramble* */ | |
795 | #define RETRY_INITKEY 2048 /* number of times to try to find distinct (a,b) */ | |
796 | #define RETRY_PERFECT 4 /* number of times to try to make a perfect hash */ | |
797 | ||
798 | ||
799 | /* representation of a key */ | |
800 | struct key | |
801 | { | |
802 | ub1 *name_k; /* the actual key */ | |
803 | ub4 len_k; /* the length of the actual key */ | |
804 | ub4 hash_k; /* the initial hash value for this key */ | |
805 | /* beyond this point is mapping-dependent */ | |
806 | ub4 a_k; /* a, of the key maps to (a,b) */ | |
807 | ub4 b_k; /* b, of the key maps to (a,b) */ | |
808 | struct key *nextb_k; /* next key with this b */ | |
809 | }; | |
810 | typedef struct key key; | |
811 | ||
812 | /* things indexed by b of original (a,b) pair */ | |
813 | struct bstuff | |
814 | { | |
815 | ub2 val_b; /* hash=a^tabb[b].val_b */ | |
816 | key *list_b; /* tabb[i].list_b is list of keys with b==i */ | |
817 | ub4 listlen_b; /* length of list_b */ | |
818 | ub4 water_b; /* high watermark of who has visited this map node */ | |
819 | }; | |
820 | typedef struct bstuff bstuff; | |
821 | ||
822 | /* things indexed by final hash value */ | |
823 | struct hstuff | |
824 | { | |
825 | key *key_h; /* tabh[i].key_h is the key with a hash of i */ | |
826 | }; | |
827 | typedef struct hstuff hstuff; | |
828 | ||
829 | /* things indexed by queue position */ | |
830 | struct qstuff | |
831 | { | |
832 | bstuff *b_q; /* b that currently occupies this hash */ | |
833 | ub4 parent_q; /* queue position of parent that could use this hash */ | |
834 | ub2 newval_q; /* what to change parent tab[b] to to use this hash */ | |
835 | ub2 oldval_q; /* original value of tab[b] */ | |
836 | }; | |
837 | typedef struct qstuff qstuff; | |
838 | ||
839 | ||
840 | /* | |
841 | ------------------------------------------------------------------------------ | |
842 | Find the mapping that will produce a perfect hash | |
843 | ------------------------------------------------------------------------------ | |
844 | */ | |
845 | ||
846 | /* return the ceiling of the log (base 2) of val */ | |
847 | static ub4 log2u(ub4 val) | |
848 | { | |
849 | ub4 i; | |
850 | for (i=0; ((ub4)1<<i) < val; ++i) | |
851 | ; | |
852 | return i; | |
853 | } | |
854 | ||
855 | /* compute p(x), where p is a permutation of 0..(1<<nbits)-1 */ | |
856 | /* permute(0)=0. This is intended and useful. */ | |
857 | static ub4 permute(ub4 x, ub4 nbits) | |
858 | // ub4 x; /* input, a value in some range */ | |
859 | // ub4 nbits; /* input, number of bits in range */ | |
860 | { | |
861 | int i; | |
862 | int mask = ((ub4)1<<nbits)-1; /* all ones */ | |
863 | int const2 = 1+nbits/2; | |
864 | int const3 = 1+nbits/3; | |
865 | int const4 = 1+nbits/4; | |
866 | int const5 = 1+nbits/5; | |
867 | for (i=0; i<20; ++i) | |
868 | { | |
869 | x = (x+(x<<const2)) & mask; | |
870 | x = (x^(x>>const3)); | |
871 | x = (x+(x<<const4)) & mask; | |
872 | x = (x^(x>>const5)); | |
873 | } | |
874 | return x; | |
875 | } | |
876 | ||
877 | /* initialize scramble[] with distinct random values in 0..smax-1 */ | |
878 | static void scrambleinit(ub4 *scramble, ub4 smax) | |
879 | // ub4 *scramble; /* hash is a^scramble[tab[b]] */ | |
880 | // ub4 smax; /* scramble values should be in 0..smax-1 */ | |
881 | { | |
882 | ub4 i; | |
883 | ||
884 | /* fill scramble[] with distinct random integers in 0..smax-1 */ | |
885 | for (i=0; i<SCRAMBLE_LEN; ++i) | |
886 | { | |
887 | scramble[i] = permute(i, log2u(smax)); | |
888 | } | |
889 | } | |
890 | ||
891 | ||
892 | /* | |
893 | * put keys in tabb according to key->b_k | |
894 | * check if the initial hash might work | |
895 | */ | |
896 | static int inittab(bstuff *tabb, ub4 blen, key *keys, ub4 nkeys, int complete) | |
897 | // bstuff *tabb; /* output, list of keys with b for (a,b) */ | |
898 | // ub4 blen; /* length of tabb */ | |
899 | // key *keys; /* list of keys already hashed */ | |
900 | // int complete; /* TRUE means to complete init despite collisions */ | |
901 | { | |
902 | int nocollision = TRUE; | |
903 | ub4 i; | |
904 | ||
905 | memset((void *)tabb, 0, (size_t)(sizeof(bstuff)*blen)); | |
906 | ||
907 | /* Two keys with the same (a,b) guarantees a collision */ | |
908 | for (i = 0; i < nkeys; i++) { | |
909 | key *mykey = keys+i; | |
910 | key *otherkey; | |
911 | ||
912 | for (otherkey=tabb[mykey->b_k].list_b; | |
913 | otherkey; | |
914 | otherkey=otherkey->nextb_k) | |
915 | { | |
916 | if (mykey->a_k == otherkey->a_k) | |
917 | { | |
918 | nocollision = FALSE; | |
919 | if (!complete) | |
920 | return FALSE; | |
921 | } | |
922 | } | |
923 | ++tabb[mykey->b_k].listlen_b; | |
924 | mykey->nextb_k = tabb[mykey->b_k].list_b; | |
925 | tabb[mykey->b_k].list_b = mykey; | |
926 | } | |
927 | ||
928 | /* no two keys have the same (a,b) pair */ | |
929 | return nocollision; | |
930 | } | |
931 | ||
932 | ||
933 | /* Do the initial hash for normal mode (use lookup and checksum) */ | |
934 | static void initnorm(key *keys, ub4 nkeys, ub4 alen, ub4 blen, ub4 smax, ub8 salt) | |
935 | // key *keys; /* list of all keys */ | |
936 | // ub4 alen; /* (a,b) has a in 0..alen-1, a power of 2 */ | |
937 | // ub4 blen; /* (a,b) has b in 0..blen-1, a power of 2 */ | |
938 | // ub4 smax; /* maximum range of computable hash values */ | |
939 | // ub4 salt; /* used to initialize the hash function */ | |
940 | // gencode *final; /* output, code for the final hash */ | |
941 | { | |
942 | ub4 loga = log2u(alen); /* log based 2 of blen */ | |
943 | ub4 i; | |
944 | for (i = 0; i < nkeys; i++) { | |
945 | key *mykey = keys+i; | |
946 | ub8 hash = lookup8(mykey->name_k, mykey->len_k, salt); | |
947 | mykey->a_k = (loga > 0) ? hash>>(UB8BITS-loga) : 0; | |
948 | mykey->b_k = (blen > 1) ? hash&(blen-1) : 0; | |
949 | } | |
950 | } | |
951 | ||
952 | ||
953 | /* Try to apply an augmenting list */ | |
954 | static int apply(bstuff *tabb, hstuff *tabh, qstuff *tabq, ub4 blen, ub4 *scramble, ub4 tail, int rollback) | |
955 | // bstuff *tabb; | |
956 | // hstuff *tabh; | |
957 | // qstuff *tabq; | |
958 | // ub4 blen; | |
959 | // ub4 *scramble; | |
960 | // ub4 tail; | |
961 | // int rollback; /* FALSE applies augmenting path, TRUE rolls back */ | |
962 | { | |
963 | ub4 hash; | |
964 | key *mykey; | |
965 | bstuff *pb; | |
966 | ub4 child; | |
967 | ub4 parent; | |
968 | ub4 stabb; /* scramble[tab[b]] */ | |
969 | ||
970 | /* walk from child to parent */ | |
971 | for (child=tail-1; child; child=parent) | |
972 | { | |
973 | parent = tabq[child].parent_q; /* find child's parent */ | |
974 | pb = tabq[parent].b_q; /* find parent's list of siblings */ | |
975 | ||
976 | /* erase old hash values */ | |
977 | stabb = scramble[pb->val_b]; | |
978 | for (mykey=pb->list_b; mykey; mykey=mykey->nextb_k) | |
979 | { | |
980 | hash = mykey->a_k^stabb; | |
981 | if (mykey == tabh[hash].key_h) | |
982 | { /* erase hash for all of child's siblings */ | |
983 | tabh[hash].key_h = (key *)0; | |
984 | } | |
985 | } | |
986 | ||
987 | /* change pb->val_b, which will change the hashes of all parent siblings */ | |
988 | pb->val_b = (rollback ? tabq[child].oldval_q : tabq[child].newval_q); | |
989 | ||
990 | /* set new hash values */ | |
991 | stabb = scramble[pb->val_b]; | |
992 | for (mykey=pb->list_b; mykey; mykey=mykey->nextb_k) | |
993 | { | |
994 | hash = mykey->a_k^stabb; | |
995 | if (rollback) | |
996 | { | |
997 | if (parent == 0) continue; /* root never had a hash */ | |
998 | } | |
999 | else if (tabh[hash].key_h) | |
1000 | { | |
1001 | /* very rare: roll back any changes */ | |
1002 | apply(tabb, tabh, tabq, blen, scramble, tail, TRUE); | |
1003 | return FALSE; /* failure, collision */ | |
1004 | } | |
1005 | tabh[hash].key_h = mykey; | |
1006 | } | |
1007 | } | |
1008 | return TRUE; | |
1009 | } | |
1010 | ||
1011 | ||
1012 | /* | |
1013 | ------------------------------------------------------------------------------- | |
1014 | augment(): Add item to the mapping. | |
1015 | ||
1016 | Construct a spanning tree of *b*s with *item* as root, where each | |
1017 | parent can have all its hashes changed (by some new val_b) with | |
1018 | at most one collision, and each child is the b of that collision. | |
1019 | ||
1020 | I got this from Tarjan's "Data Structures and Network Algorithms". The | |
1021 | path from *item* to a *b* that can be remapped with no collision is | |
1022 | an "augmenting path". Change values of tab[b] along the path so that | |
1023 | the unmapped key gets mapped and the unused hash value gets used. | |
1024 | ||
1025 | Assuming 1 key per b, if m out of n hash values are still unused, | |
1026 | you should expect the transitive closure to cover n/m nodes before | |
1027 | an unused node is found. Sum(i=1..n)(n/i) is about nlogn, so expect | |
1028 | this approach to take about nlogn time to map all single-key b's. | |
1029 | ------------------------------------------------------------------------------- | |
1030 | */ | |
1031 | static int augment(bstuff *tabb, hstuff *tabh, qstuff *tabq, ub4 blen, ub4 *scramble, ub4 smax, bstuff *item, ub4 nkeys, | |
1032 | ub4 highwater) | |
1033 | // bstuff *tabb; /* stuff indexed by b */ | |
1034 | // hstuff *tabh; /* which key is associated with which hash, indexed by hash */ | |
1035 | // qstuff *tabq; /* queue of *b* values, this is the spanning tree */ | |
1036 | // ub4 blen; /* length of tabb */ | |
1037 | // ub4 *scramble; /* final hash is a^scramble[tab[b]] */ | |
1038 | // ub4 smax; /* highest value in scramble */ | |
1039 | // bstuff *item; /* &tabb[b] for the b to be mapped */ | |
1040 | // ub4 nkeys; /* final hash must be in 0..nkeys-1 */ | |
1041 | // ub4 highwater; /* a value higher than any now in tabb[].water_b */ | |
1042 | { | |
1043 | ub4 q; /* current position walking through the queue */ | |
1044 | ub4 tail; /* tail of the queue. 0 is the head of the queue. */ | |
1045 | ub4 limit=UB1MAXVAL+1; | |
1046 | ub4 highhash = smax; | |
1047 | ||
1048 | /* initialize the root of the spanning tree */ | |
1049 | tabq[0].b_q = item; | |
1050 | tail = 1; | |
1051 | ||
1052 | /* construct the spanning tree by walking the queue, add children to tail */ | |
1053 | for (q=0; q<tail; ++q) | |
1054 | { | |
1055 | bstuff *myb = tabq[q].b_q; /* the b for this node */ | |
1056 | ub4 i; /* possible value for myb->val_b */ | |
1057 | ||
1058 | if (q == 1) | |
1059 | break; /* don't do transitive closure */ | |
1060 | ||
1061 | for (i=0; i<limit; ++i) | |
1062 | { | |
1063 | bstuff *childb = (bstuff *)0; /* the b that this i maps to */ | |
1064 | key *mykey; /* for walking through myb's keys */ | |
1065 | ||
1066 | for (mykey = myb->list_b; mykey; mykey=mykey->nextb_k) | |
1067 | { | |
1068 | key *childkey; | |
1069 | ub4 hash = mykey->a_k^scramble[i]; | |
1070 | ||
1071 | if (hash >= highhash) break; /* out of bounds */ | |
1072 | childkey = tabh[hash].key_h; | |
1073 | ||
1074 | if (childkey) | |
1075 | { | |
1076 | bstuff *hitb = &tabb[childkey->b_k]; | |
1077 | ||
1078 | if (childb) | |
1079 | { | |
1080 | if (childb != hitb) break; /* hit at most one child b */ | |
1081 | } | |
1082 | else | |
1083 | { | |
1084 | childb = hitb; /* remember this as childb */ | |
1085 | if (childb->water_b == highwater) break; /* already explored */ | |
1086 | } | |
1087 | } | |
1088 | } | |
1089 | if (mykey) continue; /* myb with i has multiple collisions */ | |
1090 | ||
1091 | /* add childb to the queue of reachable things */ | |
1092 | if (childb) childb->water_b = highwater; | |
1093 | tabq[tail].b_q = childb; | |
1094 | tabq[tail].newval_q = i; /* how to make parent (myb) use this hash */ | |
1095 | tabq[tail].oldval_q = myb->val_b; /* need this for rollback */ | |
1096 | tabq[tail].parent_q = q; | |
1097 | ++tail; | |
1098 | ||
1099 | if (!childb) | |
1100 | { /* found an *i* with no collisions? */ | |
1101 | /* try to apply the augmenting path */ | |
1102 | if (apply(tabb, tabh, tabq, blen, scramble, tail, FALSE)) | |
1103 | return TRUE; /* success, item was added to the perfect hash */ | |
1104 | ||
1105 | --tail; /* don't know how to handle such a child! */ | |
1106 | } | |
1107 | } | |
1108 | } | |
1109 | return FALSE; | |
1110 | } | |
1111 | ||
1112 | ||
1113 | /* find a mapping that makes this a perfect hash */ | |
1114 | static int perfect(bstuff *tabb, hstuff *tabh, qstuff *tabq, ub4 blen, ub4 smax, ub4 *scramble, ub4 nkeys) | |
1115 | { | |
1116 | ub4 maxkeys; /* maximum number of keys for any b */ | |
1117 | ub4 i, j; | |
1118 | ||
1119 | #if SELOPT_DEBUG | |
1120 | fprintf(stderr, " blen %d smax %d nkeys %d\n", blen, smax, nkeys); | |
1121 | #endif | |
1122 | ||
1123 | /* clear any state from previous attempts */ | |
1124 | memset((void *)tabh, 0, sizeof(hstuff)*smax); | |
1125 | memset((void *)tabq, 0, sizeof(qstuff)*(blen+1)); | |
1126 | ||
1127 | for (maxkeys=0,i=0; i<blen; ++i) | |
1128 | if (tabb[i].listlen_b > maxkeys) | |
1129 | maxkeys = tabb[i].listlen_b; | |
1130 | ||
1131 | /* In descending order by number of keys, map all *b*s */ | |
1132 | for (j=maxkeys; j>0; --j) | |
1133 | for (i=0; i<blen; ++i) | |
1134 | if (tabb[i].listlen_b == j) | |
1135 | if (!augment(tabb, tabh, tabq, blen, scramble, smax, &tabb[i], nkeys, | |
1136 | i+1)) | |
1137 | { | |
1138 | return FALSE; | |
1139 | } | |
1140 | ||
1141 | /* Success! We found a perfect hash of all keys into 0..nkeys-1. */ | |
1142 | return TRUE; | |
1143 | } | |
1144 | ||
1145 | ||
1146 | /* guess initial values for alen and blen */ | |
1147 | static void initalen(ub4 *alen, ub4 *blen, ub4 smax, ub4 nkeys) | |
1148 | // ub4 *alen; /* output, initial alen */ | |
1149 | // ub4 *blen; /* output, initial blen */ | |
1150 | // ub4 smax; /* input, power of two greater or equal to max hash value */ | |
1151 | // ub4 nkeys; /* number of keys being hashed */ | |
1152 | { | |
1153 | /* | |
1154 | * Find initial *alen, *blen | |
1155 | * Initial alen and blen values were found empirically. Some factors: | |
1156 | * | |
1157 | * If smax<256 there is no scramble, so tab[b] needs to cover 0..smax-1. | |
1158 | * | |
1159 | * alen and blen must be powers of 2 because the values in 0..alen-1 and | |
1160 | * 0..blen-1 are produced by applying a bitmask to the initial hash function. | |
1161 | * | |
1162 | * alen must be less than smax, in fact less than nkeys, because otherwise | |
1163 | * there would often be no i such that a^scramble[i] is in 0..nkeys-1 for | |
1164 | * all the *a*s associated with a given *b*, so there would be no legal | |
1165 | * value to assign to tab[b]. This only matters when we're doing a minimal | |
1166 | * perfect hash. | |
1167 | * | |
1168 | * It takes around 800 trials to find distinct (a,b) with nkey=smax*(5/8) | |
1169 | * and alen*blen = smax*smax/32. | |
1170 | * | |
1171 | * Values of blen less than smax/4 never work, and smax/2 always works. | |
1172 | * | |
1173 | * We want blen as small as possible because it is the number of bytes in | |
1174 | * the huge array we must create for the perfect hash. | |
1175 | * | |
1176 | * When nkey <= smax*(5/8), blen=smax/4 works much more often with | |
1177 | * alen=smax/8 than with alen=smax/4. Above smax*(5/8), blen=smax/4 | |
1178 | * doesn't seem to care whether alen=smax/8 or alen=smax/4. I think it | |
1179 | * has something to do with 5/8 = 1/8 * 5. For example examine 80000, | |
1180 | * 85000, and 90000 keys with different values of alen. This only matters | |
1181 | * if we're doing a minimal perfect hash. | |
1182 | * | |
1183 | * When alen*blen <= 1<<UB4BITS, the initial hash must produce one integer. | |
1184 | * Bigger than that it must produce two integers, which increases the | |
1185 | * cost of the hash per character hashed. | |
1186 | */ | |
1187 | *alen = smax; /* no reason to restrict alen to smax/2 */ | |
1188 | *blen = ((nkeys <= smax*0.6) ? smax/16 : | |
1189 | (nkeys <= smax*0.8) ? smax/8 : smax/4); | |
1190 | ||
1191 | if (*alen < 1) *alen = 1; | |
1192 | if (*blen < 1) *blen = 1; | |
1193 | ||
1194 | #if SELOPT_DEBUG | |
1195 | fprintf(stderr, "alen %d blen %d smax %d nkeys %d\n", *alen, *blen, smax, nkeys); | |
1196 | #endif | |
1197 | } | |
1198 | ||
1199 | /* | |
1200 | ** Try to find a perfect hash function. | |
1201 | ** Return the successful initializer for the initial hash. | |
1202 | ** Return 0 if no perfect hash could be found. | |
1203 | */ | |
1204 | static int findhash(bstuff **tabb, ub4 *alen, ub4 *blen, ub8 *salt, | |
1205 | ub4 *scramble, ub4 smax, key *keys, ub4 nkeys) | |
1206 | // bstuff **tabb; /* output, tab[] of the perfect hash, length *blen */ | |
1207 | // ub4 *alen; /* output, 0..alen-1 is range for a of (a,b) */ | |
1208 | // ub4 *blen; /* output, 0..blen-1 is range for b of (a,b) */ | |
1209 | // ub4 *salt; /* output, initializes initial hash */ | |
1210 | // ub4 *scramble; /* input, hash = a^scramble[tab[b]] */ | |
1211 | // ub4 smax; /* input, scramble[i] in 0..smax-1 */ | |
1212 | // key *keys; /* input, keys to hash */ | |
1213 | // ub4 nkeys; /* input, number of keys being hashed */ | |
1214 | { | |
1215 | ub4 bad_initkey; /* how many times did initkey fail? */ | |
1216 | ub4 bad_perfect; /* how many times did perfect fail? */ | |
1217 | ub4 si; /* trial initializer for initial hash */ | |
1218 | ub4 maxalen; | |
1219 | hstuff *tabh; /* table of keys indexed by hash value */ | |
1220 | qstuff *tabq; /* table of stuff indexed by queue value, used by augment */ | |
1221 | ||
1222 | /* guess initial values for alen and blen */ | |
1223 | initalen(alen, blen, smax, nkeys); | |
1224 | ||
1225 | scrambleinit(scramble, smax); | |
1226 | ||
1227 | maxalen = smax; | |
1228 | ||
1229 | /* allocate working memory */ | |
1230 | *tabb = new bstuff[*blen]; | |
1231 | tabq = new qstuff[*blen+1]; | |
1232 | tabh = new hstuff[smax]; | |
1233 | ||
1234 | /* Actually find the perfect hash */ | |
1235 | *salt = 0; | |
1236 | bad_initkey = 0; | |
1237 | bad_perfect = 0; | |
1238 | for (si=1; ; ++si) | |
1239 | { | |
1240 | ub4 rslinit; | |
1241 | /* Try to find distinct (A,B) for all keys */ | |
1242 | *salt = si * 0x9e3779b97f4a7c13LL; /* golden ratio (arbitrary value) */ | |
1243 | initnorm(keys, nkeys, *alen, *blen, smax, *salt); | |
1244 | rslinit = inittab(*tabb, *blen, keys, nkeys, FALSE); | |
1245 | if (rslinit == 0) | |
1246 | { | |
1247 | /* didn't find distinct (a,b) */ | |
1248 | if (++bad_initkey >= RETRY_INITKEY) | |
1249 | { | |
1250 | /* Try to put more bits in (A,B) to make distinct (A,B) more likely */ | |
1251 | if (*alen < maxalen) | |
1252 | { | |
1253 | *alen *= 2; | |
1254 | } | |
1255 | else if (*blen < smax) | |
1256 | { | |
1257 | *blen *= 2; | |
1258 | delete[] tabq; | |
1259 | delete[] *tabb; | |
1260 | *tabb = new bstuff[*blen]; | |
1261 | tabq = new qstuff[*blen+1]; | |
1262 | } | |
1263 | bad_initkey = 0; | |
1264 | bad_perfect = 0; | |
1265 | } | |
1266 | continue; /* two keys have same (a,b) pair */ | |
1267 | } | |
1268 | ||
1269 | /* Given distinct (A,B) for all keys, build a perfect hash */ | |
1270 | if (!perfect(*tabb, tabh, tabq, *blen, smax, scramble, nkeys)) | |
1271 | { | |
1272 | if (++bad_perfect >= RETRY_PERFECT) | |
1273 | { | |
1274 | if (*blen < smax) | |
1275 | { | |
1276 | *blen *= 2; | |
1277 | delete[] *tabb; | |
1278 | delete[] tabq; | |
1279 | *tabb = new bstuff[*blen]; | |
1280 | tabq = new qstuff[*blen+1]; | |
1281 | --si; /* we know this salt got distinct (A,B) */ | |
1282 | } | |
1283 | else | |
1284 | { | |
1285 | return 0; | |
1286 | } | |
1287 | bad_perfect = 0; | |
1288 | } | |
1289 | continue; | |
1290 | } | |
1291 | ||
1292 | break; | |
1293 | } | |
1294 | ||
1295 | /* free working memory */ | |
1296 | delete[] tabh; | |
1297 | delete[] tabq; | |
1298 | ||
1299 | return 1; | |
1300 | } | |
1301 | ||
1302 | /* | |
1303 | ------------------------------------------------------------------------------ | |
1304 | Input/output type routines | |
1305 | ------------------------------------------------------------------------------ | |
1306 | */ | |
1307 | ||
1308 | /* get the list of keys */ | |
1309 | static void getkeys(key **keys, ub4 *nkeys, const string_map& strings) | |
1310 | { | |
1311 | key *buf = new key[strings.size()]; | |
1312 | size_t i; | |
1313 | string_map::const_iterator s; | |
1314 | for (i = 0, s = strings.begin(); s != strings.end(); ++s, ++i) { | |
1315 | key *mykey = buf+i; | |
1316 | mykey->name_k = (ub1 *)s->first; | |
1317 | mykey->len_k = (ub4)strlen(s->first); | |
1318 | } | |
1319 | *keys = buf; | |
1320 | *nkeys = strings.size(); | |
1321 | } | |
1322 | ||
1323 | ||
1324 | static perfect_hash | |
1325 | make_perfect(const string_map& strings) | |
1326 | { | |
1327 | ub4 nkeys; /* number of keys */ | |
1328 | key *keys; /* head of list of keys */ | |
1329 | bstuff *tab; /* table indexed by b */ | |
1330 | ub4 smax; /* scramble[] values in 0..smax-1, a power of 2 */ | |
1331 | ub4 alen; /* a in 0..alen-1, a power of 2 */ | |
1332 | ub4 blen; /* b in 0..blen-1, a power of 2 */ | |
1333 | ub8 salt; /* a parameter to the hash function */ | |
1334 | ub4 scramble[SCRAMBLE_LEN]; /* used in final hash function */ | |
1335 | int ok; | |
1336 | int i; | |
1337 | perfect_hash result; | |
1338 | ||
1339 | /* read in the list of keywords */ | |
1340 | getkeys(&keys, &nkeys, strings); | |
1341 | ||
1342 | /* find the hash */ | |
1343 | smax = ((ub4)1<<log2u(nkeys)); | |
1344 | ok = findhash(&tab, &alen, &blen, &salt, | |
1345 | scramble, smax, keys, nkeys); | |
1346 | if (!ok) { | |
1347 | smax = 2 * ((ub4)1<<log2u(nkeys)); | |
1348 | ok = findhash(&tab, &alen, &blen, &salt, | |
1349 | scramble, smax, keys, nkeys); | |
1350 | } | |
1351 | if (!ok) { | |
1352 | bzero(&result, sizeof(result)); | |
1353 | } else { | |
1354 | /* build the tables */ | |
1355 | result.capacity = smax; | |
1356 | result.occupied = nkeys; | |
1357 | result.shift = UB8BITS - log2u(alen); | |
1358 | result.mask = blen - 1; | |
1359 | result.salt = salt; | |
1360 | ||
1361 | result.tab = new uint8_t[blen]; | |
1362 | for (i = 0; i < blen; i++) { | |
1363 | result.tab[i] = tab[i].val_b; | |
1364 | } | |
1365 | for (i = 0; i < 256; i++) { | |
1366 | result.scramble[i] = scramble[i]; | |
1367 | } | |
1368 | } | |
1369 | ||
1370 | delete[] keys; | |
1371 | delete[] tab; | |
1372 | ||
1373 | return result; | |
1374 | } | |
1375 | ||
1376 | // SELOPT_WRITE | |
1377 | #endif | |
1378 | ||
1379 | // namespace objc_selopt | |
1380 | }; | |
1381 | ||
1382 | #undef S32 | |
1383 | #undef S64 | |
1384 | ||
1385 | #endif |