]>
Commit | Line | Data |
---|---|---|
832b6fce A |
1 | /* |
2 | * Copyright (c) 2008 Apple Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. Please obtain a copy of the License at | |
10 | * http://www.opensource.apple.com/apsl/ and read it before using this | |
11 | * file. | |
12 | * | |
13 | * The Original Code and all software distributed under the License are | |
14 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
15 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
16 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
17 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
18 | * Please see the License for the specific language governing rights and | |
19 | * limitations under the License. | |
20 | * | |
21 | * @APPLE_LICENSE_HEADER_END@ | |
22 | */ | |
23 | ||
24 | /* | |
25 | Portions derived from: | |
26 | ||
27 | -------------------------------------------------------------------- | |
28 | lookup8.c, by Bob Jenkins, January 4 1997, Public Domain. | |
29 | hash(), hash2(), hash3, and mix() are externally useful functions. | |
30 | Routines to test the hash are included if SELF_TEST is defined. | |
31 | You can use this free for any purpose. It has no warranty. | |
32 | -------------------------------------------------------------------- | |
33 | ||
34 | ------------------------------------------------------------------------------ | |
35 | perfect.c: code to generate code for a hash for perfect hashing. | |
36 | (c) Bob Jenkins, September 1996, December 1999 | |
37 | You may use this code in any way you wish, and it is free. No warranty. | |
38 | I hereby place this in the public domain. | |
39 | Source is http://burtleburtle.net/bob/c/perfect.c | |
40 | ------------------------------------------------------------------------------ | |
41 | */ | |
42 | ||
43 | /* | |
44 | * objc-selopt.h | |
45 | * Interface between libobjc and dyld | |
46 | * for selector uniquing in the dyld shared cache. | |
47 | * | |
48 | * When building the shared cache, dyld locates all selectors and selector | |
49 | * references in the cached images. It builds a perfect hash table out of | |
50 | * them and writes the table into the shared cache copy of libobjc. | |
51 | * libobjc then uses that table as the builtin selector list. | |
52 | * | |
53 | * Versioning | |
54 | * The table has a version number. dyld and objc can both ignore the table | |
55 | * if the other used the wrong version number. | |
56 | * | |
57 | * Completeness | |
58 | * Not all libraries are in the shared cache. Libraries that are in the | |
59 | * shared cache and were optimized are specially marked. Libraries on | |
60 | * disk never include those marks. | |
61 | * | |
62 | * Coherency | |
63 | * Libraries optimized in the shared cache can be replaced by unoptimized | |
64 | * copies from disk when loaded. The copy from disk is not marked and will | |
65 | * be fixed up by libobjc. The shared cache copy is still mapped into the | |
66 | * process, so the table can point to cstring data in that library's part | |
67 | * of the shared cache without trouble. | |
68 | * | |
69 | * Atomicity | |
70 | * dyld writes the table itself last. If dyld marks some metadata as | |
71 | * updated but then fails to write a table for some reason, libobjc | |
72 | * fixes up all metadata as if it were not marked. | |
73 | */ | |
74 | ||
75 | #ifndef _OBJC_SELOPT_H | |
76 | #define _OBJC_SELOPT_H | |
77 | ||
78 | /* | |
79 | DO NOT INCLUDE ANY objc HEADERS HERE | |
80 | dyld USES THIS FILE AND CANNOT SEE THEM | |
81 | */ | |
82 | #include <stdint.h> | |
83 | #include <stdlib.h> | |
84 | #ifdef SELOPT_WRITE | |
2fd3f4e8 | 85 | #include <unordered_map> |
832b6fce A |
86 | #endif |
87 | /* | |
88 | DO NOT INCLUDE ANY objc HEADERS HERE | |
89 | dyld USES THIS FILE AND CANNOT SEE THEM | |
90 | */ | |
91 | ||
92 | #ifndef STATIC_ASSERT | |
93 | # define STATIC_ASSERT(x) _STATIC_ASSERT2(x, __LINE__) | |
94 | # define _STATIC_ASSERT2(x, line) _STATIC_ASSERT3(x, line) | |
95 | # define _STATIC_ASSERT3(x, line) \ | |
96 | typedef struct { \ | |
97 | int _static_assert[(x) ? 0 : -1]; \ | |
98 | } _static_assert_ ## line __attribute__((unavailable)) | |
99 | #endif | |
100 | ||
101 | #define SELOPT_DEBUG 0 | |
102 | ||
103 | #define S32(x) x = little_endian ? OSSwapHostToLittleInt32(x) : OSSwapHostToBigInt32(x) | |
104 | #define S64(x) x = little_endian ? OSSwapHostToLittleInt64(x) : OSSwapHostToBigInt64(x) | |
105 | ||
106 | namespace objc_opt { | |
107 | ||
108 | typedef int32_t objc_stringhash_offset_t; | |
109 | typedef uint8_t objc_stringhash_check_t; | |
110 | ||
2fd3f4e8 A |
111 | static uint64_t lookup8( uint8_t *k, size_t length, uint64_t level); |
112 | ||
832b6fce A |
113 | #ifdef SELOPT_WRITE |
114 | ||
115 | // Perfect hash code is at the end of this file. | |
116 | ||
d113e8b5 | 117 | struct __attribute__((packed)) perfect_hash { |
832b6fce A |
118 | uint32_t capacity; |
119 | uint32_t occupied; | |
120 | uint32_t shift; | |
121 | uint32_t mask; | |
122 | uint64_t salt; | |
d113e8b5 | 123 | |
832b6fce A |
124 | uint32_t scramble[256]; |
125 | uint8_t *tab; // count == mask+1; free with delete[] | |
126 | ||
127 | perfect_hash() : tab(0) { } | |
128 | ||
129 | ~perfect_hash() { if (tab) delete[] tab; } | |
130 | }; | |
131 | ||
132 | struct eqstr { | |
133 | bool operator()(const char* s1, const char* s2) const { | |
134 | return strcmp(s1, s2) == 0; | |
135 | } | |
136 | }; | |
137 | ||
2fd3f4e8 A |
138 | struct hashstr { |
139 | size_t operator()(const char *s) const { | |
140 | return (size_t)lookup8((uint8_t *)s, strlen(s), 0); | |
141 | } | |
142 | }; | |
143 | ||
832b6fce A |
144 | // cstring => cstring's vmaddress |
145 | // (used for selector names and class names) | |
2fd3f4e8 | 146 | typedef std::unordered_map<const char *, uint64_t, hashstr, eqstr> string_map; |
832b6fce | 147 | |
df9d6cf7 A |
148 | // protocol name => protocol vmaddress |
149 | typedef std::unordered_map<const char *, uint64_t, hashstr, eqstr> protocol_map; | |
150 | ||
832b6fce | 151 | // class name => (class vmaddress, header_info vmaddress) |
2fd3f4e8 | 152 | typedef std::unordered_multimap<const char *, std::pair<uint64_t, uint64_t>, hashstr, eqstr> class_map; |
832b6fce A |
153 | |
154 | static perfect_hash make_perfect(const string_map& strings); | |
155 | ||
156 | #endif | |
157 | ||
832b6fce A |
158 | |
159 | // Precomputed perfect hash table of strings. | |
160 | // Base class for precomputed selector table and class table. | |
9f83892a | 161 | // Edit objc-sel-table.s if you change this structure. |
d113e8b5 | 162 | struct __attribute__((packed)) objc_stringhash_t { |
832b6fce A |
163 | uint32_t capacity; |
164 | uint32_t occupied; | |
165 | uint32_t shift; | |
166 | uint32_t mask; | |
df9d6cf7 A |
167 | uint32_t unused1; // was zero |
168 | uint32_t unused2; // alignment pad | |
832b6fce | 169 | uint64_t salt; |
d113e8b5 | 170 | |
832b6fce A |
171 | uint32_t scramble[256]; |
172 | uint8_t tab[0]; /* tab[mask+1] (always power-of-2) */ | |
173 | // uint8_t checkbytes[capacity]; /* check byte for each string */ | |
174 | // int32_t offsets[capacity]; /* offsets from &capacity to cstrings */ | |
175 | ||
176 | objc_stringhash_check_t *checkbytes() { return (objc_stringhash_check_t *)&tab[mask+1]; } | |
177 | const objc_stringhash_check_t *checkbytes() const { return (const objc_stringhash_check_t *)&tab[mask+1]; } | |
178 | ||
179 | objc_stringhash_offset_t *offsets() { return (objc_stringhash_offset_t *)&checkbytes()[capacity]; } | |
180 | const objc_stringhash_offset_t *offsets() const { return (const objc_stringhash_offset_t *)&checkbytes()[capacity]; } | |
181 | ||
2fd3f4e8 | 182 | uint32_t hash(const char *key, size_t keylen) const |
832b6fce | 183 | { |
2fd3f4e8 | 184 | uint64_t val = lookup8((uint8_t*)key, keylen, salt); |
832b6fce A |
185 | uint32_t index = (uint32_t)(val>>shift) ^ scramble[tab[val&mask]]; |
186 | return index; | |
187 | } | |
188 | ||
2fd3f4e8 A |
189 | uint32_t hash(const char *key) const |
190 | { | |
191 | return hash(key, strlen(key)); | |
192 | } | |
193 | ||
832b6fce A |
194 | // The check bytes areused to reject strings that aren't in the table |
195 | // without paging in the table's cstring data. This checkbyte calculation | |
196 | // catches 4785/4815 rejects when launching Safari; a perfect checkbyte | |
197 | // would catch 4796/4815. | |
2fd3f4e8 | 198 | objc_stringhash_check_t checkbyte(const char *key, size_t keylen) const |
832b6fce A |
199 | { |
200 | return | |
201 | ((key[0] & 0x7) << 5) | |
202 | | | |
2fd3f4e8 | 203 | ((uint8_t)keylen & 0x1f); |
832b6fce A |
204 | } |
205 | ||
2fd3f4e8 A |
206 | objc_stringhash_check_t checkbyte(const char *key) const |
207 | { | |
208 | return checkbyte(key, strlen(key)); | |
209 | } | |
210 | ||
211 | ||
832b6fce A |
212 | #define INDEX_NOT_FOUND (~(uint32_t)0) |
213 | ||
214 | uint32_t getIndex(const char *key) const | |
215 | { | |
2fd3f4e8 A |
216 | size_t keylen = strlen(key); |
217 | uint32_t h = hash(key, keylen); | |
832b6fce A |
218 | |
219 | // Use check byte to reject without paging in the table's cstrings | |
220 | objc_stringhash_check_t h_check = checkbytes()[h]; | |
2fd3f4e8 | 221 | objc_stringhash_check_t key_check = checkbyte(key, keylen); |
832b6fce A |
222 | bool check_fail = (h_check != key_check); |
223 | #if ! SELOPT_DEBUG | |
224 | if (check_fail) return INDEX_NOT_FOUND; | |
225 | #endif | |
226 | ||
2fd3f4e8 | 227 | objc_stringhash_offset_t offset = offsets()[h]; |
df9d6cf7 | 228 | if (offset == 0) return INDEX_NOT_FOUND; |
2fd3f4e8 | 229 | const char *result = (const char *)this + offset; |
832b6fce A |
230 | if (0 != strcmp(key, result)) return INDEX_NOT_FOUND; |
231 | ||
232 | #if SELOPT_DEBUG | |
233 | if (check_fail) abort(); | |
234 | #endif | |
235 | ||
236 | return h; | |
237 | } | |
238 | ||
239 | #ifdef SELOPT_WRITE | |
240 | ||
241 | size_t size() | |
242 | { | |
243 | return sizeof(objc_stringhash_t) | |
244 | + mask+1 | |
245 | + capacity * sizeof(objc_stringhash_check_t) | |
246 | + capacity * sizeof(objc_stringhash_offset_t); | |
247 | } | |
248 | ||
249 | void byteswap(bool little_endian) | |
250 | { | |
251 | // tab and checkbytes are arrays of bytes, no swap needed | |
252 | for (uint32_t i = 0; i < 256; i++) { | |
253 | S32(scramble[i]); | |
254 | } | |
255 | objc_stringhash_offset_t *o = offsets(); | |
256 | for (uint32_t i = 0; i < capacity; i++) { | |
257 | S32(o[i]); | |
258 | } | |
259 | ||
260 | S32(capacity); | |
261 | S32(occupied); | |
262 | S32(shift); | |
263 | S32(mask); | |
832b6fce A |
264 | S64(salt); |
265 | } | |
266 | ||
267 | const char *write(uint64_t base, size_t remaining, string_map& strings) | |
268 | { | |
269 | if (sizeof(objc_stringhash_t) > remaining) { | |
270 | return "selector section too small (metadata not optimized)"; | |
271 | } | |
272 | ||
273 | if (strings.size() == 0) { | |
274 | bzero(this, sizeof(objc_stringhash_t)); | |
275 | return NULL; | |
276 | } | |
277 | ||
278 | perfect_hash phash = make_perfect(strings); | |
279 | if (phash.capacity == 0) { | |
280 | return "perfect hash failed (metadata not optimized)"; | |
281 | } | |
282 | ||
283 | // Set header | |
284 | capacity = phash.capacity; | |
285 | occupied = phash.occupied; | |
286 | shift = phash.shift; | |
287 | mask = phash.mask; | |
df9d6cf7 A |
288 | unused1 = 0; |
289 | unused2 = 0; | |
832b6fce A |
290 | salt = phash.salt; |
291 | ||
292 | if (size() > remaining) { | |
293 | return "selector section too small (metadata not optimized)"; | |
294 | } | |
295 | ||
296 | // Set hash data | |
297 | for (uint32_t i = 0; i < 256; i++) { | |
298 | scramble[i] = phash.scramble[i]; | |
299 | } | |
300 | for (uint32_t i = 0; i < phash.mask+1; i++) { | |
301 | tab[i] = phash.tab[i]; | |
302 | } | |
303 | ||
df9d6cf7 | 304 | // Set offsets to 0 |
832b6fce | 305 | for (uint32_t i = 0; i < phash.capacity; i++) { |
df9d6cf7 | 306 | offsets()[i] = 0; |
832b6fce A |
307 | } |
308 | // Set checkbytes to 0 | |
309 | for (uint32_t i = 0; i < phash.capacity; i++) { | |
310 | checkbytes()[i] = 0; | |
311 | } | |
312 | ||
313 | // Set real string offsets and checkbytes | |
314 | # define SHIFT (64 - 8*sizeof(objc_stringhash_offset_t)) | |
315 | string_map::const_iterator s; | |
316 | for (s = strings.begin(); s != strings.end(); ++s) { | |
317 | int64_t offset = s->second - base; | |
318 | if ((offset<<SHIFT)>>SHIFT != offset) { | |
319 | return "selector offset too big (metadata not optimized)"; | |
320 | } | |
321 | ||
322 | uint32_t h = hash(s->first); | |
323 | offsets()[h] = (objc_stringhash_offset_t)offset; | |
324 | checkbytes()[h] = checkbyte(s->first); | |
325 | } | |
326 | # undef SHIFT | |
327 | ||
328 | return NULL; | |
329 | } | |
330 | ||
331 | // SELOPT_WRITE | |
332 | #endif | |
333 | }; | |
334 | ||
335 | ||
336 | // Precomputed selector table. | |
9f83892a | 337 | // Edit objc-sel-table.s if you change this structure. |
832b6fce A |
338 | struct objc_selopt_t : objc_stringhash_t { |
339 | const char *get(const char *key) const | |
340 | { | |
341 | uint32_t h = getIndex(key); | |
342 | if (h == INDEX_NOT_FOUND) return NULL; | |
343 | ||
344 | return (const char *)this + offsets()[h]; | |
345 | } | |
346 | }; | |
347 | ||
348 | // Precomputed class list. | |
9f83892a | 349 | // Edit objc-sel-table.s if you change these structures. |
832b6fce A |
350 | |
351 | struct objc_classheader_t { | |
352 | objc_stringhash_offset_t clsOffset; | |
353 | objc_stringhash_offset_t hiOffset; | |
354 | ||
355 | // For duplicate class names: | |
356 | // clsOffset = count<<1 | 1 | |
357 | // duplicated classes are duplicateOffsets[hiOffset..hiOffset+count-1] | |
358 | bool isDuplicate() const { return clsOffset & 1; } | |
359 | uint32_t duplicateCount() const { return clsOffset >> 1; } | |
360 | uint32_t duplicateIndex() const { return hiOffset; } | |
361 | }; | |
362 | ||
363 | ||
364 | struct objc_clsopt_t : objc_stringhash_t { | |
365 | // ...objc_stringhash_t fields... | |
366 | // objc_classheader_t classOffsets[capacity]; /* offsets from &capacity to class_t and header_info */ | |
367 | // uint32_t duplicateCount; | |
368 | // objc_classheader_t duplicateOffsets[duplicatedClasses]; | |
369 | ||
370 | objc_classheader_t *classOffsets() { return (objc_classheader_t *)&offsets()[capacity]; } | |
371 | const objc_classheader_t *classOffsets() const { return (const objc_classheader_t *)&offsets()[capacity]; } | |
372 | ||
373 | uint32_t& duplicateCount() { return *(uint32_t *)&classOffsets()[capacity]; } | |
374 | const uint32_t& duplicateCount() const { return *(const uint32_t *)&classOffsets()[capacity]; } | |
375 | ||
376 | objc_classheader_t *duplicateOffsets() { return (objc_classheader_t *)(&duplicateCount()+1); } | |
377 | const objc_classheader_t *duplicateOffsets() const { return (const objc_classheader_t *)(&duplicateCount()+1); } | |
378 | ||
379 | // 0/NULL/NULL: not found | |
380 | // 1/ptr/ptr: found exactly one | |
381 | // n/NULL/NULL: found N - use getClassesAndHeaders() instead | |
382 | uint32_t getClassAndHeader(const char *key, void*& cls, void*& hi) const | |
383 | { | |
384 | uint32_t h = getIndex(key); | |
385 | if (h == INDEX_NOT_FOUND) { | |
386 | cls = NULL; | |
387 | hi = NULL; | |
388 | return 0; | |
389 | } | |
390 | ||
391 | const objc_classheader_t& clshi = classOffsets()[h]; | |
392 | if (! clshi.isDuplicate()) { | |
393 | // class appears in exactly one header | |
394 | cls = (void *)((const char *)this + clshi.clsOffset); | |
395 | hi = (void *)((const char *)this + clshi.hiOffset); | |
396 | return 1; | |
397 | } | |
398 | else { | |
399 | // class appears in more than one header - use getClassesAndHeaders | |
400 | cls = NULL; | |
401 | hi = NULL; | |
402 | return clshi.duplicateCount(); | |
403 | } | |
404 | } | |
405 | ||
406 | void getClassesAndHeaders(const char *key, void **cls, void **hi) const | |
407 | { | |
408 | uint32_t h = getIndex(key); | |
409 | if (h == INDEX_NOT_FOUND) return; | |
410 | ||
411 | const objc_classheader_t& clshi = classOffsets()[h]; | |
412 | if (! clshi.isDuplicate()) { | |
413 | // class appears in exactly one header | |
414 | cls[0] = (void *)((const char *)this + clshi.clsOffset); | |
415 | hi[0] = (void *)((const char *)this + clshi.hiOffset); | |
416 | } | |
417 | else { | |
418 | // class appears in more than one header | |
419 | uint32_t count = clshi.duplicateCount(); | |
420 | const objc_classheader_t *list = | |
421 | &duplicateOffsets()[clshi.duplicateIndex()]; | |
422 | for (uint32_t i = 0; i < count; i++) { | |
423 | cls[i] = (void *)((const char *)this + list[i].clsOffset); | |
424 | hi[i] = (void *)((const char *)this + list[i].hiOffset); | |
425 | } | |
426 | } | |
427 | } | |
428 | ||
429 | #ifdef SELOPT_WRITE | |
430 | ||
431 | size_t size() | |
432 | { | |
433 | return | |
434 | objc_stringhash_t::size() | |
435 | + capacity * sizeof(objc_classheader_t) | |
436 | + sizeof(duplicateCount()) | |
437 | + duplicateCount() * sizeof(objc_classheader_t); | |
438 | } | |
439 | ||
440 | void byteswap(bool little_endian) | |
441 | { | |
442 | objc_classheader_t *o; | |
443 | ||
444 | o = classOffsets(); | |
445 | for (uint32_t i = 0; i < capacity; i++) { | |
446 | S32(o[i].clsOffset); | |
447 | S32(o[i].hiOffset); | |
448 | } | |
449 | ||
450 | o = duplicateOffsets(); | |
451 | for (uint32_t i = 0; i < duplicateCount(); i++) { | |
452 | S32(o[i].clsOffset); | |
453 | S32(o[i].hiOffset); | |
454 | } | |
455 | ||
456 | S32(duplicateCount()); | |
457 | ||
458 | objc_stringhash_t::byteswap(little_endian); | |
459 | } | |
460 | ||
461 | const char *write(uint64_t base, size_t remaining, | |
462 | string_map& strings, class_map& classes, bool verbose) | |
463 | { | |
464 | const char *err; | |
465 | err = objc_stringhash_t::write(base, remaining, strings); | |
466 | if (err) return err; | |
467 | ||
468 | if (size() > remaining) { | |
469 | return "selector section too small (metadata not optimized)"; | |
470 | } | |
471 | ||
df9d6cf7 | 472 | // Set class offsets to 0 |
832b6fce | 473 | for (uint32_t i = 0; i < capacity; i++) { |
df9d6cf7 A |
474 | classOffsets()[i].clsOffset = 0; |
475 | classOffsets()[i].hiOffset = 0; | |
832b6fce A |
476 | } |
477 | ||
478 | // Set real class offsets | |
479 | # define SHIFT (64 - 8*sizeof(objc_stringhash_offset_t)) | |
480 | class_map::const_iterator c; | |
481 | for (c = classes.begin(); c != classes.end(); ++c) { | |
482 | uint32_t h = getIndex(c->first); | |
483 | if (h == INDEX_NOT_FOUND) { | |
484 | return "class list busted (metadata not optimized)"; | |
485 | } | |
486 | ||
df9d6cf7 | 487 | if (classOffsets()[h].clsOffset != 0) { |
832b6fce A |
488 | // already did this class |
489 | continue; | |
490 | } | |
491 | ||
9f83892a | 492 | uint32_t count = (uint32_t)classes.count(c->first); |
832b6fce A |
493 | if (count == 1) { |
494 | // only one class with this name | |
495 | ||
496 | int64_t coff = c->second.first - base; | |
497 | int64_t hoff = c->second.second - base; | |
498 | if ((coff<<SHIFT)>>SHIFT != coff) { | |
499 | return "class offset too big (metadata not optimized)"; | |
500 | } | |
501 | if ((hoff<<SHIFT)>>SHIFT != hoff) { | |
502 | return "header offset too big (metadata not optimized)"; | |
503 | } | |
504 | ||
505 | classOffsets()[h].clsOffset = (objc_stringhash_offset_t)coff; | |
506 | classOffsets()[h].hiOffset = (objc_stringhash_offset_t)hoff; | |
507 | } | |
508 | else { | |
509 | // class name has duplicates - write them all now | |
510 | if (verbose) { | |
511 | fprintf(stderr, "update_dyld_shared_cache: %u duplicates of Objective-C class %s\n", count, c->first); | |
512 | } | |
513 | ||
514 | uint32_t dest = duplicateCount(); | |
515 | duplicateCount() += count; | |
516 | if (size() > remaining) { | |
517 | return "selector section too small (metadata not optimized)"; | |
518 | } | |
519 | ||
520 | // classOffsets() instead contains count and array index | |
521 | classOffsets()[h].clsOffset = count*2 + 1; | |
522 | classOffsets()[h].hiOffset = dest; | |
523 | ||
524 | std::pair<class_map::const_iterator, class_map::const_iterator> | |
525 | duplicates = classes.equal_range(c->first); | |
526 | class_map::const_iterator dup; | |
527 | for (dup = duplicates.first; dup != duplicates.second; ++dup) { | |
528 | int64_t coff = dup->second.first - base; | |
529 | int64_t hoff = dup->second.second - base; | |
530 | if ((coff<<SHIFT)>>SHIFT != coff) { | |
531 | return "class offset too big (metadata not optimized)"; | |
532 | } | |
533 | if ((hoff<<SHIFT)>>SHIFT != hoff) { | |
534 | return "header offset too big (metadata not optimized)"; | |
535 | } | |
536 | ||
537 | duplicateOffsets()[dest].clsOffset = (objc_stringhash_offset_t)coff; | |
538 | duplicateOffsets()[dest].hiOffset = (objc_stringhash_offset_t)hoff; | |
539 | dest++; | |
540 | } | |
541 | } | |
542 | } | |
543 | # undef SHIFT | |
544 | ||
545 | return NULL; | |
546 | } | |
547 | ||
548 | // SELOPT_WRITE | |
549 | #endif | |
550 | }; | |
551 | ||
df9d6cf7 A |
552 | |
553 | ||
554 | struct objc_protocolopt_t : objc_stringhash_t { | |
555 | // ...objc_stringhash_t fields... | |
556 | // uint32_t protocolOffsets[capacity]; /* offsets from &capacity to protocol_t */ | |
557 | ||
558 | objc_stringhash_offset_t *protocolOffsets() { return (objc_stringhash_offset_t *)&offsets()[capacity]; } | |
559 | const objc_stringhash_offset_t *protocolOffsets() const { return (const objc_stringhash_offset_t *)&offsets()[capacity]; } | |
560 | ||
561 | void* getProtocol(const char *key) const | |
562 | { | |
563 | uint32_t h = getIndex(key); | |
564 | if (h == INDEX_NOT_FOUND) { | |
565 | return NULL; | |
566 | } | |
567 | ||
568 | return (void *)((const char *)this + protocolOffsets()[h]); | |
569 | } | |
570 | ||
571 | #ifdef SELOPT_WRITE | |
572 | ||
573 | size_t size() | |
574 | { | |
575 | return | |
576 | objc_stringhash_t::size() + capacity * sizeof(objc_stringhash_offset_t); | |
577 | } | |
578 | ||
579 | void byteswap(bool little_endian) | |
580 | { | |
581 | objc_stringhash_offset_t *o; | |
582 | ||
583 | o = protocolOffsets(); | |
584 | for (objc_stringhash_offset_t i = 0; i < capacity; i++) { | |
585 | S32(o[i]); | |
586 | } | |
587 | ||
588 | objc_stringhash_t::byteswap(little_endian); | |
589 | } | |
590 | ||
591 | const char *write(uint64_t base, size_t remaining, | |
592 | string_map& strings, protocol_map& protocols, | |
593 | bool verbose) | |
594 | { | |
595 | const char *err; | |
596 | err = objc_stringhash_t::write(base, remaining, strings); | |
597 | if (err) return err; | |
598 | ||
599 | if (size() > remaining) { | |
600 | return "selector section too small (metadata not optimized)"; | |
601 | } | |
602 | ||
603 | // Set protocol offsets to 0 | |
604 | for (uint32_t i = 0; i < capacity; i++) { | |
605 | protocolOffsets()[i] = 0; | |
606 | } | |
607 | ||
608 | // Set real protocol offsets | |
609 | # define SHIFT (64 - 8*sizeof(objc_stringhash_offset_t)) | |
610 | protocol_map::const_iterator c; | |
611 | for (c = protocols.begin(); c != protocols.end(); ++c) { | |
612 | uint32_t h = getIndex(c->first); | |
613 | if (h == INDEX_NOT_FOUND) { | |
614 | return "protocol list busted (metadata not optimized)"; | |
615 | } | |
616 | ||
617 | int64_t offset = c->second - base; | |
618 | if ((offset<<SHIFT)>>SHIFT != offset) { | |
619 | return "protocol offset too big (metadata not optimized)"; | |
620 | } | |
621 | ||
622 | protocolOffsets()[h] = (objc_stringhash_offset_t)offset; | |
623 | } | |
624 | # undef SHIFT | |
625 | ||
626 | return NULL; | |
627 | } | |
628 | ||
629 | // SELOPT_WRITE | |
630 | #endif | |
631 | }; | |
632 | ||
633 | ||
832b6fce | 634 | // Precomputed image list. |
9f83892a A |
635 | struct objc_headeropt_ro_t; |
636 | ||
637 | // Precomputed image list. | |
638 | struct objc_headeropt_rw_t; | |
832b6fce A |
639 | |
640 | // Precomputed class list. | |
641 | struct objc_clsopt_t; | |
642 | ||
643 | // Edit objc-sel-table.s if you change this value. | |
9f83892a A |
644 | // lldb and Symbolication read these structures. Inform them of any changes. |
645 | enum { VERSION = 15 }; | |
646 | ||
647 | // Values for objc_opt_t::flags | |
648 | enum : uint32_t { | |
649 | IsProduction = (1 << 0), // never set in development cache | |
650 | NoMissingWeakSuperclasses = (1 << 1), // never set in development cache | |
651 | }; | |
832b6fce A |
652 | |
653 | // Top-level optimization structure. | |
9f83892a | 654 | // Edit objc-sel-table.s if you change this structure. |
df9d6cf7 | 655 | struct alignas(alignof(void*)) objc_opt_t { |
832b6fce | 656 | uint32_t version; |
9f83892a | 657 | uint32_t flags; |
832b6fce | 658 | int32_t selopt_offset; |
9f83892a | 659 | int32_t headeropt_ro_offset; |
832b6fce | 660 | int32_t clsopt_offset; |
df9d6cf7 | 661 | int32_t protocolopt_offset; |
9f83892a | 662 | int32_t headeropt_rw_offset; |
832b6fce | 663 | |
9f83892a | 664 | const objc_selopt_t* selopt() const { |
832b6fce A |
665 | if (selopt_offset == 0) return NULL; |
666 | return (objc_selopt_t *)((uint8_t *)this + selopt_offset); | |
667 | } | |
668 | objc_selopt_t* selopt() { | |
669 | if (selopt_offset == 0) return NULL; | |
670 | return (objc_selopt_t *)((uint8_t *)this + selopt_offset); | |
671 | } | |
672 | ||
9f83892a A |
673 | struct objc_headeropt_ro_t* headeropt_ro() const { |
674 | if (headeropt_ro_offset == 0) return NULL; | |
675 | return (struct objc_headeropt_ro_t *)((uint8_t *)this + headeropt_ro_offset); | |
832b6fce A |
676 | } |
677 | ||
678 | struct objc_clsopt_t* clsopt() const { | |
679 | if (clsopt_offset == 0) return NULL; | |
680 | return (objc_clsopt_t *)((uint8_t *)this + clsopt_offset); | |
681 | } | |
df9d6cf7 A |
682 | |
683 | struct objc_protocolopt_t* protocolopt() const { | |
684 | if (protocolopt_offset == 0) return NULL; | |
685 | return (objc_protocolopt_t *)((uint8_t *)this + protocolopt_offset); | |
686 | } | |
9f83892a A |
687 | |
688 | struct objc_headeropt_rw_t* headeropt_rw() const { | |
689 | if (headeropt_rw_offset == 0) return NULL; | |
690 | return (struct objc_headeropt_rw_t *)((uint8_t *)this + headeropt_rw_offset); | |
691 | } | |
832b6fce A |
692 | }; |
693 | ||
694 | // sizeof(objc_opt_t) must be pointer-aligned | |
695 | STATIC_ASSERT(sizeof(objc_opt_t) % sizeof(void*) == 0); | |
696 | ||
832b6fce | 697 | |
df9d6cf7 A |
698 | // List of offsets in libobjc that the shared cache optimization needs to use. |
699 | template <typename T> | |
700 | struct objc_opt_pointerlist_tt { | |
701 | T protocolClass; | |
702 | }; | |
703 | typedef struct objc_opt_pointerlist_tt<uintptr_t> objc_opt_pointerlist_t; | |
704 | ||
705 | ||
832b6fce A |
706 | /* |
707 | -------------------------------------------------------------------- | |
708 | mix -- mix 3 64-bit values reversibly. | |
709 | mix() takes 48 machine instructions, but only 24 cycles on a superscalar | |
710 | machine (like Intel's new MMX architecture). It requires 4 64-bit | |
711 | registers for 4::2 parallelism. | |
712 | All 1-bit deltas, all 2-bit deltas, all deltas composed of top bits of | |
713 | (a,b,c), and all deltas of bottom bits were tested. All deltas were | |
714 | tested both on random keys and on keys that were nearly all zero. | |
715 | These deltas all cause every bit of c to change between 1/3 and 2/3 | |
716 | of the time (well, only 113/400 to 287/400 of the time for some | |
717 | 2-bit delta). These deltas all cause at least 80 bits to change | |
718 | among (a,b,c) when the mix is run either forward or backward (yes it | |
719 | is reversible). | |
720 | This implies that a hash using mix64 has no funnels. There may be | |
721 | characteristics with 3-bit deltas or bigger, I didn't test for | |
722 | those. | |
723 | -------------------------------------------------------------------- | |
724 | */ | |
725 | #define mix64(a,b,c) \ | |
726 | { \ | |
727 | a -= b; a -= c; a ^= (c>>43); \ | |
728 | b -= c; b -= a; b ^= (a<<9); \ | |
729 | c -= a; c -= b; c ^= (b>>8); \ | |
730 | a -= b; a -= c; a ^= (c>>38); \ | |
731 | b -= c; b -= a; b ^= (a<<23); \ | |
732 | c -= a; c -= b; c ^= (b>>5); \ | |
733 | a -= b; a -= c; a ^= (c>>35); \ | |
734 | b -= c; b -= a; b ^= (a<<49); \ | |
735 | c -= a; c -= b; c ^= (b>>11); \ | |
736 | a -= b; a -= c; a ^= (c>>12); \ | |
737 | b -= c; b -= a; b ^= (a<<18); \ | |
738 | c -= a; c -= b; c ^= (b>>22); \ | |
739 | } | |
740 | ||
741 | /* | |
742 | -------------------------------------------------------------------- | |
743 | hash() -- hash a variable-length key into a 64-bit value | |
744 | k : the key (the unaligned variable-length array of bytes) | |
745 | len : the length of the key, counting by bytes | |
746 | level : can be any 8-byte value | |
747 | Returns a 64-bit value. Every bit of the key affects every bit of | |
748 | the return value. No funnels. Every 1-bit and 2-bit delta achieves | |
749 | avalanche. About 41+5len instructions. | |
750 | ||
751 | The best hash table sizes are powers of 2. There is no need to do | |
752 | mod a prime (mod is sooo slow!). If you need less than 64 bits, | |
753 | use a bitmask. For example, if you need only 10 bits, do | |
754 | h = (h & hashmask(10)); | |
755 | In which case, the hash table should have hashsize(10) elements. | |
756 | ||
757 | If you are hashing n strings (uint8_t **)k, do it like this: | |
758 | for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h); | |
759 | ||
760 | By Bob Jenkins, Jan 4 1997. bob_jenkins@burtleburtle.net. You may | |
761 | use this code any way you wish, private, educational, or commercial, | |
762 | but I would appreciate if you give me credit. | |
763 | ||
764 | See http://burtleburtle.net/bob/hash/evahash.html | |
765 | Use for hash table lookup, or anything where one collision in 2^^64 | |
766 | is acceptable. Do NOT use for cryptographic purposes. | |
767 | -------------------------------------------------------------------- | |
768 | */ | |
769 | ||
770 | static uint64_t lookup8( uint8_t *k, size_t length, uint64_t level) | |
771 | // uint8_t *k; /* the key */ | |
772 | // uint64_t length; /* the length of the key */ | |
773 | // uint64_t level; /* the previous hash, or an arbitrary value */ | |
774 | { | |
775 | uint64_t a,b,c; | |
776 | size_t len; | |
777 | ||
778 | /* Set up the internal state */ | |
779 | len = length; | |
780 | a = b = level; /* the previous hash value */ | |
781 | c = 0x9e3779b97f4a7c13LL; /* the golden ratio; an arbitrary value */ | |
782 | ||
783 | /*---------------------------------------- handle most of the key */ | |
784 | while (len >= 24) | |
785 | { | |
786 | a += (k[0] +((uint64_t)k[ 1]<< 8)+((uint64_t)k[ 2]<<16)+((uint64_t)k[ 3]<<24) | |
787 | +((uint64_t)k[4 ]<<32)+((uint64_t)k[ 5]<<40)+((uint64_t)k[ 6]<<48)+((uint64_t)k[ 7]<<56)); | |
788 | b += (k[8] +((uint64_t)k[ 9]<< 8)+((uint64_t)k[10]<<16)+((uint64_t)k[11]<<24) | |
789 | +((uint64_t)k[12]<<32)+((uint64_t)k[13]<<40)+((uint64_t)k[14]<<48)+((uint64_t)k[15]<<56)); | |
790 | c += (k[16] +((uint64_t)k[17]<< 8)+((uint64_t)k[18]<<16)+((uint64_t)k[19]<<24) | |
791 | +((uint64_t)k[20]<<32)+((uint64_t)k[21]<<40)+((uint64_t)k[22]<<48)+((uint64_t)k[23]<<56)); | |
792 | mix64(a,b,c); | |
793 | k += 24; len -= 24; | |
794 | } | |
795 | ||
796 | /*------------------------------------- handle the last 23 bytes */ | |
797 | c += length; | |
798 | switch(len) /* all the case statements fall through */ | |
799 | { | |
800 | case 23: c+=((uint64_t)k[22]<<56); | |
801 | case 22: c+=((uint64_t)k[21]<<48); | |
802 | case 21: c+=((uint64_t)k[20]<<40); | |
803 | case 20: c+=((uint64_t)k[19]<<32); | |
804 | case 19: c+=((uint64_t)k[18]<<24); | |
805 | case 18: c+=((uint64_t)k[17]<<16); | |
806 | case 17: c+=((uint64_t)k[16]<<8); | |
807 | /* the first byte of c is reserved for the length */ | |
808 | case 16: b+=((uint64_t)k[15]<<56); | |
809 | case 15: b+=((uint64_t)k[14]<<48); | |
810 | case 14: b+=((uint64_t)k[13]<<40); | |
811 | case 13: b+=((uint64_t)k[12]<<32); | |
812 | case 12: b+=((uint64_t)k[11]<<24); | |
813 | case 11: b+=((uint64_t)k[10]<<16); | |
814 | case 10: b+=((uint64_t)k[ 9]<<8); | |
815 | case 9: b+=((uint64_t)k[ 8]); | |
816 | case 8: a+=((uint64_t)k[ 7]<<56); | |
817 | case 7: a+=((uint64_t)k[ 6]<<48); | |
818 | case 6: a+=((uint64_t)k[ 5]<<40); | |
819 | case 5: a+=((uint64_t)k[ 4]<<32); | |
820 | case 4: a+=((uint64_t)k[ 3]<<24); | |
821 | case 3: a+=((uint64_t)k[ 2]<<16); | |
822 | case 2: a+=((uint64_t)k[ 1]<<8); | |
823 | case 1: a+=((uint64_t)k[ 0]); | |
824 | /* case 0: nothing left to add */ | |
825 | } | |
826 | mix64(a,b,c); | |
827 | /*-------------------------------------------- report the result */ | |
828 | return c; | |
829 | } | |
830 | ||
831 | ||
832 | #ifdef SELOPT_WRITE | |
833 | ||
834 | /* | |
835 | ------------------------------------------------------------------------------ | |
836 | This generates a minimal perfect hash function. That means, given a | |
837 | set of n keys, this determines a hash function that maps each of | |
838 | those keys into a value in 0..n-1 with no collisions. | |
839 | ||
840 | The perfect hash function first uses a normal hash function on the key | |
841 | to determine (a,b) such that the pair (a,b) is distinct for all | |
842 | keys, then it computes a^scramble[tab[b]] to get the final perfect hash. | |
843 | tab[] is an array of 1-byte values and scramble[] is a 256-term array of | |
844 | 2-byte or 4-byte values. If there are n keys, the length of tab[] is a | |
845 | power of two between n/3 and n. | |
846 | ||
847 | I found the idea of computing distinct (a,b) values in "Practical minimal | |
848 | perfect hash functions for large databases", Fox, Heath, Chen, and Daoud, | |
849 | Communications of the ACM, January 1992. They found the idea in Chichelli | |
850 | (CACM Jan 1980). Beyond that, our methods differ. | |
851 | ||
852 | The key is hashed to a pair (a,b) where a in 0..*alen*-1 and b in | |
853 | 0..*blen*-1. A fast hash function determines both a and b | |
854 | simultaneously. Any decent hash function is likely to produce | |
855 | hashes so that (a,b) is distinct for all pairs. I try the hash | |
856 | using different values of *salt* until all pairs are distinct. | |
857 | ||
858 | The final hash is (a XOR scramble[tab[b]]). *scramble* is a | |
859 | predetermined mapping of 0..255 into 0..smax-1. *tab* is an | |
860 | array that we fill in in such a way as to make the hash perfect. | |
861 | ||
862 | First we fill in all values of *tab* that are used by more than one | |
863 | key. We try all possible values for each position until one works. | |
864 | ||
865 | This leaves m unmapped keys and m values that something could hash to. | |
866 | If you treat unmapped keys as lefthand nodes and unused hash values | |
867 | as righthand nodes, and draw a line connecting each key to each hash | |
868 | value it could map to, you get a bipartite graph. We attempt to | |
869 | find a perfect matching in this graph. If we succeed, we have | |
870 | determined a perfect hash for the whole set of keys. | |
871 | ||
872 | *scramble* is used because (a^tab[i]) clusters keys around *a*. | |
873 | ------------------------------------------------------------------------------ | |
874 | */ | |
875 | ||
876 | typedef uint64_t ub8; | |
877 | #define UB8MAXVAL 0xffffffffffffffffLL | |
878 | #define UB8BITS 64 | |
879 | typedef uint32_t ub4; | |
880 | #define UB4MAXVAL 0xffffffff | |
881 | #define UB4BITS 32 | |
882 | typedef uint16_t ub2; | |
883 | #define UB2MAXVAL 0xffff | |
884 | #define UB2BITS 16 | |
885 | typedef uint8_t ub1; | |
886 | #define UB1MAXVAL 0xff | |
887 | #define UB1BITS 8 | |
888 | ||
889 | #define TRUE 1 | |
890 | #define FALSE 0 | |
891 | ||
892 | #define SCRAMBLE_LEN 256 // ((ub4)1<<16) /* length of *scramble* */ | |
893 | #define RETRY_INITKEY 2048 /* number of times to try to find distinct (a,b) */ | |
894 | #define RETRY_PERFECT 4 /* number of times to try to make a perfect hash */ | |
895 | ||
896 | ||
897 | /* representation of a key */ | |
898 | struct key | |
899 | { | |
900 | ub1 *name_k; /* the actual key */ | |
901 | ub4 len_k; /* the length of the actual key */ | |
902 | ub4 hash_k; /* the initial hash value for this key */ | |
903 | /* beyond this point is mapping-dependent */ | |
904 | ub4 a_k; /* a, of the key maps to (a,b) */ | |
905 | ub4 b_k; /* b, of the key maps to (a,b) */ | |
906 | struct key *nextb_k; /* next key with this b */ | |
907 | }; | |
908 | typedef struct key key; | |
909 | ||
910 | /* things indexed by b of original (a,b) pair */ | |
911 | struct bstuff | |
912 | { | |
913 | ub2 val_b; /* hash=a^tabb[b].val_b */ | |
914 | key *list_b; /* tabb[i].list_b is list of keys with b==i */ | |
915 | ub4 listlen_b; /* length of list_b */ | |
916 | ub4 water_b; /* high watermark of who has visited this map node */ | |
917 | }; | |
918 | typedef struct bstuff bstuff; | |
919 | ||
920 | /* things indexed by final hash value */ | |
921 | struct hstuff | |
922 | { | |
923 | key *key_h; /* tabh[i].key_h is the key with a hash of i */ | |
924 | }; | |
925 | typedef struct hstuff hstuff; | |
926 | ||
927 | /* things indexed by queue position */ | |
928 | struct qstuff | |
929 | { | |
930 | bstuff *b_q; /* b that currently occupies this hash */ | |
931 | ub4 parent_q; /* queue position of parent that could use this hash */ | |
932 | ub2 newval_q; /* what to change parent tab[b] to to use this hash */ | |
933 | ub2 oldval_q; /* original value of tab[b] */ | |
934 | }; | |
935 | typedef struct qstuff qstuff; | |
936 | ||
937 | ||
938 | /* | |
939 | ------------------------------------------------------------------------------ | |
940 | Find the mapping that will produce a perfect hash | |
941 | ------------------------------------------------------------------------------ | |
942 | */ | |
943 | ||
944 | /* return the ceiling of the log (base 2) of val */ | |
945 | static ub4 log2u(ub4 val) | |
946 | { | |
947 | ub4 i; | |
948 | for (i=0; ((ub4)1<<i) < val; ++i) | |
949 | ; | |
950 | return i; | |
951 | } | |
952 | ||
953 | /* compute p(x), where p is a permutation of 0..(1<<nbits)-1 */ | |
954 | /* permute(0)=0. This is intended and useful. */ | |
955 | static ub4 permute(ub4 x, ub4 nbits) | |
956 | // ub4 x; /* input, a value in some range */ | |
957 | // ub4 nbits; /* input, number of bits in range */ | |
958 | { | |
959 | int i; | |
960 | int mask = ((ub4)1<<nbits)-1; /* all ones */ | |
961 | int const2 = 1+nbits/2; | |
962 | int const3 = 1+nbits/3; | |
963 | int const4 = 1+nbits/4; | |
964 | int const5 = 1+nbits/5; | |
965 | for (i=0; i<20; ++i) | |
966 | { | |
967 | x = (x+(x<<const2)) & mask; | |
968 | x = (x^(x>>const3)); | |
969 | x = (x+(x<<const4)) & mask; | |
970 | x = (x^(x>>const5)); | |
971 | } | |
972 | return x; | |
973 | } | |
974 | ||
975 | /* initialize scramble[] with distinct random values in 0..smax-1 */ | |
976 | static void scrambleinit(ub4 *scramble, ub4 smax) | |
977 | // ub4 *scramble; /* hash is a^scramble[tab[b]] */ | |
978 | // ub4 smax; /* scramble values should be in 0..smax-1 */ | |
979 | { | |
980 | ub4 i; | |
981 | ||
982 | /* fill scramble[] with distinct random integers in 0..smax-1 */ | |
983 | for (i=0; i<SCRAMBLE_LEN; ++i) | |
984 | { | |
985 | scramble[i] = permute(i, log2u(smax)); | |
986 | } | |
987 | } | |
988 | ||
989 | ||
990 | /* | |
991 | * put keys in tabb according to key->b_k | |
992 | * check if the initial hash might work | |
993 | */ | |
994 | static int inittab(bstuff *tabb, ub4 blen, key *keys, ub4 nkeys, int complete) | |
995 | // bstuff *tabb; /* output, list of keys with b for (a,b) */ | |
996 | // ub4 blen; /* length of tabb */ | |
997 | // key *keys; /* list of keys already hashed */ | |
998 | // int complete; /* TRUE means to complete init despite collisions */ | |
999 | { | |
1000 | int nocollision = TRUE; | |
1001 | ub4 i; | |
1002 | ||
1003 | memset((void *)tabb, 0, (size_t)(sizeof(bstuff)*blen)); | |
1004 | ||
1005 | /* Two keys with the same (a,b) guarantees a collision */ | |
1006 | for (i = 0; i < nkeys; i++) { | |
1007 | key *mykey = keys+i; | |
1008 | key *otherkey; | |
1009 | ||
1010 | for (otherkey=tabb[mykey->b_k].list_b; | |
1011 | otherkey; | |
1012 | otherkey=otherkey->nextb_k) | |
1013 | { | |
1014 | if (mykey->a_k == otherkey->a_k) | |
1015 | { | |
1016 | nocollision = FALSE; | |
1017 | if (!complete) | |
1018 | return FALSE; | |
1019 | } | |
1020 | } | |
1021 | ++tabb[mykey->b_k].listlen_b; | |
1022 | mykey->nextb_k = tabb[mykey->b_k].list_b; | |
1023 | tabb[mykey->b_k].list_b = mykey; | |
1024 | } | |
1025 | ||
1026 | /* no two keys have the same (a,b) pair */ | |
1027 | return nocollision; | |
1028 | } | |
1029 | ||
1030 | ||
1031 | /* Do the initial hash for normal mode (use lookup and checksum) */ | |
1032 | static void initnorm(key *keys, ub4 nkeys, ub4 alen, ub4 blen, ub4 smax, ub8 salt) | |
1033 | // key *keys; /* list of all keys */ | |
1034 | // ub4 alen; /* (a,b) has a in 0..alen-1, a power of 2 */ | |
1035 | // ub4 blen; /* (a,b) has b in 0..blen-1, a power of 2 */ | |
1036 | // ub4 smax; /* maximum range of computable hash values */ | |
1037 | // ub4 salt; /* used to initialize the hash function */ | |
1038 | // gencode *final; /* output, code for the final hash */ | |
1039 | { | |
1040 | ub4 loga = log2u(alen); /* log based 2 of blen */ | |
1041 | ub4 i; | |
1042 | for (i = 0; i < nkeys; i++) { | |
1043 | key *mykey = keys+i; | |
1044 | ub8 hash = lookup8(mykey->name_k, mykey->len_k, salt); | |
9f83892a A |
1045 | mykey->a_k = (loga > 0) ? (ub4)(hash >> (UB8BITS-loga)) : 0; |
1046 | mykey->b_k = (blen > 1) ? (hash & (blen-1)) : 0; | |
832b6fce A |
1047 | } |
1048 | } | |
1049 | ||
1050 | ||
1051 | /* Try to apply an augmenting list */ | |
1052 | static int apply(bstuff *tabb, hstuff *tabh, qstuff *tabq, ub4 blen, ub4 *scramble, ub4 tail, int rollback) | |
1053 | // bstuff *tabb; | |
1054 | // hstuff *tabh; | |
1055 | // qstuff *tabq; | |
1056 | // ub4 blen; | |
1057 | // ub4 *scramble; | |
1058 | // ub4 tail; | |
1059 | // int rollback; /* FALSE applies augmenting path, TRUE rolls back */ | |
1060 | { | |
1061 | ub4 hash; | |
1062 | key *mykey; | |
1063 | bstuff *pb; | |
1064 | ub4 child; | |
1065 | ub4 parent; | |
1066 | ub4 stabb; /* scramble[tab[b]] */ | |
1067 | ||
1068 | /* walk from child to parent */ | |
1069 | for (child=tail-1; child; child=parent) | |
1070 | { | |
1071 | parent = tabq[child].parent_q; /* find child's parent */ | |
1072 | pb = tabq[parent].b_q; /* find parent's list of siblings */ | |
1073 | ||
1074 | /* erase old hash values */ | |
1075 | stabb = scramble[pb->val_b]; | |
1076 | for (mykey=pb->list_b; mykey; mykey=mykey->nextb_k) | |
1077 | { | |
1078 | hash = mykey->a_k^stabb; | |
1079 | if (mykey == tabh[hash].key_h) | |
1080 | { /* erase hash for all of child's siblings */ | |
1081 | tabh[hash].key_h = (key *)0; | |
1082 | } | |
1083 | } | |
1084 | ||
1085 | /* change pb->val_b, which will change the hashes of all parent siblings */ | |
1086 | pb->val_b = (rollback ? tabq[child].oldval_q : tabq[child].newval_q); | |
1087 | ||
1088 | /* set new hash values */ | |
1089 | stabb = scramble[pb->val_b]; | |
1090 | for (mykey=pb->list_b; mykey; mykey=mykey->nextb_k) | |
1091 | { | |
1092 | hash = mykey->a_k^stabb; | |
1093 | if (rollback) | |
1094 | { | |
1095 | if (parent == 0) continue; /* root never had a hash */ | |
1096 | } | |
1097 | else if (tabh[hash].key_h) | |
1098 | { | |
1099 | /* very rare: roll back any changes */ | |
1100 | apply(tabb, tabh, tabq, blen, scramble, tail, TRUE); | |
1101 | return FALSE; /* failure, collision */ | |
1102 | } | |
1103 | tabh[hash].key_h = mykey; | |
1104 | } | |
1105 | } | |
1106 | return TRUE; | |
1107 | } | |
1108 | ||
1109 | ||
1110 | /* | |
1111 | ------------------------------------------------------------------------------- | |
1112 | augment(): Add item to the mapping. | |
1113 | ||
1114 | Construct a spanning tree of *b*s with *item* as root, where each | |
1115 | parent can have all its hashes changed (by some new val_b) with | |
1116 | at most one collision, and each child is the b of that collision. | |
1117 | ||
1118 | I got this from Tarjan's "Data Structures and Network Algorithms". The | |
1119 | path from *item* to a *b* that can be remapped with no collision is | |
1120 | an "augmenting path". Change values of tab[b] along the path so that | |
1121 | the unmapped key gets mapped and the unused hash value gets used. | |
1122 | ||
1123 | Assuming 1 key per b, if m out of n hash values are still unused, | |
1124 | you should expect the transitive closure to cover n/m nodes before | |
1125 | an unused node is found. Sum(i=1..n)(n/i) is about nlogn, so expect | |
1126 | this approach to take about nlogn time to map all single-key b's. | |
1127 | ------------------------------------------------------------------------------- | |
1128 | */ | |
1129 | static int augment(bstuff *tabb, hstuff *tabh, qstuff *tabq, ub4 blen, ub4 *scramble, ub4 smax, bstuff *item, ub4 nkeys, | |
1130 | ub4 highwater) | |
1131 | // bstuff *tabb; /* stuff indexed by b */ | |
1132 | // hstuff *tabh; /* which key is associated with which hash, indexed by hash */ | |
1133 | // qstuff *tabq; /* queue of *b* values, this is the spanning tree */ | |
1134 | // ub4 blen; /* length of tabb */ | |
1135 | // ub4 *scramble; /* final hash is a^scramble[tab[b]] */ | |
1136 | // ub4 smax; /* highest value in scramble */ | |
1137 | // bstuff *item; /* &tabb[b] for the b to be mapped */ | |
1138 | // ub4 nkeys; /* final hash must be in 0..nkeys-1 */ | |
1139 | // ub4 highwater; /* a value higher than any now in tabb[].water_b */ | |
1140 | { | |
1141 | ub4 q; /* current position walking through the queue */ | |
1142 | ub4 tail; /* tail of the queue. 0 is the head of the queue. */ | |
1143 | ub4 limit=UB1MAXVAL+1; | |
1144 | ub4 highhash = smax; | |
1145 | ||
1146 | /* initialize the root of the spanning tree */ | |
1147 | tabq[0].b_q = item; | |
1148 | tail = 1; | |
1149 | ||
1150 | /* construct the spanning tree by walking the queue, add children to tail */ | |
1151 | for (q=0; q<tail; ++q) | |
1152 | { | |
1153 | bstuff *myb = tabq[q].b_q; /* the b for this node */ | |
1154 | ub4 i; /* possible value for myb->val_b */ | |
1155 | ||
1156 | if (q == 1) | |
1157 | break; /* don't do transitive closure */ | |
1158 | ||
1159 | for (i=0; i<limit; ++i) | |
1160 | { | |
1161 | bstuff *childb = (bstuff *)0; /* the b that this i maps to */ | |
1162 | key *mykey; /* for walking through myb's keys */ | |
1163 | ||
1164 | for (mykey = myb->list_b; mykey; mykey=mykey->nextb_k) | |
1165 | { | |
1166 | key *childkey; | |
1167 | ub4 hash = mykey->a_k^scramble[i]; | |
1168 | ||
1169 | if (hash >= highhash) break; /* out of bounds */ | |
1170 | childkey = tabh[hash].key_h; | |
1171 | ||
1172 | if (childkey) | |
1173 | { | |
1174 | bstuff *hitb = &tabb[childkey->b_k]; | |
1175 | ||
1176 | if (childb) | |
1177 | { | |
1178 | if (childb != hitb) break; /* hit at most one child b */ | |
1179 | } | |
1180 | else | |
1181 | { | |
1182 | childb = hitb; /* remember this as childb */ | |
1183 | if (childb->water_b == highwater) break; /* already explored */ | |
1184 | } | |
1185 | } | |
1186 | } | |
1187 | if (mykey) continue; /* myb with i has multiple collisions */ | |
1188 | ||
1189 | /* add childb to the queue of reachable things */ | |
1190 | if (childb) childb->water_b = highwater; | |
1191 | tabq[tail].b_q = childb; | |
1192 | tabq[tail].newval_q = i; /* how to make parent (myb) use this hash */ | |
1193 | tabq[tail].oldval_q = myb->val_b; /* need this for rollback */ | |
1194 | tabq[tail].parent_q = q; | |
1195 | ++tail; | |
1196 | ||
1197 | if (!childb) | |
1198 | { /* found an *i* with no collisions? */ | |
1199 | /* try to apply the augmenting path */ | |
1200 | if (apply(tabb, tabh, tabq, blen, scramble, tail, FALSE)) | |
1201 | return TRUE; /* success, item was added to the perfect hash */ | |
1202 | ||
1203 | --tail; /* don't know how to handle such a child! */ | |
1204 | } | |
1205 | } | |
1206 | } | |
1207 | return FALSE; | |
1208 | } | |
1209 | ||
1210 | ||
1211 | /* find a mapping that makes this a perfect hash */ | |
1212 | static int perfect(bstuff *tabb, hstuff *tabh, qstuff *tabq, ub4 blen, ub4 smax, ub4 *scramble, ub4 nkeys) | |
1213 | { | |
1214 | ub4 maxkeys; /* maximum number of keys for any b */ | |
1215 | ub4 i, j; | |
1216 | ||
1217 | #if SELOPT_DEBUG | |
1218 | fprintf(stderr, " blen %d smax %d nkeys %d\n", blen, smax, nkeys); | |
1219 | #endif | |
1220 | ||
1221 | /* clear any state from previous attempts */ | |
1222 | memset((void *)tabh, 0, sizeof(hstuff)*smax); | |
1223 | memset((void *)tabq, 0, sizeof(qstuff)*(blen+1)); | |
1224 | ||
1225 | for (maxkeys=0,i=0; i<blen; ++i) | |
1226 | if (tabb[i].listlen_b > maxkeys) | |
1227 | maxkeys = tabb[i].listlen_b; | |
1228 | ||
1229 | /* In descending order by number of keys, map all *b*s */ | |
1230 | for (j=maxkeys; j>0; --j) | |
1231 | for (i=0; i<blen; ++i) | |
1232 | if (tabb[i].listlen_b == j) | |
1233 | if (!augment(tabb, tabh, tabq, blen, scramble, smax, &tabb[i], nkeys, | |
1234 | i+1)) | |
1235 | { | |
1236 | return FALSE; | |
1237 | } | |
1238 | ||
1239 | /* Success! We found a perfect hash of all keys into 0..nkeys-1. */ | |
1240 | return TRUE; | |
1241 | } | |
1242 | ||
1243 | ||
1244 | /* guess initial values for alen and blen */ | |
1245 | static void initalen(ub4 *alen, ub4 *blen, ub4 smax, ub4 nkeys) | |
1246 | // ub4 *alen; /* output, initial alen */ | |
1247 | // ub4 *blen; /* output, initial blen */ | |
1248 | // ub4 smax; /* input, power of two greater or equal to max hash value */ | |
1249 | // ub4 nkeys; /* number of keys being hashed */ | |
1250 | { | |
1251 | /* | |
1252 | * Find initial *alen, *blen | |
1253 | * Initial alen and blen values were found empirically. Some factors: | |
1254 | * | |
1255 | * If smax<256 there is no scramble, so tab[b] needs to cover 0..smax-1. | |
1256 | * | |
1257 | * alen and blen must be powers of 2 because the values in 0..alen-1 and | |
1258 | * 0..blen-1 are produced by applying a bitmask to the initial hash function. | |
1259 | * | |
1260 | * alen must be less than smax, in fact less than nkeys, because otherwise | |
1261 | * there would often be no i such that a^scramble[i] is in 0..nkeys-1 for | |
1262 | * all the *a*s associated with a given *b*, so there would be no legal | |
1263 | * value to assign to tab[b]. This only matters when we're doing a minimal | |
1264 | * perfect hash. | |
1265 | * | |
1266 | * It takes around 800 trials to find distinct (a,b) with nkey=smax*(5/8) | |
1267 | * and alen*blen = smax*smax/32. | |
1268 | * | |
1269 | * Values of blen less than smax/4 never work, and smax/2 always works. | |
1270 | * | |
1271 | * We want blen as small as possible because it is the number of bytes in | |
1272 | * the huge array we must create for the perfect hash. | |
1273 | * | |
1274 | * When nkey <= smax*(5/8), blen=smax/4 works much more often with | |
1275 | * alen=smax/8 than with alen=smax/4. Above smax*(5/8), blen=smax/4 | |
1276 | * doesn't seem to care whether alen=smax/8 or alen=smax/4. I think it | |
1277 | * has something to do with 5/8 = 1/8 * 5. For example examine 80000, | |
1278 | * 85000, and 90000 keys with different values of alen. This only matters | |
1279 | * if we're doing a minimal perfect hash. | |
1280 | * | |
1281 | * When alen*blen <= 1<<UB4BITS, the initial hash must produce one integer. | |
1282 | * Bigger than that it must produce two integers, which increases the | |
1283 | * cost of the hash per character hashed. | |
1284 | */ | |
1285 | *alen = smax; /* no reason to restrict alen to smax/2 */ | |
1286 | *blen = ((nkeys <= smax*0.6) ? smax/16 : | |
1287 | (nkeys <= smax*0.8) ? smax/8 : smax/4); | |
1288 | ||
1289 | if (*alen < 1) *alen = 1; | |
1290 | if (*blen < 1) *blen = 1; | |
1291 | ||
1292 | #if SELOPT_DEBUG | |
1293 | fprintf(stderr, "alen %d blen %d smax %d nkeys %d\n", *alen, *blen, smax, nkeys); | |
1294 | #endif | |
1295 | } | |
1296 | ||
1297 | /* | |
1298 | ** Try to find a perfect hash function. | |
1299 | ** Return the successful initializer for the initial hash. | |
1300 | ** Return 0 if no perfect hash could be found. | |
1301 | */ | |
1302 | static int findhash(bstuff **tabb, ub4 *alen, ub4 *blen, ub8 *salt, | |
1303 | ub4 *scramble, ub4 smax, key *keys, ub4 nkeys) | |
1304 | // bstuff **tabb; /* output, tab[] of the perfect hash, length *blen */ | |
1305 | // ub4 *alen; /* output, 0..alen-1 is range for a of (a,b) */ | |
1306 | // ub4 *blen; /* output, 0..blen-1 is range for b of (a,b) */ | |
1307 | // ub4 *salt; /* output, initializes initial hash */ | |
1308 | // ub4 *scramble; /* input, hash = a^scramble[tab[b]] */ | |
1309 | // ub4 smax; /* input, scramble[i] in 0..smax-1 */ | |
1310 | // key *keys; /* input, keys to hash */ | |
1311 | // ub4 nkeys; /* input, number of keys being hashed */ | |
1312 | { | |
1313 | ub4 bad_initkey; /* how many times did initkey fail? */ | |
1314 | ub4 bad_perfect; /* how many times did perfect fail? */ | |
1315 | ub4 si; /* trial initializer for initial hash */ | |
1316 | ub4 maxalen; | |
1317 | hstuff *tabh; /* table of keys indexed by hash value */ | |
1318 | qstuff *tabq; /* table of stuff indexed by queue value, used by augment */ | |
1319 | ||
1320 | /* guess initial values for alen and blen */ | |
1321 | initalen(alen, blen, smax, nkeys); | |
1322 | ||
1323 | scrambleinit(scramble, smax); | |
1324 | ||
1325 | maxalen = smax; | |
1326 | ||
1327 | /* allocate working memory */ | |
1328 | *tabb = new bstuff[*blen]; | |
1329 | tabq = new qstuff[*blen+1]; | |
1330 | tabh = new hstuff[smax]; | |
1331 | ||
1332 | /* Actually find the perfect hash */ | |
1333 | *salt = 0; | |
1334 | bad_initkey = 0; | |
1335 | bad_perfect = 0; | |
1336 | for (si=1; ; ++si) | |
1337 | { | |
1338 | ub4 rslinit; | |
1339 | /* Try to find distinct (A,B) for all keys */ | |
1340 | *salt = si * 0x9e3779b97f4a7c13LL; /* golden ratio (arbitrary value) */ | |
1341 | initnorm(keys, nkeys, *alen, *blen, smax, *salt); | |
1342 | rslinit = inittab(*tabb, *blen, keys, nkeys, FALSE); | |
1343 | if (rslinit == 0) | |
1344 | { | |
1345 | /* didn't find distinct (a,b) */ | |
1346 | if (++bad_initkey >= RETRY_INITKEY) | |
1347 | { | |
1348 | /* Try to put more bits in (A,B) to make distinct (A,B) more likely */ | |
1349 | if (*alen < maxalen) | |
1350 | { | |
1351 | *alen *= 2; | |
1352 | } | |
1353 | else if (*blen < smax) | |
1354 | { | |
1355 | *blen *= 2; | |
1356 | delete[] tabq; | |
1357 | delete[] *tabb; | |
1358 | *tabb = new bstuff[*blen]; | |
1359 | tabq = new qstuff[*blen+1]; | |
1360 | } | |
1361 | bad_initkey = 0; | |
1362 | bad_perfect = 0; | |
1363 | } | |
1364 | continue; /* two keys have same (a,b) pair */ | |
1365 | } | |
1366 | ||
1367 | /* Given distinct (A,B) for all keys, build a perfect hash */ | |
1368 | if (!perfect(*tabb, tabh, tabq, *blen, smax, scramble, nkeys)) | |
1369 | { | |
1370 | if (++bad_perfect >= RETRY_PERFECT) | |
1371 | { | |
1372 | if (*blen < smax) | |
1373 | { | |
1374 | *blen *= 2; | |
1375 | delete[] *tabb; | |
1376 | delete[] tabq; | |
1377 | *tabb = new bstuff[*blen]; | |
1378 | tabq = new qstuff[*blen+1]; | |
1379 | --si; /* we know this salt got distinct (A,B) */ | |
1380 | } | |
1381 | else | |
1382 | { | |
1383 | return 0; | |
1384 | } | |
1385 | bad_perfect = 0; | |
1386 | } | |
1387 | continue; | |
1388 | } | |
1389 | ||
1390 | break; | |
1391 | } | |
1392 | ||
1393 | /* free working memory */ | |
1394 | delete[] tabh; | |
1395 | delete[] tabq; | |
1396 | ||
1397 | return 1; | |
1398 | } | |
1399 | ||
1400 | /* | |
1401 | ------------------------------------------------------------------------------ | |
1402 | Input/output type routines | |
1403 | ------------------------------------------------------------------------------ | |
1404 | */ | |
1405 | ||
1406 | /* get the list of keys */ | |
1407 | static void getkeys(key **keys, ub4 *nkeys, const string_map& strings) | |
1408 | { | |
1409 | key *buf = new key[strings.size()]; | |
1410 | size_t i; | |
1411 | string_map::const_iterator s; | |
1412 | for (i = 0, s = strings.begin(); s != strings.end(); ++s, ++i) { | |
1413 | key *mykey = buf+i; | |
1414 | mykey->name_k = (ub1 *)s->first; | |
1415 | mykey->len_k = (ub4)strlen(s->first); | |
1416 | } | |
1417 | *keys = buf; | |
9f83892a | 1418 | *nkeys = (ub4)strings.size(); |
832b6fce A |
1419 | } |
1420 | ||
1421 | ||
1422 | static perfect_hash | |
1423 | make_perfect(const string_map& strings) | |
1424 | { | |
1425 | ub4 nkeys; /* number of keys */ | |
1426 | key *keys; /* head of list of keys */ | |
1427 | bstuff *tab; /* table indexed by b */ | |
1428 | ub4 smax; /* scramble[] values in 0..smax-1, a power of 2 */ | |
1429 | ub4 alen; /* a in 0..alen-1, a power of 2 */ | |
1430 | ub4 blen; /* b in 0..blen-1, a power of 2 */ | |
1431 | ub8 salt; /* a parameter to the hash function */ | |
1432 | ub4 scramble[SCRAMBLE_LEN]; /* used in final hash function */ | |
1433 | int ok; | |
1434 | int i; | |
1435 | perfect_hash result; | |
1436 | ||
1437 | /* read in the list of keywords */ | |
1438 | getkeys(&keys, &nkeys, strings); | |
1439 | ||
1440 | /* find the hash */ | |
1441 | smax = ((ub4)1<<log2u(nkeys)); | |
1442 | ok = findhash(&tab, &alen, &blen, &salt, | |
1443 | scramble, smax, keys, nkeys); | |
1444 | if (!ok) { | |
1445 | smax = 2 * ((ub4)1<<log2u(nkeys)); | |
1446 | ok = findhash(&tab, &alen, &blen, &salt, | |
1447 | scramble, smax, keys, nkeys); | |
1448 | } | |
1449 | if (!ok) { | |
1450 | bzero(&result, sizeof(result)); | |
1451 | } else { | |
1452 | /* build the tables */ | |
1453 | result.capacity = smax; | |
1454 | result.occupied = nkeys; | |
1455 | result.shift = UB8BITS - log2u(alen); | |
1456 | result.mask = blen - 1; | |
1457 | result.salt = salt; | |
1458 | ||
1459 | result.tab = new uint8_t[blen]; | |
1460 | for (i = 0; i < blen; i++) { | |
1461 | result.tab[i] = tab[i].val_b; | |
1462 | } | |
1463 | for (i = 0; i < 256; i++) { | |
1464 | result.scramble[i] = scramble[i]; | |
1465 | } | |
1466 | } | |
1467 | ||
1468 | delete[] keys; | |
1469 | delete[] tab; | |
1470 | ||
1471 | return result; | |
1472 | } | |
1473 | ||
1474 | // SELOPT_WRITE | |
1475 | #endif | |
1476 | ||
1477 | // namespace objc_selopt | |
1478 | }; | |
1479 | ||
1480 | #undef S32 | |
1481 | #undef S64 | |
1482 | ||
1483 | #endif |