]> git.saurik.com Git - apple/cf.git/blob - CFBurstTrie.c
CF-1152.14.tar.gz
[apple/cf.git] / CFBurstTrie.c
1 /*
2 * Copyright (c) 2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /* CFBurstTrie.c
25 Copyright (c) 2008-2014, Apple Inc. All rights reserved.
26 Responsibility: Jennifer Moore
27 */
28
29 #include "CFInternal.h"
30 #include "CFBurstTrie.h"
31 #include "CFByteOrder.h"
32 #include "CFNumber.h"
33 #include <stdio.h>
34 #include <string.h>
35 #include <stdlib.h>
36 #include <fcntl.h>
37 #include <sys/stat.h>
38 #include <limits.h>
39 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX
40 #include <unistd.h>
41 #include <sys/param.h>
42 #include <sys/mman.h>
43 #endif
44
45 #include <errno.h>
46 #include <assert.h>
47
48 #if DEPLOYMENT_TARGET_WINDOWS
49 #define open _NS_open
50 #define statinfo _stat
51 #define stat(x,y) _NS_stat(x,y)
52 #define __builtin_memcmp(x, y, z) memcmp(x, y, z)
53 #define __builtin_popcountll(x) popcountll(x)
54 #define bzero(dst, size) ZeroMemory(dst, size)
55 #define S_IWUSR 0
56 #define S_IRUSR 0
57
58 static int pwrite(int fd, const void *buf, size_t nbyte, off_t offset) {
59 // Get where we are
60 long pos = _tell(fd);
61
62 // Move to new offset
63 _lseek(fd, offset, SEEK_SET);
64
65 // Write data
66 int res = _write(fd, buf, nbyte);
67
68 // Return to previous offset
69 _lseek(fd, pos, SEEK_SET);
70
71 return res;
72 }
73
74 #else
75 #define statinfo stat
76 #endif
77
78 #if 0
79 #pragma mark Types and Utilities
80 #endif
81
82 #define MAX_STRING_ALLOCATION_SIZE 342
83 #define MAX_STRING_SIZE 1024
84 #define MAX_KEY_LENGTH MAX_STRING_SIZE * 4
85 #define CHARACTER_SET_SIZE 256
86 #define MAX_LIST_SIZE 256 // 64
87 #define MAX_BITMAP_SIZE 200
88 #define MAX_BUFFER_SIZE (4096<<2)
89
90 #define NextTrie_GetPtr(p) (p & ((~(uintptr_t)0)-3))
91 #define NextTrie_GetKind(p) (p & 3)
92 #define NextTrie_SetKind(p, kind) (p |= (3&kind))
93
94 #define DiskNextTrie_GetPtr(map,offset) (((uintptr_t)map) + (uintptr_t)(offset & ((~(uintptr_t)0)-3)))
95 #define DiskNextTrie_GetKind(p) (p & 3)
96 #define DiskNextTrie_SetKind(p, kind) (p |= (3&kind))
97
98 // Use this macro to avoid forgetting to check the pointer before assigning value to it.
99 #define SetPayload(pointer, value) do { if (pointer) *pointer = value; } while (0)
100
101 enum { Nothing = 0, TrieKind = 1, ListKind = 2, CompactTrieKind = 3 };
102 typedef enum { FailedInsert = 0, NewTerm = 1, ExistingTerm = 2 } CFBTInsertCode;
103
104 #pragma pack (1)
105 typedef uintptr_t NextTrie;
106
107 typedef struct _TrieLevel {
108 NextTrie slots[CHARACTER_SET_SIZE];
109 uint32_t weight;
110 uint32_t payload;
111 } TrieLevel;
112 typedef TrieLevel *TrieLevelRef;
113
114 typedef struct _MapTrieLevel {
115 uint32_t slots[CHARACTER_SET_SIZE];
116 uint32_t payload;
117 } MapTrieLevel;
118 typedef MapTrieLevel *MapTrieLevelRef;
119
120 typedef struct _CompactMapTrieLevel {
121 uint64_t bitmap[CHARACTER_SET_SIZE / 64];
122 uint32_t payload;
123 uint32_t slots[];
124 } CompactMapTrieLevel;
125 typedef CompactMapTrieLevel *CompactMapTrieLevelRef;
126
127 typedef struct _ListNode {
128 struct _ListNode *next;
129 uint32_t weight;
130 uint32_t payload;
131 uint16_t length;
132 UInt8 string[];
133 }* ListNodeRef;
134
135 typedef struct _Page {
136 uint32_t length;
137 char data[];
138 } Page;
139
140 typedef struct _PageEntryPacked {
141 uint8_t pfxLen;
142 uint16_t strlen;
143 uint32_t payload;
144 UInt8 string[];
145 } PageEntryPacked;
146
147 typedef struct _PageEntry {
148 uint16_t strlen;
149 uint32_t payload;
150 UInt8 string[];
151 } PageEntry;
152
153 typedef struct _TrieHeader {
154 uint32_t signature;
155 uint32_t rootOffset;
156 uint32_t count;
157 uint32_t size;
158 uint32_t flags;
159 uint64_t reserved[16];
160 } TrieHeader;
161
162 typedef struct _TrieCursor {
163 uint64_t signature;
164 uint64_t counter;
165 NextTrie next;
166 uint32_t keylen;
167 uint32_t prefixlen;
168 const uint8_t *prefix;
169 uint8_t key[MAX_KEY_LENGTH];
170 } TrieCursor;
171
172 typedef struct _MapCursor {
173 uint64_t signature;
174 TrieHeader *header;
175 uint32_t next;
176 uint32_t prefixlen;
177 uint32_t keylen;
178 const uint8_t *prefix;
179 uint8_t key[MAX_STRING_SIZE*4];
180 } MapCursor;
181
182 typedef struct _CompactMapCursor {
183 uint32_t next;
184 uint32_t entryOffsetInPage;
185 uint32_t offsetInEntry;
186 uint32_t payload;
187 // On a page, the first entry could has 0 strlen. So we need this variable to tell us whether
188 // the cursor is merely pointing at the beginning of the page, or the first entry.
189 // For example, if the trie contains "ab" and "abc", where "a" is stored on an array level,
190 // while "b" and "bc" are stored on a page level. If we creat a cursor for string "a", this cursor
191 // will point at the beginning of the page, but not at any particular key. The both entryOffsetInPage and
192 // offsetInEntry fields of the cursor are set to 0 in this case. Now if we add "a" to the
193 // trie. the page level will actually contains three entries. The first entry corresponds to string "a".
194 // That entry has 0 strlen value. If we creat a cursor for string "a" again, this cursor will
195 // point at the first entry on the page. But the entryOffsetInPage and offsetInEntry fields are still
196 // set to 0s. So we need an additional variable to make distinction between these two situations.
197 BOOL isOnPage;
198 } CompactMapCursor;
199 typedef struct _CompactMapCursor *MapCursorRef;
200
201 enum {
202 _kCFBurstTrieCursorTrieType = 0,
203 _kCFBurstTrieCursorMapType
204 };
205
206 typedef struct _CFBurstTrieCursor {
207 CompactMapCursor mapCursor;
208 CFIndex cursorType;
209 CFBurstTrieRef trie;
210 } _CFBurstTrieCursor;
211
212 // ** Legacy
213 typedef struct _DiskTrieLevel {
214 uint32_t slots[CHARACTER_SET_SIZE];
215 uint32_t weight;
216 uint32_t payload;
217 } DiskTrieLevel;
218 typedef DiskTrieLevel *DiskTrieLevelRef;
219
220 typedef struct _CompactDiskTrieLevel {
221 uint64_t bitmap[CHARACTER_SET_SIZE / 64]; // CHARACTER_SET_SIZE / 64bits per word
222 uint32_t weight;
223 uint32_t payload;
224 uint32_t slots[];
225 } CompactDiskTrieLevel;
226 typedef CompactDiskTrieLevel *CompactDiskTrieLevelRef;
227
228 typedef struct _StringPage {
229 uint32_t length;
230 char data[];
231 } StringPage;
232
233 typedef struct _StringPageEntryPacked {
234 uint8_t pfxLen;
235 uint16_t strlen; // make uint8_t if possible
236 uint32_t payload;
237 char string[];
238 } StringPageEntryPacked;
239
240 typedef struct _StringPageEntry {
241 uint16_t strlen; // make uint8_t if possible
242 uint32_t payload;
243 char string[];
244 } StringPageEntry;
245
246 typedef struct _fileHeader {
247 uint32_t signature;
248 uint32_t rootOffset;
249 uint32_t count;
250 uint32_t size;
251 uint32_t flags;
252 } fileHeader;
253 // **
254 #pragma pack()
255
256 struct _CFBurstTrie {
257 union {
258 TrieLevel root;
259 DiskTrieLevel diskRoot;
260 MapTrieLevel maproot;
261 };
262 char *mapBase;
263 uint32_t mapSize;
264 uint32_t mapOffset;
265 uint32_t cflags;
266 uint32_t count;
267 uint32_t containerSize;
268 int retain;
269 #if DEPLOYMENT_TARGET_WINDOWS
270 HANDLE mapHandle;
271 HANDLE mappedFileHandle;
272 #endif
273 };
274
275 #if 0
276 #pragma mark -
277 #pragma mark Forward declarations
278 #endif
279
280 typedef struct _TraverseContext {
281 void *context;
282 void (*callback)(void*, const UInt8*, uint32_t, uint32_t);
283 } TraverseContext;
284
285 static bool foundKey(void *context, const uint8_t *key, uint32_t payload, bool exact)
286 {
287 if (context != NULL) {
288 TraverseContext *ctx = (TraverseContext *)context;
289 if (ctx->context && ctx->callback) {
290 ctx->callback(ctx->context, key, 1, payload);
291 }
292 }
293 return false;
294 }
295
296 void CFBurstTrieTraverseWithCursor(CFBurstTrieRef trie, const uint8_t *prefix, uint32_t prefixLen, void **cursor, void *ctx, bool (*callback)(void *, const uint8_t *, uint32_t, bool));
297
298 static CFBTInsertCode addCFBurstTrieLevel(CFBurstTrieRef trie, TrieLevelRef root, const uint8_t *key, uint32_t keylen, uint32_t weight, uint32_t payload);
299
300 static void findCFBurstTrieLevel(CFBurstTrieRef trie, TrieCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void*, const uint8_t*, uint32_t, bool));
301 static void findCFBurstTrieMappedLevel(CFBurstTrieRef trie, MapCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void*, const uint8_t*, uint32_t, bool));
302 static void traverseCFBurstTrieLevel(CFBurstTrieRef trie, TrieLevelRef root, TrieCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void *, const uint8_t *, uint32_t, bool));
303 static void traverseCFBurstTrieMappedLevel(CFBurstTrieRef trie, MapTrieLevelRef root, MapCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void *, const uint8_t *, uint32_t, bool));
304 static void traverseCFBurstTrieCompactMappedLevel(CFBurstTrieRef trie, CompactMapTrieLevelRef root, MapCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void *, const uint8_t *, uint32_t, bool));
305 static void traverseCFBurstTrieWithCursor(CFBurstTrieRef trie, const uint8_t *prefix, uint32_t prefixLen, void **cursor, bool exactmatch, void *ctx, bool (*callback)(void *, const uint8_t *, uint32_t, bool));
306
307 static size_t serializeCFBurstTrie(CFBurstTrieRef trie, size_t start_offset, int fd);
308
309 static Boolean burstTrieMappedFind(DiskTrieLevelRef trie, char *map, const UInt8 *key, uint32_t length, uint32_t *payload, bool prefix);
310 static Boolean burstTrieMappedPageFind(StringPage *page, const UInt8 *key, uint32_t length, uint32_t *payload, bool prefix);
311 static Boolean burstTrieCompactTrieMappedFind(CompactDiskTrieLevelRef trie, char *map, const UInt8 *key, uint32_t length, uint32_t *payload, bool prefix);
312
313 static void destroyCFBurstTrie(CFBurstTrieRef trie);
314 static void finalizeCFBurstTrie(TrieLevelRef trie);
315 static void finalizeCFBurstTrieList(ListNodeRef node);
316
317 static int nodeWeightCompare(const void *a, const void *b);
318 static int nodeStringCompare(const void *a, const void *b);
319
320 static bool foundKey(void *context, const uint8_t *key, uint32_t payload, bool exact);
321 static bool containsKey(void *context, const uint8_t *key, uint32_t payload, bool exact);
322
323 static CFIndex burstTrieConvertCharactersToUTF8(UniChar *chars, CFIndex numChars, UInt8 *buffer);
324
325 static Boolean advanceMapCursor(CFBurstTrieRef trie, CompactMapCursor *cursor, const UInt8* bytes, CFIndex length);
326 static Boolean getMapCursorPayload(CFBurstTrieRef trie, const CompactMapCursor *cursor, uint32_t *payload);
327 static void copyMapCursor(const CompactMapCursor *source, CompactMapCursor* destination);
328 static Boolean areMapCursorsEqual(const CompactMapCursor *lhs, const CompactMapCursor *rhs);
329 static void traverseFromMapCursor(CFBurstTrieRef trie, CompactMapCursor *cursor, UInt8* bytes, uint32_t capacity, uint32_t length, Boolean *stop, void *ctx, CFBurstTrieTraversalCallback callback);
330 static Boolean getMapCursorPayloadFromPackedPageEntry(PageEntryPacked *entry, const CompactMapCursor *cursor, uint32_t *payload);
331 static Boolean getMapCursorPayloadFromPageEntry(PageEntry *entry, const CompactMapCursor *cursor, uint32_t *payload);
332
333 CFBurstTrieRef CFBurstTrieCreateWithOptions(CFDictionaryRef options) {
334 CFBurstTrieRef trie = NULL;
335 trie = (CFBurstTrieRef) calloc(1, sizeof(struct _CFBurstTrie));
336 trie->containerSize = MAX_LIST_SIZE;
337
338 CFNumberRef valueAsCFNumber;
339 if (CFDictionaryGetValueIfPresent(options, kCFBurstTrieCreationOptionNameContainerSize, (const void **)&valueAsCFNumber)) {
340 int value;
341 CFNumberGetValue(valueAsCFNumber, kCFNumberIntType, &value);
342 trie->containerSize = value > 2 && value < 4096 ? value : MAX_LIST_SIZE;
343 }
344 trie->retain = 1;
345 return trie;
346 }
347
348 CFBurstTrieRef CFBurstTrieCreate() {
349 CFBurstTrieRef trie = NULL;
350 int listSize = MAX_LIST_SIZE;
351 CFNumberRef value = CFNumberCreate(kCFAllocatorDefault, kCFNumberIntType, &listSize);
352 CFMutableDictionaryRef options = CFDictionaryCreateMutable(kCFAllocatorDefault, 1, NULL, NULL);
353 CFDictionarySetValue(options, kCFBurstTrieCreationOptionNameContainerSize, value);
354 trie = CFBurstTrieCreateWithOptions(options);
355 CFRelease(value);
356 CFRelease(options);
357 return trie;
358 }
359
360 CFBurstTrieRef CFBurstTrieCreateFromFile(CFStringRef path) {
361 struct statinfo sb;
362 char filename[PATH_MAX];
363 int fd;
364
365 /* Check valid path name */
366 if (!CFStringGetCString(path, filename, PATH_MAX, kCFStringEncodingUTF8)) return NULL;
367
368 /* Check if file exists */
369 if (stat(filename, &sb) != 0) return NULL;
370
371 /* Check if file can be opened */
372 if ((fd=open(filename, CF_OPENFLGS|O_RDONLY)) < 0) return NULL;
373
374 #if DEPLOYMENT_TARGET_WINDOWS
375 HANDLE mappedFileHandle = (HANDLE)_get_osfhandle(fd);
376 if (!mappedFileHandle) return NULL;
377
378 HANDLE mapHandle = CreateFileMapping(mappedFileHandle, NULL, PAGE_READONLY, 0, 0, NULL);
379 if (!mapHandle) return NULL;
380
381 char *map = (char *)MapViewOfFile(mapHandle, FILE_MAP_READ, 0, 0, sb.st_size);
382 if (!map) return NULL;
383 #else
384 char *map = mmap(0, sb.st_size, PROT_READ, MAP_FILE|MAP_SHARED, fd, 0);
385 #endif
386
387 CFBurstTrieRef trie = NULL;
388 TrieHeader *header = (TrieHeader *)map;
389
390 if (((uint32_t*)map)[0] == 0xbabeface) {
391 trie = (CFBurstTrieRef) calloc(1, sizeof(struct _CFBurstTrie));
392 trie->mapBase = map;
393 trie->mapSize = CFSwapInt32LittleToHost(sb.st_size);
394 trie->mapOffset = CFSwapInt32LittleToHost(((fileHeader*)trie->mapBase)->rootOffset);
395 trie->cflags = CFSwapInt32LittleToHost(((fileHeader*)trie->mapBase)->flags);
396 trie->count = CFSwapInt32LittleToHost(((fileHeader*)trie->mapBase)->count);
397 trie->retain = 1;
398 #if DEPLOYMENT_TARGET_WINDOWS
399 trie->mappedFileHandle = mappedFileHandle;
400 trie->mapHandle = mapHandle;
401 #else
402 // On Windows, the file being mapped must stay open as long as the map exists. Don't close it early. Other platforms close it here.
403 close(fd);
404 #endif
405 } else if (header->signature == 0xcafebabe || header->signature == 0x0ddba11) {
406 trie = (CFBurstTrieRef) calloc(1, sizeof(struct _CFBurstTrie));
407 trie->mapBase = map;
408 trie->mapSize = CFSwapInt32LittleToHost(sb.st_size);
409 trie->cflags = CFSwapInt32LittleToHost(header->flags);
410 trie->count = CFSwapInt32LittleToHost(header->count);
411 trie->retain = 1;
412 #if DEPLOYMENT_TARGET_WINDOWS
413 trie->mappedFileHandle = mappedFileHandle;
414 trie->mapHandle = mapHandle;
415 #else
416 // On Windows, the file being mapped must stay open as long as the map exists. Don't close it early. Other platforms close it here.
417 close(fd);
418 #endif
419 } else {
420 close(fd);
421 }
422 return trie;
423 }
424
425 CFBurstTrieRef CFBurstTrieCreateFromMapBytes(char *mapBase) {
426 CFBurstTrieRef trie = NULL;
427 TrieHeader *header = (TrieHeader *)mapBase;
428
429 if (mapBase && ((uint32_t*)mapBase)[0] == 0xbabeface) {
430 trie = (CFBurstTrieRef) malloc(sizeof(struct _CFBurstTrie));
431 trie->mapBase = mapBase;
432 trie->mapSize = CFSwapInt32LittleToHost(((fileHeader*)trie->mapBase)->size);
433 trie->mapOffset = CFSwapInt32LittleToHost(((fileHeader*)trie->mapBase)->rootOffset);
434 trie->cflags = CFSwapInt32LittleToHost(((fileHeader*)trie->mapBase)->flags);
435 trie->count = CFSwapInt32LittleToHost(((fileHeader*)trie->mapBase)->count);
436 trie->retain = 1;
437 } else if (mapBase && (header->signature == 0xcafebabe || header->signature == 0x0ddba11)) {
438 trie = (CFBurstTrieRef) malloc(sizeof(struct _CFBurstTrie));
439 trie->mapBase = mapBase;
440 trie->mapSize = CFSwapInt32LittleToHost(header->size);
441 trie->cflags = CFSwapInt32LittleToHost(header->flags);
442 trie->count = CFSwapInt32LittleToHost(header->count);
443 trie->retain = 1;
444 }
445 return trie;
446 }
447
448 Boolean CFBurstTrieInsert(CFBurstTrieRef trie, CFStringRef term, CFRange termRange, CFIndex payload) {
449 return CFBurstTrieAddWithWeight(trie, term, termRange, 1, (uint32_t)payload);
450 }
451
452 Boolean CFBurstTrieAdd(CFBurstTrieRef trie, CFStringRef term, CFRange termRange, uint32_t payload) {
453 return CFBurstTrieAddWithWeight(trie, term, termRange, 1, payload);
454 }
455
456 Boolean CFBurstTrieInsertCharacters(CFBurstTrieRef trie, UniChar *chars, CFIndex numChars, CFIndex payload) {
457 return CFBurstTrieAddCharactersWithWeight(trie, chars, numChars, 1, (uint32_t)payload);
458 }
459
460 Boolean CFBurstTrieAddCharacters(CFBurstTrieRef trie, UniChar *chars, CFIndex numChars, uint32_t payload) {
461 return CFBurstTrieAddCharactersWithWeight(trie, chars, numChars, 1, payload);
462 }
463
464 Boolean CFBurstTrieInsertUTF8String(CFBurstTrieRef trie, UInt8 *chars, CFIndex numChars, CFIndex payload) {
465 return CFBurstTrieAddUTF8StringWithWeight(trie, chars, numChars, 1, (uint32_t)payload);
466 }
467
468 Boolean CFBurstTrieAddUTF8String(CFBurstTrieRef trie, UInt8 *chars, CFIndex numChars, uint32_t payload) {
469 return CFBurstTrieAddUTF8StringWithWeight(trie, chars, numChars, 1, payload);
470 }
471
472 Boolean CFBurstTrieInsertWithWeight(CFBurstTrieRef trie, CFStringRef term, CFRange termRange, CFIndex weight, CFIndex payload) {
473 return CFBurstTrieAddWithWeight(trie, term, termRange, weight, (uint32_t)payload);
474 }
475
476 Boolean CFBurstTrieAddWithWeight(CFBurstTrieRef trie, CFStringRef term, CFRange termRange, uint32_t weight, uint32_t payload) {
477 Boolean success = false;
478 CFIndex size = MAX_STRING_ALLOCATION_SIZE;
479 CFIndex bytesize = termRange.length * 4; //** 4-byte max character size
480 if (!trie->mapBase && termRange.length < MAX_STRING_SIZE && payload > 0) {
481 CFIndex length;
482 UInt8 buffer[MAX_STRING_ALLOCATION_SIZE + 1];
483 UInt8 *key = buffer;
484 if (bytesize >= size) {
485 size = bytesize;
486 key = (UInt8 *) malloc(sizeof(UInt8) * size + 1);
487 }
488 CFStringGetBytes(term, termRange, kCFStringEncodingUTF8, (UInt8)'-', (Boolean)0, key, size, &length);
489 key[length] = 0;
490
491 success = CFBurstTrieAddUTF8StringWithWeight(trie, key, length, weight, payload);
492 if (buffer != key) free(key);
493 }
494 return success;
495 }
496
497 Boolean CFBurstTrieInsertCharactersWithWeight(CFBurstTrieRef trie, UniChar *chars, CFIndex numChars, CFIndex weight, CFIndex payload) {
498 return CFBurstTrieAddCharactersWithWeight(trie, chars, numChars, weight, (uint32_t)payload);
499 }
500
501 Boolean CFBurstTrieAddCharactersWithWeight(CFBurstTrieRef trie, UniChar *chars, CFIndex numChars, uint32_t weight, uint32_t payload) {
502 Boolean success = false;
503 CFIndex size = MAX_STRING_ALLOCATION_SIZE;
504 CFIndex bytesize = numChars * 4; //** 4-byte max character size
505 if (!trie->mapBase && numChars < MAX_STRING_SIZE && payload > 0) {
506 CFIndex length;
507 UInt8 buffer[MAX_STRING_ALLOCATION_SIZE + 1];
508 UInt8 *key = buffer;
509 if (bytesize >= size) {
510 size = bytesize;
511 key = (UInt8 *) malloc(sizeof(UInt8) * size + 1);
512 }
513 length = burstTrieConvertCharactersToUTF8(chars, numChars, key);
514 key[length] = 0;
515
516 success = CFBurstTrieAddUTF8StringWithWeight(trie, key, length, weight, payload);
517 if (buffer != key) free(key);
518 }
519 return success;
520 }
521
522 Boolean CFBurstTrieInsertUTF8StringWithWeight(CFBurstTrieRef trie, UInt8 *chars, CFIndex numChars, CFIndex weight, CFIndex payload) {
523 return CFBurstTrieAddUTF8StringWithWeight(trie, chars, numChars, weight, (uint32_t)payload);
524 }
525
526 Boolean CFBurstTrieAddUTF8StringWithWeight(CFBurstTrieRef trie, UInt8 *chars, CFIndex numChars, uint32_t weight, uint32_t payload) {
527 CFBTInsertCode code = FailedInsert;
528
529 if (!trie->mapBase && numChars < MAX_STRING_SIZE*4 && payload > 0) {
530 code = addCFBurstTrieLevel(trie, &trie->root, chars, numChars, weight, payload);
531 if (code == NewTerm) trie->count++;
532 }
533 return code > FailedInsert;
534 }
535
536 Boolean CFBurstTrieFind(CFBurstTrieRef trie, CFStringRef term, CFRange termRange, CFIndex *payload) {
537 uint32_t p;
538 if (CFBurstTrieContains(trie, term, termRange, &p)) {
539 SetPayload(payload, p);
540 return true;
541 }
542 return false;
543 }
544
545 Boolean CFBurstTrieContains(CFBurstTrieRef trie, CFStringRef term, CFRange termRange, uint32_t *payload) {
546 Boolean success = false;
547 CFIndex size = MAX_STRING_ALLOCATION_SIZE;
548 CFIndex bytesize = termRange.length * 4; //** 4-byte max character size
549 if (termRange.length < MAX_STRING_SIZE) {
550 CFIndex length;
551 UInt8 buffer[MAX_STRING_ALLOCATION_SIZE+1];
552 UInt8 *key = buffer;
553 if (bytesize >= size) {
554 size = bytesize;
555 key = (UInt8 *) malloc(sizeof(UInt8) * size + 1);
556 }
557 CFStringGetBytes(term, termRange, kCFStringEncodingUTF8, (UInt8)'-', (Boolean)0, key, size, &length);
558 key[length] = 0;
559
560 success = CFBurstTrieContainsUTF8String(trie, key, length, payload);
561 if (buffer != key) free(key);
562 }
563 return success;
564 }
565
566 Boolean CFBurstTrieFindCharacters(CFBurstTrieRef trie, UniChar *chars, CFIndex numChars, CFIndex *payload) {
567 uint32_t p;
568 if (CFBurstTrieContainsCharacters(trie, chars, numChars, &p)) {
569 SetPayload(payload, p);
570 return true;
571 }
572 return false;
573 }
574
575 Boolean CFBurstTrieContainsCharacters(CFBurstTrieRef trie, UniChar *chars, CFIndex numChars, uint32_t *payload) {
576 Boolean success = false;
577 CFIndex size = MAX_STRING_ALLOCATION_SIZE;
578 CFIndex bytesize = numChars * 4; //** 4-byte max character size
579 if (numChars < MAX_STRING_SIZE) {
580 CFIndex length;
581 UInt8 buffer[MAX_STRING_ALLOCATION_SIZE + 1];
582 UInt8 *key = buffer;
583 if (bytesize >= size) {
584 size = bytesize;
585 key = (UInt8 *) malloc(sizeof(UInt8) * size + 1);
586 }
587 length = burstTrieConvertCharactersToUTF8(chars, numChars, key);
588 key[length] = 0;
589
590 success = CFBurstTrieContainsUTF8String(trie, key, length, payload);
591 if (buffer != key) free(key);
592 }
593 return success;
594 }
595
596 Boolean CFBurstTrieFindUTF8String(CFBurstTrieRef trie, UInt8 *key, CFIndex length, CFIndex *payload) {
597 uint32_t p;
598 if (CFBurstTrieContainsUTF8String(trie, key, length, &p)) {
599 SetPayload(payload, p);
600 return true;
601 }
602 return false;
603 }
604
605 Boolean CFBurstTrieContainsUTF8String(CFBurstTrieRef trie, UInt8 *key, CFIndex length, uint32_t *payload) {
606 Boolean success = false;
607 if (length < MAX_STRING_SIZE) {
608 if (trie->mapBase && ((fileHeader *)trie->mapBase)->signature == 0xbabeface) {
609 bool prefix = (trie->cflags & kCFBurstTriePrefixCompression);
610 success = burstTrieMappedFind((DiskTrieLevelRef)(trie->mapBase+CFSwapInt32LittleToHost((((uint32_t*)trie->mapBase)[1]))), trie->mapBase, key, length, payload, prefix);
611 } else if (trie->mapBase && trie->cflags & (kCFBurstTriePrefixCompression | kCFBurstTrieSortByKey)) {
612 _CFBurstTrieCursor cursor;
613 if (!CFBurstTrieSetCursorForBytes(trie, &cursor, key, length))
614 return FALSE;
615 return CFBurstTrieCursorGetPayload(&cursor, payload);
616 } else {
617 uint32_t found = 0;
618 void *cursor = 0;
619 traverseCFBurstTrieWithCursor(trie, key, length, &cursor, true, &found, containsKey);
620 if (found) SetPayload(payload, found);
621 success = found > 0;
622 }
623 }
624 return success;
625 }
626
627 Boolean CFBurstTrieSerialize(CFBurstTrieRef trie, CFStringRef path, CFBurstTrieOpts opts) {
628 Boolean success = false;
629 if (trie->mapBase) {
630 return success;
631 } else {
632 int fd;
633 char filename[PATH_MAX];
634
635 /* Check valid path name */
636 if (!CFStringGetCString(path, filename, PATH_MAX, kCFStringEncodingUTF8)) return success;
637
638 /* Check if file can be opened */
639 if ((fd=open(filename, CF_OPENFLGS|O_RDWR|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR)) < 0) return success;
640
641 if (CFBurstTrieSerializeWithFileDescriptor(trie, fd, opts)) success = true;
642
643 close(fd);
644 }
645 return success;
646 }
647
648 Boolean CFBurstTrieSerializeWithFileDescriptor(CFBurstTrieRef trie, int fd, CFBurstTrieOpts opts) {
649 Boolean success = false;
650 if (!trie->mapBase && fd >= 0) {
651 off_t start_offset = lseek(fd, 0, SEEK_END);
652
653 trie->cflags = opts;
654 trie->mapSize = serializeCFBurstTrie(trie, start_offset, fd);
655
656 #if DEPLOYMENT_TARGET_WINDOWS
657 HANDLE mappedFileHandle = (HANDLE)_get_osfhandle(fd);
658 // We need to make sure we have our own handle to keep this file open as long as the mmap lasts
659 DuplicateHandle(GetCurrentProcess(), mappedFileHandle, GetCurrentProcess(), &mappedFileHandle, 0, 0, DUPLICATE_SAME_ACCESS);
660 HANDLE mapHandle = CreateFileMapping(mappedFileHandle, NULL, PAGE_READONLY, 0, 0, NULL);
661 if (!mapHandle) return NULL;
662 char *map = (char *)MapViewOfFile(mapHandle, FILE_MAP_READ, 0, start_offset, trie->mapSize);
663 if (!map) return NULL;
664 trie->mapBase = map;
665 trie->mapHandle = mapHandle;
666 trie->mappedFileHandle = mappedFileHandle;
667 #else
668 trie->mapBase = mmap(0, trie->mapSize, PROT_READ, MAP_FILE|MAP_SHARED, fd, start_offset);
669 #endif
670 success = true;
671 }
672
673 return success;
674 }
675
676 void CFBurstTrieTraverse(CFBurstTrieRef trie, void *ctx, void (*callback)(void*, const UInt8*, uint32_t, uint32_t)) {
677 TrieHeader *header = (TrieHeader *)trie->mapBase;
678 if (!trie->mapBase || (header->signature == 0xcafebabe || header->signature == 0x0ddba11)) {
679 void *cursor = 0;
680 TraverseContext context;
681 context.context = ctx;
682 context.callback = callback;
683 traverseCFBurstTrieWithCursor(trie, (const uint8_t *)"", 0, &cursor, false, &context, foundKey);
684 }
685 }
686
687
688 void CFBurstTrieTraverseWithCursor(CFBurstTrieRef trie, const uint8_t *prefix, uint32_t prefixLen, void **cursor, void *ctx, bool (*callback)(void *, const uint8_t *, uint32_t, bool))
689 {
690 traverseCFBurstTrieWithCursor(trie, prefix, prefixLen, cursor, false, ctx, callback);
691 }
692
693 void CFBurstTriePrint(CFBurstTrieRef trie) {
694
695 }
696
697 CFIndex CFBurstTrieGetCount(CFBurstTrieRef trie) {
698 return trie->count;
699 }
700
701 CFBurstTrieRef CFBurstTrieRetain(CFBurstTrieRef trie) {
702 trie->retain++;
703 return trie;
704 }
705
706 void CFBurstTrieRelease(CFBurstTrieRef trie) {
707 trie->retain--;
708 if (trie->retain == 0) destroyCFBurstTrie(trie);
709 return;
710 }
711
712 Boolean CFBurstTrieSetCursorForBytes(CFBurstTrieRef trie, CFBurstTrieCursorRef cursor, const UInt8* bytes, CFIndex length)
713 {
714 if (!trie->mapBase || !(trie->cflags & (kCFBurstTriePrefixCompression | kCFBurstTrieSortByKey))) {
715 //fprintf(stderr, "CFBurstTrieCreateCursorForBytes() only support file based trie in prefix compression format.\n");
716 return FALSE;
717 }
718
719 TrieHeader *header = (TrieHeader*)trie->mapBase;
720 if (length < 0 || !trie)
721 return FALSE;
722
723 cursor->trie = trie;
724 if (trie->mapBase) {
725 cursor->cursorType = _kCFBurstTrieCursorMapType;
726 cursor->mapCursor.next = header->rootOffset;
727 cursor->mapCursor.isOnPage = FALSE;
728 cursor->mapCursor.entryOffsetInPage = 0;
729 cursor->mapCursor.offsetInEntry = 0;
730 cursor->mapCursor.payload = 0;
731 } else
732 assert(false);
733
734 if (!bytes || length == 0)
735 return TRUE;
736
737 return CFBurstTrieCursorAdvanceForBytes(cursor, bytes, length);
738 }
739
740
741 CFBurstTrieCursorRef CFBurstTrieCreateCursorForBytes(CFBurstTrieRef trie, const UInt8* bytes, CFIndex length)
742 {
743 CFBurstTrieCursorRef cursor = (CFBurstTrieCursorRef)calloc(sizeof(_CFBurstTrieCursor), 1);
744 if (!CFBurstTrieSetCursorForBytes(trie, cursor, bytes, length)) {
745 CFBurstTrieCursorRelease(cursor);
746 return NULL;
747 }
748 return cursor;
749 }
750
751 CFBurstTrieCursorRef CFBurstTrieCursorCreateByCopy(CFBurstTrieCursorRef cursor)
752 {
753 if (!cursor)
754 return NULL;
755
756 CFBurstTrieCursorRef newCursor = (CFBurstTrieCursorRef)calloc(sizeof(_CFBurstTrieCursor), 1);
757 switch (cursor->cursorType) {
758 case _kCFBurstTrieCursorMapType:
759 copyMapCursor(&cursor->mapCursor, &newCursor->mapCursor);
760 break;
761 case _kCFBurstTrieCursorTrieType:
762 assert(false);
763 break;
764 }
765 newCursor->cursorType = cursor->cursorType;
766 newCursor->trie = cursor->trie;
767 return newCursor;
768 }
769
770 Boolean CFBurstTrieCursorIsEqual(CFBurstTrieCursorRef lhs, CFBurstTrieCursorRef rhs)
771 {
772 if (lhs->trie != rhs->trie || lhs->cursorType != rhs->cursorType)
773 return FALSE;
774
775 if (lhs->cursorType == _kCFBurstTrieCursorMapType)
776 return areMapCursorsEqual(&lhs->mapCursor, &rhs->mapCursor);
777 else
778 return FALSE;
779 }
780
781 Boolean CFBurstTrieCursorAdvanceForBytes(CFBurstTrieCursorRef cursor, const UInt8* bytes, CFIndex length)
782 {
783 switch (cursor->cursorType) {
784 case _kCFBurstTrieCursorMapType: {
785 CompactMapCursor tempCursor;
786 copyMapCursor(&cursor->mapCursor, &tempCursor);
787 if (advanceMapCursor(cursor->trie, (CompactMapCursor*)&cursor->mapCursor, bytes, length))
788 return TRUE;
789 else {
790 copyMapCursor(&tempCursor, &cursor->mapCursor);
791 return FALSE;
792 }
793 }
794 case _kCFBurstTrieCursorTrieType:
795 return FALSE;
796 }
797 return FALSE;
798 }
799
800 Boolean CFBurstTrieCursorGetPayload(CFBurstTrieCursorRef cursor, uint32_t *payload)
801 {
802 switch (cursor->cursorType) {
803 case _kCFBurstTrieCursorMapType:
804 return getMapCursorPayload(cursor->trie, (CompactMapCursor*)&cursor->mapCursor, payload);
805 case _kCFBurstTrieCursorTrieType:
806 return FALSE;
807 }
808 return FALSE;
809 }
810
811 void CFBurstTrieCursorRelease(CFBurstTrieCursorRef cursor)
812 {
813 if (!cursor)
814 return;
815 free(cursor);
816 }
817
818 void CFBurstTrieTraverseFromCursor(CFBurstTrieCursorRef cursor, void *ctx, CFBurstTrieTraversalCallback callback)
819 {
820 if (!cursor)
821 return;
822
823 UInt8 *bytes = (UInt8*)calloc(1, MAX_KEY_LENGTH);
824 uint32_t capacity = MAX_KEY_LENGTH;
825 uint32_t length = 0;
826 Boolean stop = FALSE;
827 switch (cursor->cursorType) {
828 case _kCFBurstTrieCursorMapType: {
829 CompactMapCursor tempCursor;
830 copyMapCursor(&cursor->mapCursor, &tempCursor);
831 traverseFromMapCursor(cursor->trie, &tempCursor, bytes, capacity,length, &stop, ctx, callback);
832 break;
833 }
834 case _kCFBurstTrieCursorTrieType:
835 break;
836 }
837 free(bytes);
838 }
839
840 #if 0
841 #pragma mark -
842 #pragma mark Insertion
843 #endif
844
845 static ListNodeRef makeCFBurstTrieListNode(const uint8_t *key, uint32_t keylen, uint32_t weight, uint32_t payload) {
846 ListNodeRef node = (ListNodeRef) calloc(1, sizeof(*node) + keylen + 1);
847 memcpy(node->string, key, keylen);
848 node->string[keylen] = 0;
849 node->next = 0;
850 node->length = keylen;
851 node->weight = weight;
852 node->payload = payload;
853 return node;
854 }
855
856 static void addCFBurstTrieBurstLevel(CFBurstTrieRef trie, TrieLevelRef root, const uint8_t *key, uint32_t keylen, uint32_t weight, uint32_t payload) {
857 if (keylen) {
858 NextTrie next = root->slots[*key];
859 ListNodeRef newNode = makeCFBurstTrieListNode(key+1, keylen-1, weight, payload);
860 newNode->weight = weight;
861 newNode->next = (ListNodeRef) NextTrie_GetPtr(next);
862 next = (uintptr_t) newNode;
863 NextTrie_SetKind(next, ListKind);
864 root->slots[*key] = next;
865 } else {
866 // ** Handle payload.
867 root->weight = weight;
868 root->payload = payload;
869 }
870 }
871
872 static TrieLevelRef burstCFBurstTrieLevel(CFBurstTrieRef trie, ListNodeRef list, uint32_t listCount) {
873 TrieLevelRef newLevel = (TrieLevelRef) calloc(1, sizeof(struct _TrieLevel));
874 while (list) {
875 addCFBurstTrieBurstLevel(trie, newLevel, list->string, list->length, list->weight, list->payload);
876 ListNodeRef temp = list;
877 list = list->next;
878 free(temp);
879 }
880 return newLevel;
881 }
882
883 static CFBTInsertCode addCFBurstTrieListNode(CFBurstTrieRef trie, ListNodeRef list, const uint8_t *key, uint32_t keylen, uint32_t weight, uint32_t payload, uint32_t *listCount)
884 {
885 CFBTInsertCode code = FailedInsert;
886 uint32_t count = 1;
887
888 ListNodeRef last = list;
889 while (list) {
890 if (list->length == keylen && memcmp(key, list->string, keylen) == 0) {
891 list->weight += weight;
892 list->payload = payload;
893 code = ExistingTerm;
894 break;
895 } else {
896 count++;
897 last = list;
898 list = list->next;
899 }
900 }
901
902 if (!list) {
903 last->next = makeCFBurstTrieListNode(key, keylen, weight, payload);
904 code = NewTerm;
905 }
906
907 *listCount = count;
908 return code;
909 }
910
911 static CFBTInsertCode addCFBurstTrieLevel(CFBurstTrieRef trie, TrieLevelRef root, const uint8_t *key, uint32_t keylen, uint32_t weight, uint32_t payload)
912 {
913 CFBTInsertCode code = FailedInsert;
914 if (keylen) {
915 NextTrie next = root->slots[*key];
916 if (NextTrie_GetKind(next) == TrieKind) {
917 TrieLevelRef nextLevel = (TrieLevelRef) NextTrie_GetPtr(next);
918 code = addCFBurstTrieLevel(trie, nextLevel, key+1, keylen-1, weight, payload);
919 } else {
920 if (NextTrie_GetKind(next) == ListKind) {
921 uint32_t listCount;
922 ListNodeRef listNode = (ListNodeRef) NextTrie_GetPtr(next);
923 code = addCFBurstTrieListNode(trie, listNode, key+1, keylen-1, weight, payload, &listCount);
924 if (listCount > trie->containerSize) {
925 next = (uintptr_t) burstCFBurstTrieLevel(trie, listNode, listCount);
926 NextTrie_SetKind(next, TrieKind);
927 }
928 } else {
929 // ** Make a new list node
930 next = (uintptr_t) makeCFBurstTrieListNode(key+1, keylen-1, weight, payload);
931 NextTrie_SetKind(next, ListKind);
932 code = NewTerm;
933 }
934 root->slots[*key] = next;
935 }
936 } else {
937 // ** Handle payload
938 if (!root->weight) code = NewTerm;
939 else code = ExistingTerm;
940 root->weight += weight;
941 root->payload = payload;
942 }
943
944 return code;
945 }
946 #if 0
947 #pragma mark -
948 #pragma mark Searching
949 #endif
950 static void findCFBurstTrieList(CFBurstTrieRef trie, TrieCursor *cursor, void *ctx, bool (*callback)(void*, const uint8_t*, uint32_t, bool))
951 {
952 ListNodeRef list = (ListNodeRef)NextTrie_GetPtr(cursor->next);
953 int len = cursor->prefixlen-cursor->keylen;
954 len = len <= 0 ? 0 : len;
955 while (list) {
956 int lencompare = list->length-len;
957 if (list->length >= len &&
958 (len == 0 || memcmp(list->string, cursor->prefix+cursor->keylen, len) == 0)) {
959 memcpy(cursor->key+cursor->keylen, list->string, list->length);
960 cursor->key[cursor->keylen+list->length] = 0;
961 cursor->next = (NextTrie)list;
962 if (list->payload && callback(ctx, cursor->key, list->payload, lencompare==0)) return;
963 }
964 list = list->next;
965 }
966 }
967
968 static void findCFBurstTrieMappedPage(CFBurstTrieRef trie, MapCursor *cursor, void *ctx, bool (*callback)(void*, const uint8_t*, uint32_t, bool))
969 {
970 Page *page = (Page *)DiskNextTrie_GetPtr(trie->mapBase, cursor->next);
971 uint32_t end = page->length;
972 uint32_t cur = 0;
973 int len = cursor->prefixlen-cursor->keylen;
974 len = len <= 0 ? 0 : len;
975 if (trie->cflags & kCFBurstTriePrefixCompression) {
976 uint8_t pfx[CHARACTER_SET_SIZE];
977 PageEntryPacked *lastEntry = 0;
978 while (cur < end) {
979 PageEntryPacked *entry = (PageEntryPacked *)&page->data[cur];
980 int lencompare = (entry->strlen+entry->pfxLen)-len;
981 if (lastEntry && entry->pfxLen>lastEntry->pfxLen) memcpy(pfx+lastEntry->pfxLen, lastEntry->string, entry->pfxLen-lastEntry->pfxLen);
982 if (lencompare >= 0 &&
983 (len == 0 || (__builtin_memcmp(pfx, cursor->prefix+cursor->keylen, entry->pfxLen) == 0 &&
984 __builtin_memcmp(entry->string, cursor->prefix+cursor->keylen+entry->pfxLen, cursor->prefixlen-cursor->keylen-entry->pfxLen) == 0))) {
985 memcpy(cursor->key+cursor->keylen, pfx, entry->pfxLen);
986 memcpy(cursor->key+cursor->keylen+entry->pfxLen, entry->string, entry->strlen);
987 cursor->key[cursor->keylen+entry->pfxLen+entry->strlen] = 0;
988 if (entry->payload && callback(ctx, (const uint8_t *)cursor->key, entry->payload, lencompare==0)) return;
989 }
990 lastEntry = entry;
991 cur += sizeof(*entry) + entry->strlen;
992 }
993 } else {
994 while (cur < end) {
995 PageEntry *entry = (PageEntry *)&page->data[cur];
996 int lencompare = entry->strlen-len;
997 if (lencompare >= 0 && __builtin_memcmp(entry->string, cursor->prefix+cursor->keylen, len) == 0) {
998 memcpy(cursor->key+cursor->keylen, entry->string, entry->strlen);
999 cursor->key[cursor->keylen+entry->strlen] = 0;
1000 if (entry->payload && callback(ctx, (const uint8_t *)cursor->key, entry->payload, lencompare==0)) return;
1001 }
1002 cur += sizeof(*entry) + entry->strlen;
1003 }
1004 }
1005 }
1006
1007
1008 static void findCFBurstTrieLevel(CFBurstTrieRef trie, TrieCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void*, const uint8_t*, uint32_t, bool))
1009 {
1010 if (cursor->keylen < cursor->prefixlen) {
1011 cursor->next = ((TrieLevelRef)NextTrie_GetPtr(cursor->next))->slots[cursor->prefix[cursor->keylen]];
1012 cursor->key[cursor->keylen++] = cursor->prefix[cursor->keylen];
1013
1014 if (NextTrie_GetKind(cursor->next) == TrieKind) {
1015 findCFBurstTrieLevel(trie, cursor, exactmatch, ctx, callback);
1016 } else if (NextTrie_GetKind(cursor->next) == ListKind) {
1017 findCFBurstTrieList(trie, cursor, ctx, callback);
1018 }
1019 } else {
1020 TrieLevelRef root = (TrieLevelRef)NextTrie_GetPtr(cursor->next);
1021 if (root->payload && callback(ctx, cursor->key, root->payload, cursor->prefixlen==cursor->keylen)) return;
1022 if (cursor->keylen == cursor->prefixlen && exactmatch) return;
1023 traverseCFBurstTrieLevel(trie, root, cursor, exactmatch, ctx, callback);
1024 }
1025 }
1026
1027 static void findCFBurstTrieCompactMappedLevel(CFBurstTrieRef trie, MapCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void*, const uint8_t*, uint32_t, bool))
1028 {
1029 CompactMapTrieLevelRef root = (CompactMapTrieLevelRef)DiskNextTrie_GetPtr(trie->mapBase, cursor->next);
1030 if (cursor->keylen < cursor->prefixlen) {
1031 uint8_t mykey = *(cursor->prefix+cursor->keylen);
1032 cursor->key[cursor->keylen++] = *(cursor->prefix+cursor->keylen);
1033
1034 uint8_t slot = mykey / 64;
1035 uint8_t bit = mykey % 64;
1036 uint32_t item = 0;
1037 uint64_t bword = root->bitmap[slot];
1038
1039 if (bword & (1ull << bit)) {
1040 // ** Count all the set bits up to this bit
1041 for (int i=0; i < slot; i++) item += __builtin_popcountll(root->bitmap[i]);
1042 item += __builtin_popcountll(bword & ((1ull << bit)-1));
1043 uint32_t offset = root->slots[item];
1044 cursor->next = offset;
1045
1046 if (DiskNextTrie_GetKind(offset) == TrieKind) {
1047 findCFBurstTrieMappedLevel(trie, cursor, exactmatch, ctx, callback);
1048 } else if (DiskNextTrie_GetKind(offset) == CompactTrieKind) {
1049 findCFBurstTrieCompactMappedLevel(trie, cursor, exactmatch, ctx, callback);
1050 } else if (DiskNextTrie_GetKind(offset) == ListKind) {
1051 findCFBurstTrieMappedPage(trie, cursor, ctx, callback);
1052 }
1053 }
1054 } else {
1055 if(root->payload && callback(ctx, cursor->key, root->payload, cursor->prefixlen==cursor->keylen)) return;
1056 if (cursor->keylen == cursor->prefixlen && exactmatch) return;
1057 traverseCFBurstTrieCompactMappedLevel(trie, root, cursor, exactmatch, ctx, callback);
1058 }
1059 }
1060
1061 static void findCFBurstTrieMappedLevel(CFBurstTrieRef trie, MapCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void*, const uint8_t*, uint32_t, bool))
1062 {
1063 MapTrieLevelRef root = (MapTrieLevelRef)DiskNextTrie_GetPtr(trie->mapBase, cursor->next);
1064 if (cursor->keylen < cursor->prefixlen) {
1065 uint8_t slot = *(cursor->prefix+cursor->keylen);
1066 cursor->next = root->slots[slot];
1067 cursor->key[cursor->keylen++] = slot;
1068
1069 if (DiskNextTrie_GetKind(cursor->next) == TrieKind) {
1070 findCFBurstTrieMappedLevel(trie, cursor, exactmatch, ctx, callback);
1071 } else if (DiskNextTrie_GetKind(cursor->next) == CompactTrieKind) {
1072 findCFBurstTrieCompactMappedLevel(trie, cursor, exactmatch, ctx, callback);
1073 } else if (DiskNextTrie_GetKind(cursor->next) == ListKind) {
1074 findCFBurstTrieMappedPage(trie, cursor, ctx, callback);
1075 }
1076 } else {
1077 if (root->payload && callback(ctx, cursor->key, root->payload, cursor->prefixlen==cursor->keylen)) return;
1078 if (cursor->keylen == cursor->prefixlen && exactmatch) return;
1079 traverseCFBurstTrieMappedLevel(trie, root, cursor, exactmatch, ctx, callback);
1080 }
1081 }
1082
1083 static void traverseCFBurstTrieLevel(CFBurstTrieRef trie, TrieLevelRef root, TrieCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void *, const uint8_t *, uint32_t, bool))
1084 {
1085 cursor->key[cursor->keylen] = 0;
1086 uint32_t len = cursor->keylen;
1087 for (int i=0; i < CHARACTER_SET_SIZE; i++) {
1088 NextTrie next = root->slots[i];
1089 cursor->keylen = len;
1090 cursor->key[cursor->keylen++] = i;
1091
1092 if (NextTrie_GetKind(next) == TrieKind) {
1093 TrieLevelRef level = (TrieLevelRef)NextTrie_GetPtr(next);
1094 if (level->payload && callback(ctx, cursor->key, level->payload, cursor->prefixlen==cursor->keylen)) return;
1095 if (cursor->keylen == cursor->prefixlen && exactmatch) return;
1096 traverseCFBurstTrieLevel(trie, level, cursor, exactmatch, ctx, callback);
1097 } else if (NextTrie_GetKind(next) == ListKind) {
1098 cursor->next = next;
1099 cursor->key[cursor->keylen] = 0;
1100 findCFBurstTrieList(trie, cursor, ctx, callback);
1101 }
1102 }
1103 }
1104
1105 static void traverseCFBurstTrieMappedLevel(CFBurstTrieRef trie, MapTrieLevelRef root, MapCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void *, const uint8_t *, uint32_t, bool))
1106 {
1107 cursor->key[cursor->keylen] = 0;
1108 uint32_t len = cursor->keylen;
1109
1110 for (int i=0; i < CHARACTER_SET_SIZE; i++) {
1111 uint32_t offset = (uint32_t)root->slots[i];
1112 cursor->keylen = len;
1113 cursor->key[cursor->keylen++] = i;
1114
1115 if (DiskNextTrie_GetKind(offset) == TrieKind) {
1116 MapTrieLevelRef level = (MapTrieLevelRef)DiskNextTrie_GetPtr(trie->mapBase, offset);
1117 if (level->payload && callback(ctx, cursor->key, level->payload, cursor->prefixlen==cursor->keylen)) return;
1118 if (cursor->keylen == cursor->prefixlen && exactmatch) return;
1119 traverseCFBurstTrieMappedLevel(trie, level, cursor, exactmatch, ctx, callback);
1120 } else if (DiskNextTrie_GetKind(offset) == CompactTrieKind) {
1121 CompactMapTrieLevelRef level = (CompactMapTrieLevelRef)DiskNextTrie_GetPtr(trie->mapBase, offset);
1122 if (level->payload && callback(ctx, cursor->key, level->payload, cursor->prefixlen==cursor->keylen)) return;
1123 if (cursor->keylen == cursor->prefixlen && exactmatch) return;
1124 traverseCFBurstTrieCompactMappedLevel(trie, level, cursor, exactmatch, ctx, callback);
1125 } else if (DiskNextTrie_GetKind(offset) == ListKind) {
1126 cursor->next = offset;
1127 cursor->key[cursor->keylen] = 0;
1128 findCFBurstTrieMappedPage(trie, cursor, ctx, callback);
1129 }
1130 }
1131 }
1132
1133 static void traverseCFBurstTrieCompactMappedLevel(CFBurstTrieRef trie, CompactMapTrieLevelRef root, MapCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void *, const uint8_t *, uint32_t, bool))
1134 {
1135 cursor->key[cursor->keylen] = 0;
1136 uint32_t len = cursor->keylen;
1137 for (uint32_t c=0; c < CHARACTER_SET_SIZE; c++) {
1138 //** This could be optimized to remember what the last slot/item was and not count bits over and over.
1139 uint8_t mykey = c;
1140 uint32_t slot = mykey / 64;
1141 uint32_t bit = mykey % 64;
1142 uint32_t item = 0;
1143 uint64_t bword = root->bitmap[slot];
1144 cursor->keylen = len;
1145
1146 if (bword & (1ull << bit)) {
1147 // ** Count all the set bits up to this bit
1148 for (int i=0; i < slot; i++) item += __builtin_popcountll(root->bitmap[i]);
1149 item += __builtin_popcountll(bword & ((1ull << bit)-1));
1150 uint32_t offset = root->slots[item];
1151 cursor->key[cursor->keylen++] = mykey;
1152
1153 if(DiskNextTrie_GetKind(offset) == CompactTrieKind) {
1154 CompactMapTrieLevelRef level = (CompactMapTrieLevelRef)DiskNextTrie_GetPtr(trie->mapBase, offset);
1155 if (level->payload && callback(ctx, cursor->key, level->payload, cursor->prefixlen==cursor->keylen)) return;
1156 if (cursor->keylen == cursor->prefixlen && exactmatch) return;
1157 traverseCFBurstTrieCompactMappedLevel(trie, level, cursor, exactmatch, ctx, callback);
1158 } else if(DiskNextTrie_GetKind(offset) == TrieKind) {
1159 traverseCFBurstTrieMappedLevel(trie, (MapTrieLevelRef)DiskNextTrie_GetPtr(trie->mapBase, offset), cursor, exactmatch, ctx, callback);
1160 } else if (DiskNextTrie_GetKind(offset) == ListKind) {
1161 cursor->next = offset;
1162 cursor->key[cursor->keylen] = 0;
1163 findCFBurstTrieMappedPage(trie, cursor, ctx, callback);
1164 }
1165 }
1166 }
1167 }
1168
1169 static void traverseCFBurstTrieWithCursor(CFBurstTrieRef trie, const uint8_t *prefix, uint32_t prefixLen, void **cursor, bool exactmatch, void *ctx, bool (*callback)(void *, const uint8_t *, uint32_t, bool)) {
1170 if (trie->mapBase) {
1171 if (trie->cflags & kCFBurstTriePrefixCompression) {
1172 fprintf(stderr, "Please use CFBurstTrieCursorRef API for file based trie.\n");
1173 return;
1174 } else {
1175 TrieHeader *header = (TrieHeader *)trie->mapBase;
1176 MapCursor csr;
1177 csr.next = header->rootOffset;
1178 csr.prefix = prefix;
1179 csr.prefixlen = prefixLen;
1180 csr.key[0] = 0;
1181 csr.keylen = 0;
1182 findCFBurstTrieMappedLevel(trie, &csr, exactmatch, ctx, callback);
1183 }
1184 } else {
1185 TrieCursor csr;
1186 csr.next = ((uintptr_t)&trie->root)|TrieKind;
1187 csr.prefix = prefix;
1188 csr.prefixlen = prefixLen;
1189 csr.key[0] = 0;
1190 csr.keylen = 0;
1191 findCFBurstTrieLevel(trie, &csr, exactmatch, ctx, callback);
1192 }
1193 }
1194
1195 CF_INLINE uint32_t getPackedPageEntrySize(PageEntryPacked *entry)
1196 {
1197 return sizeof(PageEntryPacked) + entry->strlen;
1198 }
1199
1200 CF_INLINE uint32_t getPageEntrySize(PageEntry *entry)
1201 {
1202 return sizeof(PageEntry) + entry->strlen;
1203 }
1204
1205 /*
1206 static void _printPageEntry(PageEntryPacked *entry) {
1207 printf("entry 0x%p:\n", entry);
1208 printf("pfxLen = %d, strLen = %d\n", entry->pfxLen, entry->strlen);
1209 if (entry->strlen > 0) {
1210 printf("string = ");
1211 for (int i = 0; i < entry->strlen; ++i)
1212 printf("%d ", entry->string[i]);
1213 printf("\n");
1214 }
1215 printf("\n");
1216 }
1217 */
1218 static Boolean advanceCursorOnMappedPageForByte(Page *page, CompactMapCursor *cursor, UInt8 byte) {
1219 PageEntryPacked *entry;
1220 Boolean found = FALSE;
1221 uint32_t minPrefixLength = 0;
1222
1223 if (cursor->isOnPage) {
1224 entry = (PageEntryPacked *)&page->data[cursor->entryOffsetInPage];
1225 //_printPageEntry(entry);
1226 BOOL shouldContinue = TRUE;
1227
1228 if (!(cursor->entryOffsetInPage == 0 && entry->strlen == 0)) {
1229 if (cursor->entryOffsetInPage == entry->strlen - 1) {
1230 minPrefixLength = entry->pfxLen + entry->strlen;
1231 cursor->entryOffsetInPage += getPackedPageEntrySize(entry);
1232 } else {
1233 cursor->offsetInEntry++;
1234 if (entry->string[cursor->offsetInEntry] == byte)
1235 found = TRUE;
1236 else if (entry->string[cursor->offsetInEntry] > byte)
1237 shouldContinue = FALSE;
1238 else {
1239 minPrefixLength = entry->pfxLen + cursor->offsetInEntry;
1240 cursor->entryOffsetInPage += getPackedPageEntrySize(entry);
1241 }
1242 }
1243 }
1244
1245 if (found) {
1246 cursor->isOnPage = TRUE;
1247 return TRUE;
1248 } else if (!shouldContinue)
1249 return FALSE;
1250 } else {
1251 cursor->entryOffsetInPage = 0;
1252 }
1253
1254 uint32_t pageSize = page->length - sizeof(Page);
1255 while (cursor->entryOffsetInPage < pageSize) {
1256 entry = (PageEntryPacked *)&page->data[cursor->entryOffsetInPage];
1257 //_printPageEntry(entry);
1258 if (minPrefixLength > entry->pfxLen)
1259 break;
1260 else if (minPrefixLength < entry->pfxLen)
1261 cursor->entryOffsetInPage += getPackedPageEntrySize(entry);
1262 else {
1263 if (entry->strlen == 0)
1264 cursor->entryOffsetInPage += getPackedPageEntrySize(entry);
1265 else {
1266 if (entry->string[0] > byte)
1267 // Entries are sorted alphabetically
1268 break;
1269 else if (entry->string[0] < byte)
1270 cursor->entryOffsetInPage += getPackedPageEntrySize(entry);
1271 else {
1272 cursor->offsetInEntry = 0;
1273 found = TRUE;
1274 break;
1275 }
1276 }
1277 }
1278 }
1279
1280 if (found)
1281 cursor->isOnPage = TRUE;
1282
1283 return found;
1284 }
1285
1286 static Boolean advanceCursorMappedPageWithPerfixCompression(Page *page, CompactMapCursor *cursor, const UInt8* bytes, CFIndex length)
1287 {
1288 if (length == 0) {
1289 PageEntryPacked *entry = (PageEntryPacked*)&page->data[0];
1290 if (!cursor->isOnPage) {
1291 cursor->entryOffsetInPage = 0;
1292 cursor->offsetInEntry = 0;
1293 cursor->isOnPage = entry->pfxLen == 0 && entry->strlen == 0;
1294 }
1295 getMapCursorPayloadFromPackedPageEntry(entry, cursor, &cursor->payload);
1296 return TRUE;
1297 }
1298
1299 for (CFIndex i = 0; i < length; ++i) {
1300 if (!advanceCursorOnMappedPageForByte(page, cursor, bytes[i]))
1301 return FALSE;
1302 }
1303 PageEntryPacked *entry = (PageEntryPacked*)&page->data[cursor->entryOffsetInPage];
1304 getMapCursorPayloadFromPackedPageEntry(entry, cursor, &cursor->payload);
1305 return TRUE;
1306 }
1307
1308 static Boolean advanceCursorMappedPageSortedByKey(Page *page, CompactMapCursor *cursor, const UInt8* bytes, CFIndex length)
1309 {
1310 if (length == 0) {
1311 PageEntry*entry = (PageEntry*)&page->data[0];
1312 if (!cursor->isOnPage) {
1313 cursor->entryOffsetInPage = 0;
1314 cursor->offsetInEntry = 0;
1315 cursor->isOnPage = entry->strlen == 0;
1316 }
1317 getMapCursorPayloadFromPageEntry(entry, cursor, &cursor->payload);
1318 return TRUE;
1319 }
1320
1321 PageEntry *entry;
1322 uint32_t pageSize = page->length - sizeof(Page);
1323 const UInt8 * prefix = NULL;
1324 uint32_t prefixLength = 0;
1325
1326 if (cursor->isOnPage) {
1327 entry = (PageEntry*)&page->data[cursor->entryOffsetInPage];
1328 prefix = entry->string;
1329 prefixLength = cursor->offsetInEntry + 1;
1330 }
1331
1332 while (cursor->entryOffsetInPage < pageSize) {
1333 PageEntry *entry = (PageEntry*)&page->data[cursor->entryOffsetInPage];
1334 if (entry->strlen == 0) {
1335 cursor->entryOffsetInPage += getPageEntrySize(entry);
1336 continue;
1337 }
1338
1339 if (entry->strlen <= prefixLength)
1340 return FALSE;
1341 else {
1342 int cmpResult = 0;
1343 if (prefixLength > 0)
1344 cmpResult = __builtin_memcmp(entry->string, prefix, prefixLength);
1345 if (cmpResult > 0)
1346 return FALSE;
1347 else if (cmpResult == 0) {
1348 if (entry->strlen < prefixLength + length) {
1349 int cmpResult2 = __builtin_memcmp(entry->string + prefixLength, bytes, entry->strlen - prefixLength);
1350 if (cmpResult2 > 0)
1351 return FALSE;
1352 else
1353 cursor->entryOffsetInPage += getPageEntrySize(entry);
1354 } else {
1355 int cmpResult2 = __builtin_memcmp(entry->string + prefixLength, bytes, length);
1356 if (cmpResult2 > 0)
1357 return FALSE;
1358 else if (cmpResult2 == 0) {
1359 cursor->isOnPage = TRUE;
1360 cursor->offsetInEntry = prefixLength + length - 1;
1361 getMapCursorPayloadFromPageEntry(entry, cursor, &cursor->payload);
1362 return TRUE;
1363 } else
1364 cursor->entryOffsetInPage += getPageEntrySize(entry);
1365 }
1366 } else
1367 cursor->entryOffsetInPage += getPageEntrySize(entry);
1368 }
1369 }
1370 return FALSE;
1371 }
1372
1373 static Boolean advanceCursorMappedPage(CFBurstTrieRef trie, CompactMapCursor *cursor, const UInt8* bytes, CFIndex length)
1374 {
1375 if (!bytes || length < 0)
1376 return FALSE;
1377
1378 Page *page = (Page *)DiskNextTrie_GetPtr(trie->mapBase, cursor->next);
1379 uint32_t pageSize = page->length - sizeof(Page);
1380 if (pageSize == 0)
1381 return FALSE;
1382
1383 if (trie->cflags & kCFBurstTrieSortByKey)
1384 return advanceCursorMappedPageSortedByKey(page, cursor, bytes, length);
1385 else if (trie->cflags & kCFBurstTriePrefixCompression)
1386 return advanceCursorMappedPageWithPerfixCompression(page, cursor, bytes, length);
1387 else
1388 return FALSE;
1389 }
1390
1391 static Boolean advanceCursorCompactMappedLevel(CFBurstTrieRef trie, CompactMapCursor *cursor, const UInt8* bytes, CFIndex length)
1392 {
1393 if (!bytes || length < 0)
1394 return FALSE;
1395
1396 CompactMapTrieLevelRef root = (CompactMapTrieLevelRef)DiskNextTrie_GetPtr(trie->mapBase, cursor->next);
1397 if (length == 0) {
1398 cursor->payload = root->payload;
1399 return TRUE;
1400 }
1401
1402 uint8_t slot = bytes[0] / 64;
1403 uint8_t bit = bytes[0] % 64;
1404 uint32_t item = 0;
1405 uint64_t bword = root->bitmap[slot];
1406 if (bword & (1ull << bit)) {
1407 // Count all the set bits up to this bit
1408 for (int i = 0; i < slot; ++i)
1409 item += __builtin_popcountll(root->bitmap[i]);
1410 item += __builtin_popcountll(bword & ((1ull << bit)-1));
1411 cursor->next = root->slots[item];
1412 return advanceMapCursor(trie, cursor, bytes + 1, length - 1);
1413 } else {
1414 return FALSE;
1415 }
1416 }
1417
1418 static Boolean advanceCursorMappedLevel(CFBurstTrieRef trie, CompactMapCursor *cursor, const UInt8* bytes, CFIndex length)
1419 {
1420 if (!bytes || length < 0)
1421 return FALSE;
1422
1423 MapTrieLevelRef root = (MapTrieLevelRef)DiskNextTrie_GetPtr(trie->mapBase, cursor->next);
1424 if (length == 0) {
1425 cursor->payload = root->payload;
1426 return TRUE;
1427 }
1428
1429 cursor->next = root->slots[bytes[0]];
1430 return advanceMapCursor(trie, cursor, bytes + 1, length - 1);
1431 }
1432
1433 static Boolean advanceMapCursor(CFBurstTrieRef trie, CompactMapCursor *cursor, const UInt8* bytes, CFIndex length)
1434 {
1435 bool result = FALSE;
1436 switch (DiskNextTrie_GetKind(cursor->next)) {
1437 case TrieKind:
1438 result = advanceCursorMappedLevel(trie, cursor, bytes, length);
1439 break;
1440 case CompactTrieKind:
1441 result = advanceCursorCompactMappedLevel(trie, cursor, bytes, length);
1442 break;
1443 case ListKind:
1444 result = advanceCursorMappedPage(trie, cursor, bytes, length);
1445 break;
1446 case Nothing: {
1447 TrieHeader *header = (TrieHeader*)trie->mapBase;
1448 if (cursor->next == header->rootOffset)
1449 result = advanceCursorMappedLevel(trie, cursor, bytes, length);
1450 }
1451 }
1452
1453 return result;
1454 }
1455
1456 static void traverseFromMapCursorMappedLevel(CFBurstTrieRef trie, CompactMapCursor *cursor, UInt8* bytes, uint32_t capacity, uint32_t length, Boolean *stop, void *ctx, CFBurstTrieTraversalCallback callback)
1457 {
1458 MapTrieLevelRef root = (MapTrieLevelRef)DiskNextTrie_GetPtr(trie->mapBase, cursor->next);
1459 if (root->payload) {
1460 callback(ctx, bytes, length, root->payload, stop);
1461 if (*stop)
1462 return;
1463 }
1464
1465 if (length >= capacity)
1466 return;
1467
1468 for (int i = 0; i < CHARACTER_SET_SIZE; ++i) {i =
1469 bytes[length] = i;
1470 cursor->next = (uint32_t)root->slots[i];;
1471 cursor->isOnPage = FALSE;
1472 cursor->entryOffsetInPage = 0;
1473 cursor->offsetInEntry = 0;
1474 cursor->payload = 0;
1475 traverseFromMapCursor(trie, cursor, bytes, capacity - 1, length + 1, stop, ctx, callback);
1476 if (*stop)
1477 return;
1478 }
1479 }
1480
1481 static void traverseFromMapCursorCompactMappedLevel(CFBurstTrieRef trie, CompactMapCursor *cursor, UInt8* bytes, uint32_t capacity, uint32_t length, Boolean *stop, void *ctx, CFBurstTrieTraversalCallback callback)
1482 {
1483 CompactMapTrieLevelRef root = (CompactMapTrieLevelRef)DiskNextTrie_GetPtr(trie->mapBase, cursor->next);
1484 if (root->payload) {
1485 callback(ctx, bytes, length, root->payload, stop);
1486 if (*stop)
1487 return;
1488 }
1489
1490 if (length >= capacity)
1491 return;
1492 for (int c = 0; c < CHARACTER_SET_SIZE; ++c) {
1493 bytes[length] = c;
1494 //This could be optimized to remember what the last slot/item was and not count bits over and over.
1495 uint8_t slot = c / 64;
1496 uint8_t bit = c % 64;
1497 uint32_t item = 0;
1498 uint64_t bword = root->bitmap[slot];
1499 if (bword & (1ull << bit)) {
1500 // Count all the set bits up to this bit
1501 for (int i = 0; i < slot; ++i)
1502 item += __builtin_popcountll(root->bitmap[i]);
1503 item += __builtin_popcountll(bword & ((1ull << bit)-1));
1504 cursor->next = root->slots[item];
1505 cursor->isOnPage = FALSE;
1506 cursor->entryOffsetInPage = 0;
1507 cursor->offsetInEntry = 0;
1508 cursor->payload = 0;
1509 traverseFromMapCursor(trie, cursor, bytes, capacity - 1, length + 1, stop, ctx, callback);
1510 if (*stop)
1511 return;
1512 }
1513 }
1514 }
1515
1516 static void traverseFromMapCursorMappedPageWithPrefixCompression(Page *page, CompactMapCursor *cursor, UInt8* bytes, uint32_t capacity, uint32_t length, Boolean *stop, void *ctx, CFBurstTrieTraversalCallback callback)
1517 {
1518 uint32_t pageSize = page->length - sizeof(Page);
1519 uint32_t offset = cursor->entryOffsetInPage;
1520 uint32_t minPrefixLength = 0;
1521 if (cursor->isOnPage) {
1522 PageEntryPacked *entry = (PageEntryPacked*)&page->data[offset];
1523 int32_t remainingLength = entry->strlen - cursor->offsetInEntry - 1;
1524 if (remainingLength >= 0 && remainingLength <= capacity) {
1525 memcpy(bytes + length, entry->string + cursor->offsetInEntry + 1, remainingLength);
1526 callback(ctx, bytes, length + remainingLength, entry->payload, stop);
1527 if (*stop)
1528 return;
1529 }
1530 minPrefixLength = entry->pfxLen + cursor->offsetInEntry;
1531 offset += getPackedPageEntrySize(entry);
1532 }
1533 PageEntryPacked *previousEntry = NULL;
1534 while (offset < pageSize) {
1535 PageEntryPacked *entry = (PageEntryPacked*)&page->data[offset];
1536 if (minPrefixLength > entry->pfxLen)
1537 break;
1538 else if (entry->payload && entry->strlen <= capacity) {
1539 if (previousEntry)
1540 length -= previousEntry->strlen + previousEntry->pfxLen - entry->pfxLen;
1541 memcpy(bytes + length, entry->string, entry->strlen);
1542 callback(ctx, bytes, length + entry->strlen, entry->payload, stop);
1543 length += entry->strlen;
1544 if (*stop)
1545 return;
1546 }
1547 previousEntry = entry;
1548 offset += getPackedPageEntrySize(entry);
1549 }
1550 }
1551
1552 static void traverseFromMapCursorMappedPageSortedByKey(Page *page, CompactMapCursor *cursor, UInt8* bytes, uint32_t capacity, uint32_t length, Boolean *stop, void *ctx, CFBurstTrieTraversalCallback callback)
1553 {
1554 uint32_t pageSize = page->length - sizeof(Page);
1555 uint32_t offset = cursor->entryOffsetInPage;
1556 uint32_t prefixLength = 0;
1557 const UInt8 *prefix = NULL;
1558 if (cursor->isOnPage) {
1559 PageEntry *entry = (PageEntry*)&page->data[offset];
1560 int32_t remainingLength = entry->strlen - cursor->offsetInEntry - 1;
1561 if (remainingLength >= 0 && remainingLength <= capacity) {
1562 memcpy(bytes + length, entry->string + cursor->offsetInEntry, remainingLength);
1563 callback(ctx, bytes, length + remainingLength, entry->payload, stop);
1564 if (*stop)
1565 return;
1566 }
1567 prefixLength = cursor->offsetInEntry + 1;
1568 prefix = entry->string;
1569 offset += getPageEntrySize(entry);
1570 }
1571
1572 while (offset < pageSize) {
1573 PageEntry *entry = (PageEntry*)&page->data[offset];
1574 if (entry->strlen < prefixLength)
1575 break;
1576 else {
1577 int cmpResult = __builtin_memcmp(entry->string, prefix, prefixLength);
1578 if (cmpResult > 0)
1579 break;
1580 else if (entry->payload && entry->strlen <= capacity) {
1581 if (entry->strlen > 0)
1582 memcpy(bytes + length, entry->string + prefixLength, entry->strlen - prefixLength);
1583 callback(ctx, bytes, length + entry->strlen - prefixLength, entry->payload, stop);
1584 if (*stop)
1585 return;
1586 }
1587 offset += getPageEntrySize(entry);
1588 }
1589 }
1590 }
1591
1592 static void traverseFromMapCursorMappedPage(CFBurstTrieRef trie, CompactMapCursor *cursor, UInt8* bytes, uint32_t capacity, uint32_t length, Boolean *stop, void *ctx, CFBurstTrieTraversalCallback callback)
1593 {
1594 Page *page = (Page *)DiskNextTrie_GetPtr(trie->mapBase, cursor->next);
1595 if (trie->cflags & kCFBurstTrieSortByKey)
1596 traverseFromMapCursorMappedPageSortedByKey(page, cursor, bytes, capacity, length, stop, ctx, callback);
1597 else if (trie->cflags & kCFBurstTriePrefixCompression)
1598 traverseFromMapCursorMappedPageWithPrefixCompression(page, cursor, bytes, capacity, length, stop, ctx, callback);
1599 }
1600
1601 void traverseFromMapCursor(CFBurstTrieRef trie, CompactMapCursor *cursor, UInt8* bytes, uint32_t capacity, uint32_t length, Boolean *stop, void *ctx, CFBurstTrieTraversalCallback callback)
1602 {
1603 switch (DiskNextTrie_GetKind(cursor->next)) {
1604 case TrieKind:
1605 traverseFromMapCursorMappedLevel(trie, cursor, bytes, capacity, length, stop, ctx, callback);
1606 break;
1607 case CompactTrieKind:
1608 traverseFromMapCursorCompactMappedLevel(trie, cursor, bytes, capacity, length, stop, ctx, callback);
1609 break;
1610 case ListKind:
1611 traverseFromMapCursorMappedPage(trie, cursor, bytes, capacity, length, stop, ctx, callback);
1612 break;
1613 case Nothing: {
1614 TrieHeader *header = (TrieHeader*)trie->mapBase;
1615 if (cursor->next == header->rootOffset) {
1616 traverseFromMapCursorMappedLevel(trie, cursor, bytes, capacity, length, stop, ctx, callback);
1617 }
1618 break;
1619 }
1620 }
1621 }
1622
1623 void copyMapCursor(const CompactMapCursor *source, CompactMapCursor* destination)
1624 {
1625 destination->next = source->next;
1626 destination->entryOffsetInPage = source->entryOffsetInPage;
1627 destination->offsetInEntry = source->offsetInEntry;
1628 destination->isOnPage = source->isOnPage;
1629 destination->payload = source->payload;
1630 }
1631
1632 Boolean areMapCursorsEqual(const CompactMapCursor *lhs, const CompactMapCursor *rhs)
1633 {
1634 return lhs->entryOffsetInPage == rhs->entryOffsetInPage && lhs->isOnPage == rhs->isOnPage && lhs->next == rhs->next && lhs->offsetInEntry == rhs->offsetInEntry;
1635 }
1636
1637 static Boolean getMapCursorPayloadFromPackedPageEntry(PageEntryPacked *entry, const CompactMapCursor *cursor, uint32_t *payload)
1638 {
1639 if (payload)
1640 *payload = 0;
1641 if (cursor->entryOffsetInPage == 0 && cursor->offsetInEntry == 0 && entry->strlen == 0) {
1642 if (payload)
1643 *payload = entry->payload;
1644 return TRUE;
1645 } else if (cursor->offsetInEntry != entry->strlen - 1)
1646 return FALSE;
1647 else {
1648 if (payload)
1649 *payload = entry->payload;
1650 return TRUE;
1651 }
1652 }
1653
1654 static Boolean getMapCursorPayloadFromPageEntry(PageEntry *entry, const CompactMapCursor *cursor, uint32_t *payload)
1655 {
1656 if (payload)
1657 *payload = 0;
1658 if (cursor->entryOffsetInPage == 0 && cursor->offsetInEntry == 0 && entry->strlen == 0) {
1659 if (payload)
1660 *payload = entry->payload;
1661 return TRUE;
1662 } else if (cursor->offsetInEntry != entry->strlen - 1)
1663 return FALSE;
1664 else {
1665 if (payload)
1666 *payload = entry->payload;
1667 return TRUE;
1668 }
1669 }
1670
1671 Boolean getMapCursorPayload(CFBurstTrieRef trie, const CompactMapCursor *cursor, uint32_t *payload)
1672 {
1673 if (!cursor)
1674 return FALSE;
1675 if (cursor->payload) {
1676 if (payload)
1677 *payload = cursor->payload;
1678 return TRUE;
1679 }
1680 return FALSE;
1681 }
1682
1683 // Legacy
1684
1685 static Boolean burstTrieMappedFind(DiskTrieLevelRef trie, char *map, const UInt8 *key, uint32_t length, uint32_t *payload, bool prefix) {
1686 Boolean success = false;
1687 if (length) {
1688 uint32_t offset = CFSwapInt32LittleToHost((uint32_t)trie->slots[*key]);
1689 if(DiskNextTrie_GetKind(offset) == TrieKind) {
1690 return burstTrieMappedFind((DiskTrieLevelRef)DiskNextTrie_GetPtr(map,offset), map, key+1, length-1, payload, prefix);
1691 } else if(DiskNextTrie_GetKind(offset) == CompactTrieKind) {
1692 return burstTrieCompactTrieMappedFind((CompactDiskTrieLevelRef)DiskNextTrie_GetPtr(map, offset), map, key+1, length-1, payload, prefix);
1693 } else {
1694 if(DiskNextTrie_GetKind(offset) == ListKind) {
1695 return burstTrieMappedPageFind((StringPage *)DiskNextTrie_GetPtr(map, offset), key+1, length-1, payload, prefix);
1696 } else {
1697 return success;
1698 }
1699 }
1700 } else {
1701 if (trie->weight) {
1702 SetPayload(payload, CFSwapInt32LittleToHost(trie->payload));
1703 success = true;
1704 }
1705 }
1706 return success;
1707 }
1708
1709 static Boolean burstTrieCompactTrieMappedFind(CompactDiskTrieLevelRef trie, char *map, const UInt8 *key, uint32_t length, uint32_t *payload, bool prefix) {
1710 Boolean success = false;
1711 if (length) {
1712 uint32_t mykey = *key;
1713 uint32_t slot = mykey / 64;
1714 uint32_t bit = mykey % 64;
1715 uint32_t item = 0;
1716 uint64_t bword = CFSwapInt64LittleToHost(trie->bitmap[slot]);
1717 if (bword & (1ull << bit)) {
1718 /* Count all the set bits up to this bit */
1719 for (int i=0; i < slot; i++) {
1720 item += __builtin_popcountll(trie->bitmap[i]);
1721 }
1722 item += __builtin_popcountll(bword & ((1ull << bit)-1));
1723 uint32_t offset = CFSwapInt32LittleToHost((uint32_t)trie->slots[item]);
1724 if(DiskNextTrie_GetKind(offset) == TrieKind) {
1725 return burstTrieMappedFind((DiskTrieLevelRef)DiskNextTrie_GetPtr(map, offset), map, key+1, length-1, payload, prefix);
1726 } else if(DiskNextTrie_GetKind(offset) == CompactTrieKind) {
1727 return burstTrieCompactTrieMappedFind((CompactDiskTrieLevelRef)DiskNextTrie_GetPtr(map, offset), map, key+1, length-1, payload, prefix);
1728 }
1729 else {
1730 if(DiskNextTrie_GetKind(offset) == ListKind) {
1731 return burstTrieMappedPageFind((StringPage *)DiskNextTrie_GetPtr(map, offset), key+1, length-1, payload, prefix);
1732 } else {
1733 return success;
1734 }
1735 }
1736 }
1737 } else {
1738 if (trie->weight) {
1739 SetPayload(payload, CFSwapInt32LittleToHost(trie->payload));
1740 success = true;
1741 }
1742 }
1743 return success;
1744 }
1745
1746 static Boolean burstTrieMappedPageFind(StringPage *page, const UInt8 *key, uint32_t length, uint32_t *payload, bool prefix) {
1747 Boolean success = false;
1748 uint32_t end = CFSwapInt32LittleToHost(page->length);
1749 uint32_t cur = 0;
1750 if (prefix) {
1751 uint8_t pfx[256];
1752 while (cur < end) {
1753 StringPageEntryPacked *entry = (StringPageEntryPacked *)&page->data[cur];
1754 uint16_t strlen = entry->pfxLen+CFSwapInt16LittleToHost(entry->strlen);
1755 if (strlen == length && __builtin_memcmp(pfx, key, entry->pfxLen) == 0 && __builtin_memcmp(entry->string, key+entry->pfxLen, length-entry->pfxLen) == 0) {
1756 SetPayload(payload, CFSwapInt32LittleToHost(entry->payload));
1757 success = true;
1758 return success;
1759 } else {
1760 memcpy(pfx+entry->pfxLen, entry->string, MIN(255-entry->pfxLen, length-entry->pfxLen));
1761 cur += sizeof(*entry) + strlen - entry->pfxLen;
1762 }
1763 }
1764 } else {
1765 while (cur < end) {
1766 StringPageEntry *entry = (StringPageEntry *)&page->data[cur];
1767 uint16_t strlen = CFSwapInt16LittleToHost(entry->strlen);
1768 if (strlen == length && __builtin_memcmp(entry->string, key, length) == 0) {
1769 SetPayload(payload, CFSwapInt32LittleToHost(entry->payload));
1770 success = true;
1771 return success;
1772 } else {
1773 cur += sizeof(*entry) + strlen;
1774 }
1775 }
1776
1777 }
1778 return success;
1779 }
1780
1781
1782 #if 0
1783 #pragma mark -
1784 #pragma mark Serialization
1785 #endif
1786
1787 static bool serializeCFBurstTrieLevels(CFBurstTrieRef trie, TrieLevelRef root, uint32_t *offset, off_t start_offset, bool dispose, bool isroot, int fd)
1788 {
1789 bool dense = true;
1790 int count = 0;
1791
1792 for (int i=0; i < CHARACTER_SET_SIZE; i++) if (root->slots[i]) count++;
1793
1794 uint32_t this_offset = *offset;
1795
1796 if ((trie->cflags & kCFBurstTrieBitmapCompression) && count < MAX_BITMAP_SIZE && !isroot) {
1797 size_t size = sizeof(CompactMapTrieLevel) + sizeof(uint32_t) * count;
1798 int offsetSlot = 0;
1799
1800 CompactMapTrieLevel *maptrie = (CompactMapTrieLevel *)alloca(size);
1801 bzero(maptrie, size);
1802 *offset += size;
1803
1804 for (int i=0; i < CHARACTER_SET_SIZE; i++) {
1805 NextTrie next = root->slots[i];
1806 if (next) {
1807 uint32_t slot = i / 64;
1808 uint32_t bit = i % 64;
1809 maptrie->bitmap[slot] |= 1ull<<bit;
1810 if (NextTrie_GetKind(next) == TrieKind) {
1811 TrieLevelRef nextLevel = (TrieLevelRef)NextTrie_GetPtr(next);
1812 uint32_t childOffset = *offset;
1813 if (serializeCFBurstTrieLevels(trie, nextLevel, offset, start_offset, true, false, fd)) {
1814 maptrie->slots[offsetSlot] = (TrieKind|childOffset);
1815 } else {
1816 maptrie->slots[offsetSlot] = (CompactTrieKind|childOffset);
1817 }
1818 } else {
1819 maptrie->slots[offsetSlot] = next;
1820 }
1821 offsetSlot++;
1822 }
1823 }
1824 maptrie->payload = root->payload;
1825
1826 int bitcount = 0;
1827 for (int i=0; i < 4; i++) bitcount += __builtin_popcountll(maptrie->bitmap[i]);
1828 assert(bitcount == count);
1829
1830 pwrite(fd, maptrie, size, this_offset+start_offset);
1831 dense = false;
1832 } else {
1833 MapTrieLevel maptrie;
1834 *offset += sizeof(maptrie);
1835
1836 for (int i=0; i < CHARACTER_SET_SIZE; i++) {
1837 NextTrie next = root->slots[i];
1838 if (NextTrie_GetKind(next) == TrieKind) {
1839 TrieLevelRef nextLevel = (TrieLevelRef)NextTrie_GetPtr(next);
1840 uint32_t childOffset = *offset;
1841 if (serializeCFBurstTrieLevels(trie, nextLevel, offset, start_offset, true, false, fd)) {
1842 maptrie.slots[i] = (TrieKind|childOffset);
1843 } else {
1844 maptrie.slots[i] = (CompactTrieKind|childOffset);
1845 }
1846 } else {
1847 maptrie.slots[i] = next;
1848 }
1849 }
1850 maptrie.payload = root->payload;
1851 pwrite(fd, &maptrie, sizeof(maptrie), this_offset+start_offset);
1852 }
1853
1854 if (dispose) free(root);
1855 return dense;
1856 }
1857
1858 static void serializeCFBurstTrieList(CFBurstTrieRef trie, ListNodeRef listNode, int fd)
1859 {
1860 uint32_t listCount;
1861 size_t size = trie->containerSize;
1862
1863 // ** Temp list of nodes to sort
1864 ListNodeRef *nodes = (ListNodeRef *)malloc(sizeof(ListNodeRef) * size);
1865 for (listCount = 0; listNode; listCount++) {
1866 if (listCount >= size) {
1867 size *= 2;
1868 nodes = (ListNodeRef *)realloc(nodes, sizeof(ListNodeRef) * size);
1869 }
1870 nodes[listCount] = listNode;
1871 listNode = listNode->next;
1872 }
1873
1874 char _buffer[MAX_BUFFER_SIZE];
1875 size_t bufferSize = (sizeof(Page) + size * (sizeof(PageEntryPacked) + MAX_STRING_SIZE));
1876 char *buffer = bufferSize < MAX_BUFFER_SIZE ? _buffer : (char *) malloc(bufferSize);
1877
1878 Page *page = (Page *)buffer;
1879 uint32_t current = 0;
1880
1881 if (trie->cflags & kCFBurstTriePrefixCompression) {
1882 qsort(nodes, listCount, sizeof(ListNodeRef), nodeStringCompare);
1883
1884 ListNodeRef last = 0;
1885 for (int i=0; i < listCount; i++) {
1886 listNode = nodes[i];
1887 uint8_t pfxLen = 0;
1888 if (last) {
1889 for ( ;
1890 pfxLen < CHARACTER_SET_SIZE-1 &&
1891 pfxLen < listNode->length &&
1892 pfxLen < last->length &&
1893 listNode->string[pfxLen] == last->string[pfxLen];
1894 pfxLen++);
1895 }
1896
1897 PageEntryPacked *entry = (PageEntryPacked *)(&page->data[current]);
1898 entry->strlen = listNode->length - pfxLen;
1899 entry->payload = listNode->payload;
1900 entry->pfxLen = pfxLen;
1901 memcpy(entry->string, listNode->string+pfxLen, listNode->length-pfxLen);
1902 current += listNode->length - pfxLen + sizeof(PageEntryPacked);
1903 last = listNode;
1904 }
1905 } else {
1906 if (trie->cflags & kCFBurstTrieSortByKey)
1907 qsort(nodes, listCount, sizeof(ListNodeRef), nodeStringCompare);
1908 else
1909 qsort(nodes, listCount, sizeof(ListNodeRef), nodeWeightCompare);
1910
1911 for (int i=0; i < listCount; i++) {
1912 listNode = nodes[i];
1913 PageEntry *entry = (PageEntry *)(&page->data[current]);
1914 entry->strlen = listNode->length;
1915 entry->payload = listNode->payload;
1916 memcpy(entry->string, listNode->string, listNode->length);
1917 current += listNode->length + sizeof(PageEntry);
1918 }
1919 }
1920
1921 size_t len = (sizeof(Page) + current + 3) & ~3;
1922 page->length = current;
1923 write(fd, page, len);
1924
1925 free(nodes);
1926 if (buffer != _buffer) free(buffer);
1927 }
1928
1929 static void serializeCFBurstTrieLists(CFBurstTrieRef trie, TrieLevelRef root, off_t start_offset, int fd)
1930 {
1931 for (int i=0; i < CHARACTER_SET_SIZE; i++) {
1932 NextTrie next = root->slots[i];
1933 uint32_t offset;
1934 if (NextTrie_GetKind(next) == TrieKind) {
1935 TrieLevelRef nextLevel = (TrieLevelRef)NextTrie_GetPtr(next);
1936 serializeCFBurstTrieLists(trie, nextLevel, start_offset, fd);
1937 } else {
1938 if (NextTrie_GetKind(next) == ListKind) {
1939 ListNodeRef listNode = (ListNodeRef)NextTrie_GetPtr(next);
1940 offset = lseek(fd, 0, SEEK_CUR) - start_offset;
1941 serializeCFBurstTrieList(trie, listNode, fd);
1942 finalizeCFBurstTrieList(listNode);
1943 //assert((offset & 3)==0);
1944 root->slots[i] = (offset|ListKind);
1945 }
1946 }
1947 }
1948 }
1949
1950 static size_t serializeCFBurstTrie(CFBurstTrieRef trie, size_t start_offset, int fd)
1951 {
1952 TrieHeader header;
1953 header.signature = 0x0ddba11;
1954 header.rootOffset = 0;
1955 header.count = trie->count;
1956 header.size = 0;
1957 header.flags = trie->cflags;
1958 header.reserved[0] = 0;
1959
1960 uint32_t offset;
1961 lseek(fd, start_offset, SEEK_SET);
1962
1963 size_t header_size = sizeof(header);
1964 write(fd, &header, header_size);
1965
1966 serializeCFBurstTrieLists(trie, &trie->root, start_offset, fd);
1967
1968 offset = lseek(fd, 0, SEEK_CUR) - start_offset;
1969 size_t off = offsetof(TrieHeader, rootOffset);
1970 size_t offsize = sizeof(offset);
1971 pwrite(fd, &offset, offsize, off+start_offset);
1972
1973 serializeCFBurstTrieLevels(trie, &trie->root, &offset, start_offset, false, true, fd);
1974
1975 size_t off2 = offsetof(TrieHeader, size);
1976 offsize = sizeof(offset);
1977 pwrite(fd, &offset, offsize, off2+start_offset);
1978
1979 offset = lseek(fd, 0, SEEK_END);
1980 return (size_t)(offset-start_offset);
1981 }
1982
1983 #if 0
1984 #pragma mark -
1985 #pragma mark Release
1986 #endif
1987
1988 static void destroyCFBurstTrie(CFBurstTrieRef trie) {
1989 if (trie->mapBase) {
1990 #if DEPLOYMENT_TARGET_WINDOWS
1991 UnmapViewOfFile(trie->mapBase);
1992 CloseHandle(trie->mapHandle);
1993 CloseHandle(trie->mappedFileHandle);
1994 #else
1995 munmap(trie->mapBase, trie->mapSize);
1996 #endif
1997 } else {
1998 finalizeCFBurstTrie(&trie->root);
1999 }
2000 free(trie);
2001 return;
2002 }
2003
2004 static void finalizeCFBurstTrie(TrieLevelRef trie) {
2005 for (int i=0; i < CHARACTER_SET_SIZE; i++) {
2006 if (NextTrie_GetKind(trie->slots[i]) == TrieKind) {
2007 finalizeCFBurstTrie((TrieLevelRef)NextTrie_GetPtr(trie->slots[i]));
2008 free((void *)NextTrie_GetPtr(trie->slots[i]));
2009 } else if (NextTrie_GetKind(trie->slots[i]) == ListKind) {
2010 finalizeCFBurstTrieList((ListNodeRef)NextTrie_GetPtr(trie->slots[i]));
2011 }
2012 }
2013 }
2014
2015 static void finalizeCFBurstTrieList(ListNodeRef node) {
2016 do {
2017 ListNodeRef next = node->next;
2018 free(node);
2019 node = next;
2020 } while(node);
2021 }
2022
2023 #if 0
2024 #pragma mark -
2025 #pragma mark Helpers
2026 #endif
2027
2028 static int nodeWeightCompare(const void *a, const void *b) {
2029 ListNodeRef nodeA = *(ListNodeRef *)a;
2030 ListNodeRef nodeB = *(ListNodeRef *)b;
2031 return (nodeB->weight - nodeA->weight);
2032 }
2033
2034 static int nodeStringCompare(const void *a, const void *b) {
2035 ListNodeRef nodeA = *(ListNodeRef *)a;
2036 ListNodeRef nodeB = *(ListNodeRef *)b;
2037 int result = memcmp((char *)nodeA->string, (char *)nodeB->string, MIN(nodeA->length, nodeB->length));
2038 if (result == 0) result = nodeA->length-nodeB->length;
2039 return result;
2040 }
2041
2042 static bool containsKey(void *context, const uint8_t *key, uint32_t payload, bool exact)
2043 {
2044 uint32_t *ctx = (uint32_t *)context;
2045 if (exact) *ctx = payload;
2046 return exact;
2047 }
2048
2049 static CFIndex burstTrieConvertCharactersToUTF8(UniChar *chars, CFIndex numChars, UInt8 *buffer) {
2050 uint32_t i, j;
2051 for (i = j = 0; i < numChars; i++) {
2052 UniChar c = chars[i];
2053 if (CFStringIsSurrogateHighCharacter(c) && i + 1 < numChars && CFStringIsSurrogateLowCharacter(chars[i + 1])) {
2054 UTF32Char lc = CFStringGetLongCharacterForSurrogatePair(c, chars[i + 1]);
2055 buffer[j++] = 0xf0 + (lc >> 18);
2056 buffer[j++] = 0x80 + ((lc & 0x3ffff) >> 12);
2057 buffer[j++] = 0x80 + ((lc & 0xfff) >> 6);
2058 buffer[j++] = 0x80 + (lc & 0x3f);
2059 i++;
2060 } else {
2061 if (c < 0x80) {
2062 buffer[j++] = c;
2063 } else if (c < 0x800) {
2064 buffer[j++] = 0xc0 + (c >> 6);
2065 buffer[j++] = 0x80 + (c & 0x3f);
2066 } else {
2067 buffer[j++] = 0xe0 + (c >> 12);
2068 buffer[j++] = 0x80 + ((c & 0xfff) >> 6);
2069 buffer[j++] = 0x80 + (c & 0x3f);
2070 }
2071 }
2072 }
2073 buffer[j] = 0;
2074 return j;
2075 }
2076