]> git.saurik.com Git - apple/cf.git/blob - CFBurstTrie.c
CF-855.14.tar.gz
[apple/cf.git] / CFBurstTrie.c
1 /*
2 * Copyright (c) 2014 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /* CFBurstTrie.c
25 Copyright (c) 2008-2013, Apple Inc. All rights reserved.
26 Responsibility: Jennifer Moore
27 */
28
29 #include "CFInternal.h"
30 #include "CFBurstTrie.h"
31 #include "CFByteOrder.h"
32 #include "CFNumber.h"
33 #include <stdio.h>
34 #include <string.h>
35 #include <stdlib.h>
36 #include <fcntl.h>
37 #include <sys/stat.h>
38 #include <limits.h>
39 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX
40 #include <unistd.h>
41 #include <sys/param.h>
42 #include <sys/mman.h>
43 #endif
44
45 #include <errno.h>
46 #include <assert.h>
47
48 #if DEPLOYMENT_TARGET_WINDOWS
49 #define open _NS_open
50 #define statinfo _stat
51 #define stat(x,y) _NS_stat(x,y)
52 #define __builtin_memcmp(x, y, z) memcmp(x, y, z)
53 #define __builtin_popcountll(x) popcountll(x)
54 #define bzero(dst, size) ZeroMemory(dst, size)
55 #define S_IWUSR 0
56 #define S_IRUSR 0
57
58 static int pwrite(int fd, const void *buf, size_t nbyte, off_t offset) {
59 // Get where we are
60 long pos = _tell(fd);
61
62 // Move to new offset
63 _lseek(fd, offset, SEEK_SET);
64
65 // Write data
66 int res = _write(fd, buf, nbyte);
67
68 // Return to previous offset
69 _lseek(fd, pos, SEEK_SET);
70
71 return res;
72 }
73
74 #else
75 #define statinfo stat
76 #endif
77
78 #if 0
79 #pragma mark Types and Utilities
80 #endif
81
82 #define MAX_STRING_ALLOCATION_SIZE 342
83 #define MAX_STRING_SIZE 1024
84 #define MAX_KEY_LENGTH MAX_STRING_SIZE * 4
85 #define CHARACTER_SET_SIZE 256
86 #define MAX_LIST_SIZE 256 // 64
87 #define MAX_BITMAP_SIZE 200
88 #define MAX_BUFFER_SIZE (4096<<2)
89
90 #define NextTrie_GetPtr(p) (p & ((~(uintptr_t)0)-3))
91 #define NextTrie_GetKind(p) (p & 3)
92 #define NextTrie_SetKind(p, kind) (p |= (3&kind))
93
94 #define DiskNextTrie_GetPtr(map,offset) (((uintptr_t)map) + (uintptr_t)(offset & ((~(uintptr_t)0)-3)))
95 #define DiskNextTrie_GetKind(p) (p & 3)
96 #define DiskNextTrie_SetKind(p, kind) (p |= (3&kind))
97
98 // Use this macro to avoid forgetting to check the pointer before assigning value to it.
99 #define SetPayload(pointer, value) do { if (pointer) *pointer = value; } while (0)
100
101 enum { Nothing = 0, TrieKind = 1, ListKind = 2, CompactTrieKind = 3 };
102 typedef enum { FailedInsert = 0, NewTerm = 1, ExistingTerm = 2 } CFBTInsertCode;
103
104 #pragma pack (1)
105 typedef uintptr_t NextTrie;
106
107 typedef struct _TrieLevel {
108 NextTrie slots[CHARACTER_SET_SIZE];
109 uint32_t weight;
110 uint32_t payload;
111 } TrieLevel;
112 typedef TrieLevel *TrieLevelRef;
113
114 typedef struct _MapTrieLevel {
115 uint32_t slots[CHARACTER_SET_SIZE];
116 uint32_t payload;
117 } MapTrieLevel;
118 typedef MapTrieLevel *MapTrieLevelRef;
119
120 typedef struct _CompactMapTrieLevel {
121 uint64_t bitmap[CHARACTER_SET_SIZE / 64];
122 uint32_t payload;
123 uint32_t slots[];
124 } CompactMapTrieLevel;
125 typedef CompactMapTrieLevel *CompactMapTrieLevelRef;
126
127 typedef struct _ListNode {
128 struct _ListNode *next;
129 uint32_t weight;
130 uint32_t payload;
131 uint16_t length;
132 UInt8 string[];
133 }* ListNodeRef;
134
135 typedef struct _Page {
136 uint32_t length;
137 char data[];
138 } Page;
139
140 typedef struct _PageEntryPacked {
141 uint8_t pfxLen;
142 uint16_t strlen;
143 uint32_t payload;
144 UInt8 string[];
145 } PageEntryPacked;
146
147 typedef struct _PageEntry {
148 uint16_t strlen;
149 uint32_t payload;
150 UInt8 string[];
151 } PageEntry;
152
153 typedef struct _TrieHeader {
154 uint32_t signature;
155 uint32_t rootOffset;
156 uint32_t count;
157 uint32_t size;
158 uint32_t flags;
159 uint64_t reserved[16];
160 } TrieHeader;
161
162 typedef struct _TrieCursor {
163 uint64_t signature;
164 uint64_t counter;
165 NextTrie next;
166 uint32_t keylen;
167 uint32_t prefixlen;
168 const uint8_t *prefix;
169 uint8_t key[MAX_KEY_LENGTH];
170 } TrieCursor;
171
172 typedef struct _MapCursor {
173 uint64_t signature;
174 TrieHeader *header;
175 uint32_t next;
176 uint32_t prefixlen;
177 uint32_t keylen;
178 const uint8_t *prefix;
179 uint8_t key[MAX_STRING_SIZE*4];
180 } MapCursor;
181
182 typedef struct _CompactMapCursor {
183 uint32_t next;
184 uint32_t entryOffsetInPage;
185 uint32_t offsetInEntry;
186 uint32_t payload;
187 // On a page, the first entry could has 0 strlen. So we need this variable to tell us whether
188 // the cursor is merely pointing at the beginning of the page, or the first entry.
189 // For example, if the trie contains "ab" and "abc", where "a" is stored on an array level,
190 // while "b" and "bc" are stored on a page level. If we creat a cursor for string "a", this cursor
191 // will point at the beginning of the page, but not at any particular key. The both entryOffsetInPage and
192 // offsetInEntry fields of the cursor are set to 0 in this case. Now if we add "a" to the
193 // trie. the page level will actually contains three entries. The first entry corresponds to string "a".
194 // That entry has 0 strlen value. If we creat a cursor for string "a" again, this cursor will
195 // point at the first entry on the page. But the entryOffsetInPage and offsetInEntry fields are still
196 // set to 0s. So we need an additional variable to make distinction between these two situations.
197 BOOL isOnPage;
198 } CompactMapCursor;
199 typedef struct _CompactMapCursor *MapCursorRef;
200
201 enum {
202 _kCFBurstTrieCursorTrieType = 0,
203 _kCFBurstTrieCursorMapType
204 };
205
206 typedef struct _CFBurstTrieCursor {
207 CompactMapCursor mapCursor;
208 CFIndex cursorType;
209 CFBurstTrieRef trie;
210 } _CFBurstTrieCursor;
211
212 // ** Legacy
213 typedef struct _DiskTrieLevel {
214 uint32_t slots[CHARACTER_SET_SIZE];
215 uint32_t weight;
216 uint32_t payload;
217 } DiskTrieLevel;
218 typedef DiskTrieLevel *DiskTrieLevelRef;
219
220 typedef struct _CompactDiskTrieLevel {
221 uint64_t bitmap[CHARACTER_SET_SIZE / 64]; // CHARACTER_SET_SIZE / 64bits per word
222 uint32_t weight;
223 uint32_t payload;
224 uint32_t slots[];
225 } CompactDiskTrieLevel;
226 typedef CompactDiskTrieLevel *CompactDiskTrieLevelRef;
227
228 typedef struct _StringPage {
229 uint32_t length;
230 char data[];
231 } StringPage;
232
233 typedef struct _StringPageEntryPacked {
234 uint8_t pfxLen;
235 uint16_t strlen; // make uint8_t if possible
236 uint32_t payload;
237 char string[];
238 } StringPageEntryPacked;
239
240 typedef struct _StringPageEntry {
241 uint16_t strlen; // make uint8_t if possible
242 uint32_t payload;
243 char string[];
244 } StringPageEntry;
245
246 typedef struct _fileHeader {
247 uint32_t signature;
248 uint32_t rootOffset;
249 uint32_t count;
250 uint32_t size;
251 uint32_t flags;
252 } fileHeader;
253 // **
254 #pragma pack()
255
256 struct _CFBurstTrie {
257 union {
258 TrieLevel root;
259 DiskTrieLevel diskRoot;
260 MapTrieLevel maproot;
261 };
262 char *mapBase;
263 uint32_t mapSize;
264 uint32_t mapOffset;
265 uint32_t cflags;
266 uint32_t count;
267 uint32_t containerSize;
268 int retain;
269 #if DEPLOYMENT_TARGET_WINDOWS
270 HANDLE mapHandle;
271 HANDLE mappedFileHandle;
272 #endif
273 };
274
275 #if 0
276 #pragma mark -
277 #pragma mark Forward declarations
278 #endif
279
280 typedef struct _TraverseContext {
281 void *context;
282 void (*callback)(void*, const UInt8*, uint32_t, uint32_t);
283 } TraverseContext;
284
285 static bool foundKey(void *context, const uint8_t *key, uint32_t payload, bool exact)
286 {
287 if (context != NULL) {
288 TraverseContext *ctx = (TraverseContext *)context;
289 if (ctx->context && ctx->callback) {
290 ctx->callback(ctx->context, key, 1, payload);
291 }
292 }
293 return false;
294 }
295
296 void CFBurstTrieTraverseWithCursor(CFBurstTrieRef trie, const uint8_t *prefix, uint32_t prefixLen, void **cursor, void *ctx, bool (*callback)(void *, const uint8_t *, uint32_t, bool));
297
298 static CFBTInsertCode addCFBurstTrieLevel(CFBurstTrieRef trie, TrieLevelRef root, const uint8_t *key, uint32_t keylen, uint32_t weight, uint32_t payload);
299
300 static void findCFBurstTrieLevel(CFBurstTrieRef trie, TrieCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void*, const uint8_t*, uint32_t, bool));
301 static void findCFBurstTrieMappedLevel(CFBurstTrieRef trie, MapCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void*, const uint8_t*, uint32_t, bool));
302 static void traverseCFBurstTrieLevel(CFBurstTrieRef trie, TrieLevelRef root, TrieCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void *, const uint8_t *, uint32_t, bool));
303 static void traverseCFBurstTrieMappedLevel(CFBurstTrieRef trie, MapTrieLevelRef root, MapCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void *, const uint8_t *, uint32_t, bool));
304 static void traverseCFBurstTrieCompactMappedLevel(CFBurstTrieRef trie, CompactMapTrieLevelRef root, MapCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void *, const uint8_t *, uint32_t, bool));
305 static void traverseCFBurstTrieWithCursor(CFBurstTrieRef trie, const uint8_t *prefix, uint32_t prefixLen, void **cursor, bool exactmatch, void *ctx, bool (*callback)(void *, const uint8_t *, uint32_t, bool));
306
307 static size_t serializeCFBurstTrie(CFBurstTrieRef trie, size_t start_offset, int fd);
308
309 static Boolean burstTrieMappedFind(DiskTrieLevelRef trie, char *map, const UInt8 *key, uint32_t length, uint32_t *payload, bool prefix);
310 static Boolean burstTrieMappedPageFind(StringPage *page, const UInt8 *key, uint32_t length, uint32_t *payload, bool prefix);
311 static Boolean burstTrieCompactTrieMappedFind(CompactDiskTrieLevelRef trie, char *map, const UInt8 *key, uint32_t length, uint32_t *payload, bool prefix);
312
313 static void destroyCFBurstTrie(CFBurstTrieRef trie);
314 static void finalizeCFBurstTrie(TrieLevelRef trie);
315 static void finalizeCFBurstTrieList(ListNodeRef node);
316
317 static int nodeWeightCompare(const void *a, const void *b);
318 static int nodeStringCompare(const void *a, const void *b);
319
320 static bool foundKey(void *context, const uint8_t *key, uint32_t payload, bool exact);
321 static bool containsKey(void *context, const uint8_t *key, uint32_t payload, bool exact);
322
323 static CFIndex burstTrieConvertCharactersToUTF8(UniChar *chars, CFIndex numChars, UInt8 *buffer);
324
325 static Boolean advanceMapCursor(CFBurstTrieRef trie, CompactMapCursor *cursor, const UInt8* bytes, CFIndex length);
326 static Boolean getMapCursorPayload(CFBurstTrieRef trie, const CompactMapCursor *cursor, uint32_t *payload);
327 static void copyMapCursor(const CompactMapCursor *source, CompactMapCursor* destination);
328 static Boolean areMapCursorsEqual(const CompactMapCursor *lhs, const CompactMapCursor *rhs);
329 static void traverseFromMapCursor(CFBurstTrieRef trie, CompactMapCursor *cursor, UInt8* bytes, uint32_t capacity, uint32_t length, Boolean *stop, void *ctx, CFBurstTrieTraversalCallback callback);
330 static Boolean getMapCursorPayloadFromPackedPageEntry(PageEntryPacked *entry, const CompactMapCursor *cursor, uint32_t *payload);
331 static Boolean getMapCursorPayloadFromPageEntry(PageEntry *entry, const CompactMapCursor *cursor, uint32_t *payload);
332
333 CFBurstTrieRef CFBurstTrieCreateWithOptions(CFDictionaryRef options) {
334 CFBurstTrieRef trie = NULL;
335 trie = (CFBurstTrieRef) calloc(1, sizeof(struct _CFBurstTrie));
336 trie->containerSize = MAX_LIST_SIZE;
337
338 CFNumberRef valueAsCFNumber;
339 if (CFDictionaryGetValueIfPresent(options, kCFBurstTrieCreationOptionNameContainerSize, (const void **)&valueAsCFNumber)) {
340 int value;
341 CFNumberGetValue(valueAsCFNumber, kCFNumberIntType, &value);
342 trie->containerSize = value > 2 && value < 4096 ? value : MAX_LIST_SIZE;
343 }
344 trie->retain = 1;
345 return trie;
346 }
347
348 CFBurstTrieRef CFBurstTrieCreate() {
349 CFBurstTrieRef trie = NULL;
350 int listSize = MAX_LIST_SIZE;
351 CFNumberRef value = CFNumberCreate(kCFAllocatorDefault, kCFNumberIntType, &listSize);
352 CFMutableDictionaryRef options = CFDictionaryCreateMutable(kCFAllocatorDefault, 1, NULL, NULL);
353 CFDictionarySetValue(options, kCFBurstTrieCreationOptionNameContainerSize, value);
354 trie = CFBurstTrieCreateWithOptions(options);
355 CFRelease(value);
356 CFRelease(options);
357 return trie;
358 }
359
360 CFBurstTrieRef CFBurstTrieCreateFromFile(CFStringRef path) {
361 struct statinfo sb;
362 char filename[PATH_MAX];
363 int fd;
364
365 /* Check valid path name */
366 if (!CFStringGetCString(path, filename, PATH_MAX, kCFStringEncodingUTF8)) return NULL;
367
368 /* Check if file exists */
369 if (stat(filename, &sb) != 0) return NULL;
370
371 /* Check if file can be opened */
372 if ((fd=open(filename, CF_OPENFLGS|O_RDONLY)) < 0) return NULL;
373
374 #if DEPLOYMENT_TARGET_WINDOWS
375 HANDLE mappedFileHandle = (HANDLE)_get_osfhandle(fd);
376 if (!mappedFileHandle) return NULL;
377
378 HANDLE mapHandle = CreateFileMapping(mappedFileHandle, NULL, PAGE_READONLY, 0, 0, NULL);
379 if (!mapHandle) return NULL;
380
381 char *map = (char *)MapViewOfFile(mapHandle, FILE_MAP_READ, 0, 0, sb.st_size);
382 if (!map) return NULL;
383 #else
384 char *map = mmap(0, sb.st_size, PROT_READ, MAP_FILE|MAP_SHARED, fd, 0);
385 #endif
386
387 CFBurstTrieRef trie = NULL;
388 TrieHeader *header = (TrieHeader *)map;
389
390 if (((uint32_t*)map)[0] == 0xbabeface) {
391 trie = (CFBurstTrieRef) calloc(1, sizeof(struct _CFBurstTrie));
392 trie->mapBase = map;
393 trie->mapSize = CFSwapInt32LittleToHost(sb.st_size);
394 trie->mapOffset = CFSwapInt32LittleToHost(((fileHeader*)trie->mapBase)->rootOffset);
395 trie->cflags = CFSwapInt32LittleToHost(((fileHeader*)trie->mapBase)->flags);
396 trie->count = CFSwapInt32LittleToHost(((fileHeader*)trie->mapBase)->count);
397 trie->retain = 1;
398 #if DEPLOYMENT_TARGET_WINDOWS
399 trie->mappedFileHandle = mappedFileHandle;
400 trie->mapHandle = mapHandle;
401 #else
402 // On Windows, the file being mapped must stay open as long as the map exists. Don't close it early. Other platforms close it here.
403 close(fd);
404 #endif
405 } else if (header->signature == 0xcafebabe || header->signature == 0x0ddba11) {
406 trie = (CFBurstTrieRef) calloc(1, sizeof(struct _CFBurstTrie));
407 trie->mapBase = map;
408 trie->mapSize = CFSwapInt32LittleToHost(sb.st_size);
409 trie->cflags = CFSwapInt32LittleToHost(header->flags);
410 trie->count = CFSwapInt32LittleToHost(header->count);
411 trie->retain = 1;
412 #if DEPLOYMENT_TARGET_WINDOWS
413 trie->mappedFileHandle = mappedFileHandle;
414 trie->mapHandle = mapHandle;
415 #else
416 // On Windows, the file being mapped must stay open as long as the map exists. Don't close it early. Other platforms close it here.
417 close(fd);
418 #endif
419 }
420 return trie;
421 }
422
423 CFBurstTrieRef CFBurstTrieCreateFromMapBytes(char *mapBase) {
424 CFBurstTrieRef trie = NULL;
425 TrieHeader *header = (TrieHeader *)mapBase;
426
427 if (mapBase && ((uint32_t*)mapBase)[0] == 0xbabeface) {
428 trie = (CFBurstTrieRef) malloc(sizeof(struct _CFBurstTrie));
429 trie->mapBase = mapBase;
430 trie->mapSize = CFSwapInt32LittleToHost(((fileHeader*)trie->mapBase)->size);
431 trie->mapOffset = CFSwapInt32LittleToHost(((fileHeader*)trie->mapBase)->rootOffset);
432 trie->cflags = CFSwapInt32LittleToHost(((fileHeader*)trie->mapBase)->flags);
433 trie->count = CFSwapInt32LittleToHost(((fileHeader*)trie->mapBase)->count);
434 trie->retain = 1;
435 } else if (mapBase && (header->signature == 0xcafebabe || header->signature == 0x0ddba11)) {
436 trie = (CFBurstTrieRef) malloc(sizeof(struct _CFBurstTrie));
437 trie->mapBase = mapBase;
438 trie->mapSize = CFSwapInt32LittleToHost(header->size);
439 trie->cflags = CFSwapInt32LittleToHost(header->flags);
440 trie->count = CFSwapInt32LittleToHost(header->count);
441 trie->retain = 1;
442 }
443 return trie;
444 }
445
446 Boolean CFBurstTrieInsert(CFBurstTrieRef trie, CFStringRef term, CFRange termRange, CFIndex payload) {
447 return CFBurstTrieAddWithWeight(trie, term, termRange, 1, (uint32_t)payload);
448 }
449
450 Boolean CFBurstTrieAdd(CFBurstTrieRef trie, CFStringRef term, CFRange termRange, uint32_t payload) {
451 return CFBurstTrieAddWithWeight(trie, term, termRange, 1, payload);
452 }
453
454 Boolean CFBurstTrieInsertCharacters(CFBurstTrieRef trie, UniChar *chars, CFIndex numChars, CFIndex payload) {
455 return CFBurstTrieAddCharactersWithWeight(trie, chars, numChars, 1, (uint32_t)payload);
456 }
457
458 Boolean CFBurstTrieAddCharacters(CFBurstTrieRef trie, UniChar *chars, CFIndex numChars, uint32_t payload) {
459 return CFBurstTrieAddCharactersWithWeight(trie, chars, numChars, 1, payload);
460 }
461
462 Boolean CFBurstTrieInsertUTF8String(CFBurstTrieRef trie, UInt8 *chars, CFIndex numChars, CFIndex payload) {
463 return CFBurstTrieAddUTF8StringWithWeight(trie, chars, numChars, 1, (uint32_t)payload);
464 }
465
466 Boolean CFBurstTrieAddUTF8String(CFBurstTrieRef trie, UInt8 *chars, CFIndex numChars, uint32_t payload) {
467 return CFBurstTrieAddUTF8StringWithWeight(trie, chars, numChars, 1, payload);
468 }
469
470 Boolean CFBurstTrieInsertWithWeight(CFBurstTrieRef trie, CFStringRef term, CFRange termRange, CFIndex weight, CFIndex payload) {
471 return CFBurstTrieAddWithWeight(trie, term, termRange, weight, (uint32_t)payload);
472 }
473
474 Boolean CFBurstTrieAddWithWeight(CFBurstTrieRef trie, CFStringRef term, CFRange termRange, uint32_t weight, uint32_t payload) {
475 Boolean success = false;
476 CFIndex size = MAX_STRING_ALLOCATION_SIZE;
477 CFIndex bytesize = termRange.length * 4; //** 4-byte max character size
478 if (!trie->mapBase && termRange.length < MAX_STRING_SIZE && payload > 0) {
479 CFIndex length;
480 UInt8 buffer[MAX_STRING_ALLOCATION_SIZE + 1];
481 UInt8 *key = buffer;
482 if (bytesize >= size) {
483 size = bytesize;
484 key = (UInt8 *) malloc(sizeof(UInt8) * size + 1);
485 }
486 CFStringGetBytes(term, termRange, kCFStringEncodingUTF8, (UInt8)'-', (Boolean)0, key, size, &length);
487 key[length] = 0;
488
489 success = CFBurstTrieAddUTF8StringWithWeight(trie, key, length, weight, payload);
490 if (buffer != key) free(key);
491 }
492 return success;
493 }
494
495 Boolean CFBurstTrieInsertCharactersWithWeight(CFBurstTrieRef trie, UniChar *chars, CFIndex numChars, CFIndex weight, CFIndex payload) {
496 return CFBurstTrieAddCharactersWithWeight(trie, chars, numChars, weight, (uint32_t)payload);
497 }
498
499 Boolean CFBurstTrieAddCharactersWithWeight(CFBurstTrieRef trie, UniChar *chars, CFIndex numChars, uint32_t weight, uint32_t payload) {
500 Boolean success = false;
501 CFIndex size = MAX_STRING_ALLOCATION_SIZE;
502 CFIndex bytesize = numChars * 4; //** 4-byte max character size
503 if (!trie->mapBase && numChars < MAX_STRING_SIZE && payload > 0) {
504 CFIndex length;
505 UInt8 buffer[MAX_STRING_ALLOCATION_SIZE + 1];
506 UInt8 *key = buffer;
507 if (bytesize >= size) {
508 size = bytesize;
509 key = (UInt8 *) malloc(sizeof(UInt8) * size + 1);
510 }
511 length = burstTrieConvertCharactersToUTF8(chars, numChars, key);
512 key[length] = 0;
513
514 success = CFBurstTrieAddUTF8StringWithWeight(trie, key, length, weight, payload);
515 if (buffer != key) free(key);
516 }
517 return success;
518 }
519
520 Boolean CFBurstTrieInsertUTF8StringWithWeight(CFBurstTrieRef trie, UInt8 *chars, CFIndex numChars, CFIndex weight, CFIndex payload) {
521 return CFBurstTrieAddUTF8StringWithWeight(trie, chars, numChars, weight, (uint32_t)payload);
522 }
523
524 Boolean CFBurstTrieAddUTF8StringWithWeight(CFBurstTrieRef trie, UInt8 *chars, CFIndex numChars, uint32_t weight, uint32_t payload) {
525 CFBTInsertCode code = FailedInsert;
526
527 if (!trie->mapBase && numChars < MAX_STRING_SIZE*4 && payload > 0) {
528 code = addCFBurstTrieLevel(trie, &trie->root, chars, numChars, weight, payload);
529 if (code == NewTerm) trie->count++;
530 }
531 return code > FailedInsert;
532 }
533
534 Boolean CFBurstTrieFind(CFBurstTrieRef trie, CFStringRef term, CFRange termRange, CFIndex *payload) {
535 uint32_t p;
536 if (CFBurstTrieContains(trie, term, termRange, &p)) {
537 SetPayload(payload, p);
538 return true;
539 }
540 return false;
541 }
542
543 Boolean CFBurstTrieContains(CFBurstTrieRef trie, CFStringRef term, CFRange termRange, uint32_t *payload) {
544 Boolean success = false;
545 CFIndex size = MAX_STRING_ALLOCATION_SIZE;
546 CFIndex bytesize = termRange.length * 4; //** 4-byte max character size
547 if (termRange.length < MAX_STRING_SIZE) {
548 CFIndex length;
549 UInt8 buffer[MAX_STRING_ALLOCATION_SIZE+1];
550 UInt8 *key = buffer;
551 if (bytesize >= size) {
552 size = bytesize;
553 key = (UInt8 *) malloc(sizeof(UInt8) * size + 1);
554 }
555 CFStringGetBytes(term, termRange, kCFStringEncodingUTF8, (UInt8)'-', (Boolean)0, key, size, &length);
556 key[length] = 0;
557
558 success = CFBurstTrieContainsUTF8String(trie, key, length, payload);
559 if (buffer != key) free(key);
560 }
561 return success;
562 }
563
564 Boolean CFBurstTrieFindCharacters(CFBurstTrieRef trie, UniChar *chars, CFIndex numChars, CFIndex *payload) {
565 uint32_t p;
566 if (CFBurstTrieContainsCharacters(trie, chars, numChars, &p)) {
567 SetPayload(payload, p);
568 return true;
569 }
570 return false;
571 }
572
573 Boolean CFBurstTrieContainsCharacters(CFBurstTrieRef trie, UniChar *chars, CFIndex numChars, uint32_t *payload) {
574 Boolean success = false;
575 CFIndex size = MAX_STRING_ALLOCATION_SIZE;
576 CFIndex bytesize = numChars * 4; //** 4-byte max character size
577 if (numChars < MAX_STRING_SIZE) {
578 CFIndex length;
579 UInt8 buffer[MAX_STRING_ALLOCATION_SIZE + 1];
580 UInt8 *key = buffer;
581 if (bytesize >= size) {
582 size = bytesize;
583 key = (UInt8 *) malloc(sizeof(UInt8) * size + 1);
584 }
585 length = burstTrieConvertCharactersToUTF8(chars, numChars, key);
586 key[length] = 0;
587
588 success = CFBurstTrieContainsUTF8String(trie, key, length, payload);
589 if (buffer != key) free(key);
590 }
591 return success;
592 }
593
594 Boolean CFBurstTrieFindUTF8String(CFBurstTrieRef trie, UInt8 *key, CFIndex length, CFIndex *payload) {
595 uint32_t p;
596 if (CFBurstTrieContainsUTF8String(trie, key, length, &p)) {
597 SetPayload(payload, p);
598 return true;
599 }
600 return false;
601 }
602
603 Boolean CFBurstTrieContainsUTF8String(CFBurstTrieRef trie, UInt8 *key, CFIndex length, uint32_t *payload) {
604 Boolean success = false;
605 if (length < MAX_STRING_SIZE) {
606 if (trie->mapBase && ((fileHeader *)trie->mapBase)->signature == 0xbabeface) {
607 bool prefix = (trie->cflags & kCFBurstTriePrefixCompression);
608 success = burstTrieMappedFind((DiskTrieLevelRef)(trie->mapBase+CFSwapInt32LittleToHost((((uint32_t*)trie->mapBase)[1]))), trie->mapBase, key, length, payload, prefix);
609 } else if (trie->mapBase && trie->cflags & (kCFBurstTriePrefixCompression | kCFBurstTrieSortByKey)) {
610 _CFBurstTrieCursor cursor;
611 if (!CFBurstTrieSetCursorForBytes(trie, &cursor, key, length))
612 return FALSE;
613 return CFBurstTrieCursorGetPayload(&cursor, payload);
614 } else {
615 uint32_t found = 0;
616 void *cursor = 0;
617 traverseCFBurstTrieWithCursor(trie, key, length, &cursor, true, &found, containsKey);
618 if (found) SetPayload(payload, found);
619 success = found > 0;
620 }
621 }
622 return success;
623 }
624
625 Boolean CFBurstTrieSerialize(CFBurstTrieRef trie, CFStringRef path, CFBurstTrieOpts opts) {
626 Boolean success = false;
627 if (trie->mapBase) {
628 return success;
629 } else {
630 int fd;
631 char filename[PATH_MAX];
632
633 /* Check valid path name */
634 if (!CFStringGetCString(path, filename, PATH_MAX, kCFStringEncodingUTF8)) return success;
635
636 /* Check if file can be opened */
637 if ((fd=open(filename, CF_OPENFLGS|O_RDWR|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR)) < 0) return success;
638
639 if (CFBurstTrieSerializeWithFileDescriptor(trie, fd, opts)) success = true;
640
641 close(fd);
642 }
643 return success;
644 }
645
646 Boolean CFBurstTrieSerializeWithFileDescriptor(CFBurstTrieRef trie, int fd, CFBurstTrieOpts opts) {
647 Boolean success = false;
648 if (!trie->mapBase && fd >= 0) {
649 off_t start_offset = lseek(fd, 0, SEEK_END);
650
651 trie->cflags = opts;
652 trie->mapSize = serializeCFBurstTrie(trie, start_offset, fd);
653
654 #if DEPLOYMENT_TARGET_WINDOWS
655 HANDLE mappedFileHandle = (HANDLE)_get_osfhandle(fd);
656 // We need to make sure we have our own handle to keep this file open as long as the mmap lasts
657 DuplicateHandle(GetCurrentProcess(), mappedFileHandle, GetCurrentProcess(), &mappedFileHandle, 0, 0, DUPLICATE_SAME_ACCESS);
658 HANDLE mapHandle = CreateFileMapping(mappedFileHandle, NULL, PAGE_READONLY, 0, 0, NULL);
659 if (!mapHandle) return NULL;
660 char *map = (char *)MapViewOfFile(mapHandle, FILE_MAP_READ, 0, start_offset, trie->mapSize);
661 if (!map) return NULL;
662 trie->mapBase = map;
663 trie->mapHandle = mapHandle;
664 trie->mappedFileHandle = mappedFileHandle;
665 #else
666 trie->mapBase = mmap(0, trie->mapSize, PROT_READ, MAP_FILE|MAP_SHARED, fd, start_offset);
667 #endif
668 success = true;
669 }
670
671 return success;
672 }
673
674 void CFBurstTrieTraverse(CFBurstTrieRef trie, void *ctx, void (*callback)(void*, const UInt8*, uint32_t, uint32_t)) {
675 TrieHeader *header = (TrieHeader *)trie->mapBase;
676 if (!trie->mapBase || (header->signature == 0xcafebabe || header->signature == 0x0ddba11)) {
677 void *cursor = 0;
678 TraverseContext context;
679 context.context = ctx;
680 context.callback = callback;
681 traverseCFBurstTrieWithCursor(trie, (const uint8_t *)"", 0, &cursor, false, &context, foundKey);
682 }
683 }
684
685
686 void CFBurstTrieTraverseWithCursor(CFBurstTrieRef trie, const uint8_t *prefix, uint32_t prefixLen, void **cursor, void *ctx, bool (*callback)(void *, const uint8_t *, uint32_t, bool))
687 {
688 traverseCFBurstTrieWithCursor(trie, prefix, prefixLen, cursor, false, ctx, callback);
689 }
690
691 void CFBurstTriePrint(CFBurstTrieRef trie) {
692
693 }
694
695 CFIndex CFBurstTrieGetCount(CFBurstTrieRef trie) {
696 return trie->count;
697 }
698
699 CFBurstTrieRef CFBurstTrieRetain(CFBurstTrieRef trie) {
700 trie->retain++;
701 return trie;
702 }
703
704 void CFBurstTrieRelease(CFBurstTrieRef trie) {
705 trie->retain--;
706 if (trie->retain == 0) destroyCFBurstTrie(trie);
707 return;
708 }
709
710 Boolean CFBurstTrieSetCursorForBytes(CFBurstTrieRef trie, CFBurstTrieCursorRef cursor, const UInt8* bytes, CFIndex length)
711 {
712 if (!trie->mapBase || !(trie->cflags & (kCFBurstTriePrefixCompression | kCFBurstTrieSortByKey))) {
713 //fprintf(stderr, "CFBurstTrieCreateCursorForBytes() only support file based trie in prefix compression format.\n");
714 return FALSE;
715 }
716
717 TrieHeader *header = (TrieHeader*)trie->mapBase;
718 if (length < 0 || !trie)
719 return FALSE;
720
721 cursor->trie = trie;
722 if (trie->mapBase) {
723 cursor->cursorType = _kCFBurstTrieCursorMapType;
724 cursor->mapCursor.next = header->rootOffset;
725 cursor->mapCursor.isOnPage = FALSE;
726 cursor->mapCursor.entryOffsetInPage = 0;
727 cursor->mapCursor.offsetInEntry = 0;
728 cursor->mapCursor.payload = 0;
729 } else
730 assert(false);
731
732 if (!bytes || length == 0)
733 return TRUE;
734
735 return CFBurstTrieCursorAdvanceForBytes(cursor, bytes, length);
736 }
737
738
739 CFBurstTrieCursorRef CFBurstTrieCreateCursorForBytes(CFBurstTrieRef trie, const UInt8* bytes, CFIndex length)
740 {
741 CFBurstTrieCursorRef cursor = (CFBurstTrieCursorRef)calloc(sizeof(_CFBurstTrieCursor), 1);
742 if (!CFBurstTrieSetCursorForBytes(trie, cursor, bytes, length)) {
743 CFBurstTrieCursorRelease(cursor);
744 return NULL;
745 }
746 return cursor;
747 }
748
749 CFBurstTrieCursorRef CFBurstTrieCursorCreateByCopy(CFBurstTrieCursorRef cursor)
750 {
751 if (!cursor)
752 return NULL;
753
754 CFBurstTrieCursorRef newCursor = (CFBurstTrieCursorRef)calloc(sizeof(_CFBurstTrieCursor), 1);
755 switch (cursor->cursorType) {
756 case _kCFBurstTrieCursorMapType:
757 copyMapCursor(&cursor->mapCursor, &newCursor->mapCursor);
758 break;
759 case _kCFBurstTrieCursorTrieType:
760 assert(false);
761 break;
762 }
763 newCursor->cursorType = cursor->cursorType;
764 newCursor->trie = cursor->trie;
765 return newCursor;
766 }
767
768 Boolean CFBurstTrieCursorIsEqual(CFBurstTrieCursorRef lhs, CFBurstTrieCursorRef rhs)
769 {
770 if (lhs->trie != rhs->trie || lhs->cursorType != rhs->cursorType)
771 return FALSE;
772
773 if (lhs->cursorType == _kCFBurstTrieCursorMapType)
774 return areMapCursorsEqual(&lhs->mapCursor, &rhs->mapCursor);
775 else
776 return FALSE;
777 }
778
779 Boolean CFBurstTrieCursorAdvanceForBytes(CFBurstTrieCursorRef cursor, const UInt8* bytes, CFIndex length)
780 {
781 switch (cursor->cursorType) {
782 case _kCFBurstTrieCursorMapType: {
783 CompactMapCursor tempCursor;
784 copyMapCursor(&cursor->mapCursor, &tempCursor);
785 if (advanceMapCursor(cursor->trie, (CompactMapCursor*)&cursor->mapCursor, bytes, length))
786 return TRUE;
787 else {
788 copyMapCursor(&tempCursor, &cursor->mapCursor);
789 return FALSE;
790 }
791 }
792 case _kCFBurstTrieCursorTrieType:
793 return FALSE;
794 }
795 return FALSE;
796 }
797
798 Boolean CFBurstTrieCursorGetPayload(CFBurstTrieCursorRef cursor, uint32_t *payload)
799 {
800 switch (cursor->cursorType) {
801 case _kCFBurstTrieCursorMapType:
802 return getMapCursorPayload(cursor->trie, (CompactMapCursor*)&cursor->mapCursor, payload);
803 case _kCFBurstTrieCursorTrieType:
804 return FALSE;
805 }
806 return FALSE;
807 }
808
809 void CFBurstTrieCursorRelease(CFBurstTrieCursorRef cursor)
810 {
811 if (!cursor)
812 return;
813 free(cursor);
814 }
815
816 void CFBurstTrieTraverseFromCursor(CFBurstTrieCursorRef cursor, void *ctx, CFBurstTrieTraversalCallback callback)
817 {
818 if (!cursor)
819 return;
820
821 UInt8 *bytes = (UInt8*)calloc(1, MAX_KEY_LENGTH);
822 uint32_t capacity = MAX_KEY_LENGTH;
823 uint32_t length = 0;
824 Boolean stop = FALSE;
825 switch (cursor->cursorType) {
826 case _kCFBurstTrieCursorMapType: {
827 CompactMapCursor tempCursor;
828 copyMapCursor(&cursor->mapCursor, &tempCursor);
829 traverseFromMapCursor(cursor->trie, &tempCursor, bytes, capacity,length, &stop, ctx, callback);
830 break;
831 }
832 case _kCFBurstTrieCursorTrieType:
833 break;
834 }
835 free(bytes);
836 }
837
838 #if 0
839 #pragma mark -
840 #pragma mark Insertion
841 #endif
842
843 static ListNodeRef makeCFBurstTrieListNode(const uint8_t *key, uint32_t keylen, uint32_t weight, uint32_t payload) {
844 ListNodeRef node = (ListNodeRef) calloc(1, sizeof(*node) + keylen + 1);
845 memcpy(node->string, key, keylen);
846 node->string[keylen] = 0;
847 node->next = 0;
848 node->length = keylen;
849 node->weight = weight;
850 node->payload = payload;
851 return node;
852 }
853
854 static void addCFBurstTrieBurstLevel(CFBurstTrieRef trie, TrieLevelRef root, const uint8_t *key, uint32_t keylen, uint32_t weight, uint32_t payload) {
855 if (keylen) {
856 NextTrie next = root->slots[*key];
857 ListNodeRef newNode = makeCFBurstTrieListNode(key+1, keylen-1, weight, payload);
858 newNode->weight = weight;
859 newNode->next = (ListNodeRef) NextTrie_GetPtr(next);
860 next = (uintptr_t) newNode;
861 NextTrie_SetKind(next, ListKind);
862 root->slots[*key] = next;
863 } else {
864 // ** Handle payload.
865 root->weight = weight;
866 root->payload = payload;
867 }
868 }
869
870 static TrieLevelRef burstCFBurstTrieLevel(CFBurstTrieRef trie, ListNodeRef list, uint32_t listCount) {
871 TrieLevelRef newLevel = (TrieLevelRef) calloc(1, sizeof(struct _TrieLevel));
872 while (list) {
873 addCFBurstTrieBurstLevel(trie, newLevel, list->string, list->length, list->weight, list->payload);
874 ListNodeRef temp = list;
875 list = list->next;
876 free(temp);
877 }
878 return newLevel;
879 }
880
881 static CFBTInsertCode addCFBurstTrieListNode(CFBurstTrieRef trie, ListNodeRef list, const uint8_t *key, uint32_t keylen, uint32_t weight, uint32_t payload, uint32_t *listCount)
882 {
883 CFBTInsertCode code = FailedInsert;
884 uint32_t count = 1;
885
886 ListNodeRef last = list;
887 while (list) {
888 if (list->length == keylen && memcmp(key, list->string, keylen) == 0) {
889 list->weight += weight;
890 list->payload = payload;
891 code = ExistingTerm;
892 break;
893 } else {
894 count++;
895 last = list;
896 list = list->next;
897 }
898 }
899
900 if (!list) {
901 last->next = makeCFBurstTrieListNode(key, keylen, weight, payload);
902 code = NewTerm;
903 }
904
905 *listCount = count;
906 return code;
907 }
908
909 static CFBTInsertCode addCFBurstTrieLevel(CFBurstTrieRef trie, TrieLevelRef root, const uint8_t *key, uint32_t keylen, uint32_t weight, uint32_t payload)
910 {
911 CFBTInsertCode code = FailedInsert;
912 if (keylen) {
913 NextTrie next = root->slots[*key];
914 if (NextTrie_GetKind(next) == TrieKind) {
915 TrieLevelRef nextLevel = (TrieLevelRef) NextTrie_GetPtr(next);
916 code = addCFBurstTrieLevel(trie, nextLevel, key+1, keylen-1, weight, payload);
917 } else {
918 if (NextTrie_GetKind(next) == ListKind) {
919 uint32_t listCount;
920 ListNodeRef listNode = (ListNodeRef) NextTrie_GetPtr(next);
921 code = addCFBurstTrieListNode(trie, listNode, key+1, keylen-1, weight, payload, &listCount);
922 if (listCount > trie->containerSize) {
923 next = (uintptr_t) burstCFBurstTrieLevel(trie, listNode, listCount);
924 NextTrie_SetKind(next, TrieKind);
925 }
926 } else {
927 // ** Make a new list node
928 next = (uintptr_t) makeCFBurstTrieListNode(key+1, keylen-1, weight, payload);
929 NextTrie_SetKind(next, ListKind);
930 code = NewTerm;
931 }
932 root->slots[*key] = next;
933 }
934 } else {
935 // ** Handle payload
936 if (!root->weight) code = NewTerm;
937 else code = ExistingTerm;
938 root->weight += weight;
939 root->payload = payload;
940 }
941
942 return code;
943 }
944 #if 0
945 #pragma mark -
946 #pragma mark Searching
947 #endif
948 static void findCFBurstTrieList(CFBurstTrieRef trie, TrieCursor *cursor, void *ctx, bool (*callback)(void*, const uint8_t*, uint32_t, bool))
949 {
950 ListNodeRef list = (ListNodeRef)NextTrie_GetPtr(cursor->next);
951 int len = cursor->prefixlen-cursor->keylen;
952 len = len <= 0 ? 0 : len;
953 while (list) {
954 int lencompare = list->length-len;
955 if (list->length >= len &&
956 (len == 0 || memcmp(list->string, cursor->prefix+cursor->keylen, len) == 0)) {
957 memcpy(cursor->key+cursor->keylen, list->string, list->length);
958 cursor->key[cursor->keylen+list->length] = 0;
959 cursor->next = (NextTrie)list;
960 if (list->payload && callback(ctx, cursor->key, list->payload, lencompare==0)) return;
961 }
962 list = list->next;
963 }
964 }
965
966 static void findCFBurstTrieMappedPage(CFBurstTrieRef trie, MapCursor *cursor, void *ctx, bool (*callback)(void*, const uint8_t*, uint32_t, bool))
967 {
968 Page *page = (Page *)DiskNextTrie_GetPtr(trie->mapBase, cursor->next);
969 uint32_t end = page->length;
970 uint32_t cur = 0;
971 int len = cursor->prefixlen-cursor->keylen;
972 len = len <= 0 ? 0 : len;
973 if (trie->cflags & kCFBurstTriePrefixCompression) {
974 uint8_t pfx[CHARACTER_SET_SIZE];
975 PageEntryPacked *lastEntry = 0;
976 while (cur < end) {
977 PageEntryPacked *entry = (PageEntryPacked *)&page->data[cur];
978 int lencompare = (entry->strlen+entry->pfxLen)-len;
979 if (lastEntry && entry->pfxLen>lastEntry->pfxLen) memcpy(pfx+lastEntry->pfxLen, lastEntry->string, entry->pfxLen-lastEntry->pfxLen);
980 if (lencompare >= 0 &&
981 (len == 0 || (__builtin_memcmp(pfx, cursor->prefix+cursor->keylen, entry->pfxLen) == 0 &&
982 __builtin_memcmp(entry->string, cursor->prefix+cursor->keylen+entry->pfxLen, cursor->prefixlen-cursor->keylen-entry->pfxLen) == 0))) {
983 memcpy(cursor->key+cursor->keylen, pfx, entry->pfxLen);
984 memcpy(cursor->key+cursor->keylen+entry->pfxLen, entry->string, entry->strlen);
985 cursor->key[cursor->keylen+entry->pfxLen+entry->strlen] = 0;
986 if (entry->payload && callback(ctx, (const uint8_t *)cursor->key, entry->payload, lencompare==0)) return;
987 }
988 lastEntry = entry;
989 cur += sizeof(*entry) + entry->strlen;
990 }
991 } else {
992 while (cur < end) {
993 PageEntry *entry = (PageEntry *)&page->data[cur];
994 int lencompare = entry->strlen-len;
995 if (lencompare >= 0 && __builtin_memcmp(entry->string, cursor->prefix+cursor->keylen, len) == 0) {
996 memcpy(cursor->key+cursor->keylen, entry->string, entry->strlen);
997 cursor->key[cursor->keylen+entry->strlen] = 0;
998 if (entry->payload && callback(ctx, (const uint8_t *)cursor->key, entry->payload, lencompare==0)) return;
999 }
1000 cur += sizeof(*entry) + entry->strlen;
1001 }
1002 }
1003 }
1004
1005
1006 static void findCFBurstTrieLevel(CFBurstTrieRef trie, TrieCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void*, const uint8_t*, uint32_t, bool))
1007 {
1008 if (cursor->keylen < cursor->prefixlen) {
1009 cursor->next = ((TrieLevelRef)NextTrie_GetPtr(cursor->next))->slots[cursor->prefix[cursor->keylen]];
1010 cursor->key[cursor->keylen++] = cursor->prefix[cursor->keylen];
1011
1012 if (NextTrie_GetKind(cursor->next) == TrieKind) {
1013 findCFBurstTrieLevel(trie, cursor, exactmatch, ctx, callback);
1014 } else if (NextTrie_GetKind(cursor->next) == ListKind) {
1015 findCFBurstTrieList(trie, cursor, ctx, callback);
1016 }
1017 } else {
1018 TrieLevelRef root = (TrieLevelRef)NextTrie_GetPtr(cursor->next);
1019 if (root->payload && callback(ctx, cursor->key, root->payload, cursor->prefixlen==cursor->keylen)) return;
1020 if (cursor->keylen == cursor->prefixlen && exactmatch) return;
1021 traverseCFBurstTrieLevel(trie, root, cursor, exactmatch, ctx, callback);
1022 }
1023 }
1024
1025 static void findCFBurstTrieCompactMappedLevel(CFBurstTrieRef trie, MapCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void*, const uint8_t*, uint32_t, bool))
1026 {
1027 CompactMapTrieLevelRef root = (CompactMapTrieLevelRef)DiskNextTrie_GetPtr(trie->mapBase, cursor->next);
1028 if (cursor->keylen < cursor->prefixlen) {
1029 uint8_t mykey = *(cursor->prefix+cursor->keylen);
1030 cursor->key[cursor->keylen++] = *(cursor->prefix+cursor->keylen);
1031
1032 uint8_t slot = mykey / 64;
1033 uint8_t bit = mykey % 64;
1034 uint32_t item = 0;
1035 uint64_t bword = root->bitmap[slot];
1036
1037 if (bword & (1ull << bit)) {
1038 // ** Count all the set bits up to this bit
1039 for (int i=0; i < slot; i++) item += __builtin_popcountll(root->bitmap[i]);
1040 item += __builtin_popcountll(bword & ((1ull << bit)-1));
1041 uint32_t offset = root->slots[item];
1042 cursor->next = offset;
1043
1044 if (DiskNextTrie_GetKind(offset) == TrieKind) {
1045 findCFBurstTrieMappedLevel(trie, cursor, exactmatch, ctx, callback);
1046 } else if (DiskNextTrie_GetKind(offset) == CompactTrieKind) {
1047 findCFBurstTrieCompactMappedLevel(trie, cursor, exactmatch, ctx, callback);
1048 } else if (DiskNextTrie_GetKind(offset) == ListKind) {
1049 findCFBurstTrieMappedPage(trie, cursor, ctx, callback);
1050 }
1051 }
1052 } else {
1053 if(root->payload && callback(ctx, cursor->key, root->payload, cursor->prefixlen==cursor->keylen)) return;
1054 if (cursor->keylen == cursor->prefixlen && exactmatch) return;
1055 traverseCFBurstTrieCompactMappedLevel(trie, root, cursor, exactmatch, ctx, callback);
1056 }
1057 }
1058
1059 static void findCFBurstTrieMappedLevel(CFBurstTrieRef trie, MapCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void*, const uint8_t*, uint32_t, bool))
1060 {
1061 MapTrieLevelRef root = (MapTrieLevelRef)DiskNextTrie_GetPtr(trie->mapBase, cursor->next);
1062 if (cursor->keylen < cursor->prefixlen) {
1063 uint8_t slot = *(cursor->prefix+cursor->keylen);
1064 cursor->next = root->slots[slot];
1065 cursor->key[cursor->keylen++] = slot;
1066
1067 if (DiskNextTrie_GetKind(cursor->next) == TrieKind) {
1068 findCFBurstTrieMappedLevel(trie, cursor, exactmatch, ctx, callback);
1069 } else if (DiskNextTrie_GetKind(cursor->next) == CompactTrieKind) {
1070 findCFBurstTrieCompactMappedLevel(trie, cursor, exactmatch, ctx, callback);
1071 } else if (DiskNextTrie_GetKind(cursor->next) == ListKind) {
1072 findCFBurstTrieMappedPage(trie, cursor, ctx, callback);
1073 }
1074 } else {
1075 if (root->payload && callback(ctx, cursor->key, root->payload, cursor->prefixlen==cursor->keylen)) return;
1076 if (cursor->keylen == cursor->prefixlen && exactmatch) return;
1077 traverseCFBurstTrieMappedLevel(trie, root, cursor, exactmatch, ctx, callback);
1078 }
1079 }
1080
1081 static void traverseCFBurstTrieLevel(CFBurstTrieRef trie, TrieLevelRef root, TrieCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void *, const uint8_t *, uint32_t, bool))
1082 {
1083 cursor->key[cursor->keylen] = 0;
1084 uint32_t len = cursor->keylen;
1085 for (int i=0; i < CHARACTER_SET_SIZE; i++) {
1086 NextTrie next = root->slots[i];
1087 cursor->keylen = len;
1088 cursor->key[cursor->keylen++] = i;
1089
1090 if (NextTrie_GetKind(next) == TrieKind) {
1091 TrieLevelRef level = (TrieLevelRef)NextTrie_GetPtr(next);
1092 if (level->payload && callback(ctx, cursor->key, level->payload, cursor->prefixlen==cursor->keylen)) return;
1093 if (cursor->keylen == cursor->prefixlen && exactmatch) return;
1094 traverseCFBurstTrieLevel(trie, level, cursor, exactmatch, ctx, callback);
1095 } else if (NextTrie_GetKind(next) == ListKind) {
1096 cursor->next = next;
1097 cursor->key[cursor->keylen] = 0;
1098 findCFBurstTrieList(trie, cursor, ctx, callback);
1099 }
1100 }
1101 }
1102
1103 static void traverseCFBurstTrieMappedLevel(CFBurstTrieRef trie, MapTrieLevelRef root, MapCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void *, const uint8_t *, uint32_t, bool))
1104 {
1105 cursor->key[cursor->keylen] = 0;
1106 uint32_t len = cursor->keylen;
1107
1108 for (int i=0; i < CHARACTER_SET_SIZE; i++) {
1109 uint32_t offset = (uint32_t)root->slots[i];
1110 cursor->keylen = len;
1111 cursor->key[cursor->keylen++] = i;
1112
1113 if (DiskNextTrie_GetKind(offset) == TrieKind) {
1114 MapTrieLevelRef level = (MapTrieLevelRef)DiskNextTrie_GetPtr(trie->mapBase, offset);
1115 if (level->payload && callback(ctx, cursor->key, level->payload, cursor->prefixlen==cursor->keylen)) return;
1116 if (cursor->keylen == cursor->prefixlen && exactmatch) return;
1117 traverseCFBurstTrieMappedLevel(trie, level, cursor, exactmatch, ctx, callback);
1118 } else if (DiskNextTrie_GetKind(offset) == CompactTrieKind) {
1119 CompactMapTrieLevelRef level = (CompactMapTrieLevelRef)DiskNextTrie_GetPtr(trie->mapBase, offset);
1120 if (level->payload && callback(ctx, cursor->key, level->payload, cursor->prefixlen==cursor->keylen)) return;
1121 if (cursor->keylen == cursor->prefixlen && exactmatch) return;
1122 traverseCFBurstTrieCompactMappedLevel(trie, level, cursor, exactmatch, ctx, callback);
1123 } else if (DiskNextTrie_GetKind(offset) == ListKind) {
1124 cursor->next = offset;
1125 cursor->key[cursor->keylen] = 0;
1126 findCFBurstTrieMappedPage(trie, cursor, ctx, callback);
1127 }
1128 }
1129 }
1130
1131 static void traverseCFBurstTrieCompactMappedLevel(CFBurstTrieRef trie, CompactMapTrieLevelRef root, MapCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void *, const uint8_t *, uint32_t, bool))
1132 {
1133 cursor->key[cursor->keylen] = 0;
1134 uint32_t len = cursor->keylen;
1135 for (uint32_t c=0; c < CHARACTER_SET_SIZE; c++) {
1136 //** This could be optimized to remember what the last slot/item was and not count bits over and over.
1137 uint8_t mykey = c;
1138 uint32_t slot = mykey / 64;
1139 uint32_t bit = mykey % 64;
1140 uint32_t item = 0;
1141 uint64_t bword = root->bitmap[slot];
1142 cursor->keylen = len;
1143
1144 if (bword & (1ull << bit)) {
1145 // ** Count all the set bits up to this bit
1146 for (int i=0; i < slot; i++) item += __builtin_popcountll(root->bitmap[i]);
1147 item += __builtin_popcountll(bword & ((1ull << bit)-1));
1148 uint32_t offset = root->slots[item];
1149 cursor->key[cursor->keylen++] = mykey;
1150
1151 if(DiskNextTrie_GetKind(offset) == CompactTrieKind) {
1152 CompactMapTrieLevelRef level = (CompactMapTrieLevelRef)DiskNextTrie_GetPtr(trie->mapBase, offset);
1153 if (level->payload && callback(ctx, cursor->key, level->payload, cursor->prefixlen==cursor->keylen)) return;
1154 if (cursor->keylen == cursor->prefixlen && exactmatch) return;
1155 traverseCFBurstTrieCompactMappedLevel(trie, level, cursor, exactmatch, ctx, callback);
1156 } else if(DiskNextTrie_GetKind(offset) == TrieKind) {
1157 traverseCFBurstTrieMappedLevel(trie, (MapTrieLevelRef)DiskNextTrie_GetPtr(trie->mapBase, offset), cursor, exactmatch, ctx, callback);
1158 } else if (DiskNextTrie_GetKind(offset) == ListKind) {
1159 cursor->next = offset;
1160 cursor->key[cursor->keylen] = 0;
1161 findCFBurstTrieMappedPage(trie, cursor, ctx, callback);
1162 }
1163 }
1164 }
1165 }
1166
1167 static void traverseCFBurstTrieWithCursor(CFBurstTrieRef trie, const uint8_t *prefix, uint32_t prefixLen, void **cursor, bool exactmatch, void *ctx, bool (*callback)(void *, const uint8_t *, uint32_t, bool)) {
1168 if (trie->mapBase) {
1169 if (trie->cflags & kCFBurstTriePrefixCompression) {
1170 fprintf(stderr, "Please use CFBurstTrieCursorRef API for file based trie.\n");
1171 return;
1172 } else {
1173 TrieHeader *header = (TrieHeader *)trie->mapBase;
1174 MapCursor csr;
1175 csr.next = header->rootOffset;
1176 csr.prefix = prefix;
1177 csr.prefixlen = prefixLen;
1178 csr.key[0] = 0;
1179 csr.keylen = 0;
1180 findCFBurstTrieMappedLevel(trie, &csr, exactmatch, ctx, callback);
1181 }
1182 } else {
1183 TrieCursor csr;
1184 csr.next = ((unsigned long)&trie->root)|TrieKind;
1185 csr.prefix = prefix;
1186 csr.prefixlen = prefixLen;
1187 csr.key[0] = 0;
1188 csr.keylen = 0;
1189 findCFBurstTrieLevel(trie, &csr, exactmatch, ctx, callback);
1190 }
1191 }
1192
1193 CF_INLINE uint32_t getPackedPageEntrySize(PageEntryPacked *entry)
1194 {
1195 return sizeof(PageEntryPacked) + entry->strlen;
1196 }
1197
1198 CF_INLINE uint32_t getPageEntrySize(PageEntry *entry)
1199 {
1200 return sizeof(PageEntry) + entry->strlen;
1201 }
1202
1203 /*
1204 static void _printPageEntry(PageEntryPacked *entry) {
1205 printf("entry 0x%p:\n", entry);
1206 printf("pfxLen = %d, strLen = %d\n", entry->pfxLen, entry->strlen);
1207 if (entry->strlen > 0) {
1208 printf("string = ");
1209 for (int i = 0; i < entry->strlen; ++i)
1210 printf("%d ", entry->string[i]);
1211 printf("\n");
1212 }
1213 printf("\n");
1214 }
1215 */
1216 static Boolean advanceCursorOnMappedPageForByte(Page *page, CompactMapCursor *cursor, UInt8 byte) {
1217 PageEntryPacked *entry;
1218 Boolean found = FALSE;
1219 uint32_t minPrefixLength = 0;
1220
1221 if (cursor->isOnPage) {
1222 entry = (PageEntryPacked *)&page->data[cursor->entryOffsetInPage];
1223 //_printPageEntry(entry);
1224 BOOL shouldContinue = TRUE;
1225
1226 if (!(cursor->entryOffsetInPage == 0 && entry->strlen == 0)) {
1227 if (cursor->entryOffsetInPage == entry->strlen - 1) {
1228 minPrefixLength = entry->pfxLen + entry->strlen;
1229 cursor->entryOffsetInPage += getPackedPageEntrySize(entry);
1230 } else {
1231 cursor->offsetInEntry++;
1232 if (entry->string[cursor->offsetInEntry] == byte)
1233 found = TRUE;
1234 else if (entry->string[cursor->offsetInEntry] > byte)
1235 shouldContinue = FALSE;
1236 else {
1237 minPrefixLength = entry->pfxLen + cursor->offsetInEntry;
1238 cursor->entryOffsetInPage += getPackedPageEntrySize(entry);
1239 }
1240 }
1241 }
1242
1243 if (found) {
1244 cursor->isOnPage = TRUE;
1245 return TRUE;
1246 } else if (!shouldContinue)
1247 return FALSE;
1248 } else {
1249 cursor->entryOffsetInPage = 0;
1250 }
1251
1252 uint32_t pageSize = page->length - sizeof(Page);
1253 while (cursor->entryOffsetInPage < pageSize) {
1254 entry = (PageEntryPacked *)&page->data[cursor->entryOffsetInPage];
1255 //_printPageEntry(entry);
1256 if (minPrefixLength > entry->pfxLen)
1257 break;
1258 else if (minPrefixLength < entry->pfxLen)
1259 cursor->entryOffsetInPage += getPackedPageEntrySize(entry);
1260 else {
1261 if (entry->strlen == 0)
1262 cursor->entryOffsetInPage += getPackedPageEntrySize(entry);
1263 else {
1264 if (entry->string[0] > byte)
1265 // Entries are sorted alphabetically
1266 break;
1267 else if (entry->string[0] < byte)
1268 cursor->entryOffsetInPage += getPackedPageEntrySize(entry);
1269 else {
1270 cursor->offsetInEntry = 0;
1271 found = TRUE;
1272 break;
1273 }
1274 }
1275 }
1276 }
1277
1278 if (found)
1279 cursor->isOnPage = TRUE;
1280
1281 return found;
1282 }
1283
1284 static Boolean advanceCursorMappedPageWithPerfixCompression(Page *page, CompactMapCursor *cursor, const UInt8* bytes, CFIndex length)
1285 {
1286 if (length == 0) {
1287 PageEntryPacked *entry = (PageEntryPacked*)&page->data[0];
1288 if (!cursor->isOnPage) {
1289 cursor->entryOffsetInPage = 0;
1290 cursor->offsetInEntry = 0;
1291 cursor->isOnPage = entry->pfxLen == 0 && entry->strlen == 0;
1292 }
1293 getMapCursorPayloadFromPackedPageEntry(entry, cursor, &cursor->payload);
1294 return TRUE;
1295 }
1296
1297 for (CFIndex i = 0; i < length; ++i) {
1298 if (!advanceCursorOnMappedPageForByte(page, cursor, bytes[i]))
1299 return FALSE;
1300 }
1301 PageEntryPacked *entry = (PageEntryPacked*)&page->data[cursor->entryOffsetInPage];
1302 getMapCursorPayloadFromPackedPageEntry(entry, cursor, &cursor->payload);
1303 return TRUE;
1304 }
1305
1306 static Boolean advanceCursorMappedPageSortedByKey(Page *page, CompactMapCursor *cursor, const UInt8* bytes, CFIndex length)
1307 {
1308 if (length == 0) {
1309 PageEntry*entry = (PageEntry*)&page->data[0];
1310 if (!cursor->isOnPage) {
1311 cursor->entryOffsetInPage = 0;
1312 cursor->offsetInEntry = 0;
1313 cursor->isOnPage = entry->strlen == 0;
1314 }
1315 getMapCursorPayloadFromPageEntry(entry, cursor, &cursor->payload);
1316 return TRUE;
1317 }
1318
1319 PageEntry *entry;
1320 uint32_t pageSize = page->length - sizeof(Page);
1321 const UInt8 * prefix = NULL;
1322 uint32_t prefixLength = 0;
1323
1324 if (cursor->isOnPage) {
1325 entry = (PageEntry*)&page->data[cursor->entryOffsetInPage];
1326 prefix = entry->string;
1327 prefixLength = cursor->offsetInEntry + 1;
1328 }
1329
1330 while (cursor->entryOffsetInPage < pageSize) {
1331 PageEntry *entry = (PageEntry*)&page->data[cursor->entryOffsetInPage];
1332 if (entry->strlen == 0) {
1333 cursor->entryOffsetInPage += getPageEntrySize(entry);
1334 continue;
1335 }
1336
1337 if (entry->strlen <= prefixLength)
1338 return FALSE;
1339 else {
1340 int cmpResult = 0;
1341 if (prefixLength > 0)
1342 cmpResult = __builtin_memcmp(entry->string, prefix, prefixLength);
1343 if (cmpResult > 0)
1344 return FALSE;
1345 else if (cmpResult == 0) {
1346 if (entry->strlen < prefixLength + length) {
1347 int cmpResult2 = __builtin_memcmp(entry->string + prefixLength, bytes, entry->strlen - prefixLength);
1348 if (cmpResult2 > 0)
1349 return FALSE;
1350 else
1351 cursor->entryOffsetInPage += getPageEntrySize(entry);
1352 } else {
1353 int cmpResult2 = __builtin_memcmp(entry->string + prefixLength, bytes, length);
1354 if (cmpResult2 > 0)
1355 return FALSE;
1356 else if (cmpResult2 == 0) {
1357 cursor->isOnPage = TRUE;
1358 cursor->offsetInEntry = prefixLength + length - 1;
1359 getMapCursorPayloadFromPageEntry(entry, cursor, &cursor->payload);
1360 return TRUE;
1361 } else
1362 cursor->entryOffsetInPage += getPageEntrySize(entry);
1363 }
1364 } else
1365 cursor->entryOffsetInPage += getPageEntrySize(entry);
1366 }
1367 }
1368 return FALSE;
1369 }
1370
1371 static Boolean advanceCursorMappedPage(CFBurstTrieRef trie, CompactMapCursor *cursor, const UInt8* bytes, CFIndex length)
1372 {
1373 if (!bytes || length < 0)
1374 return FALSE;
1375
1376 Page *page = (Page *)DiskNextTrie_GetPtr(trie->mapBase, cursor->next);
1377 uint32_t pageSize = page->length - sizeof(Page);
1378 if (pageSize == 0)
1379 return FALSE;
1380
1381 if (trie->cflags & kCFBurstTrieSortByKey)
1382 return advanceCursorMappedPageSortedByKey(page, cursor, bytes, length);
1383 else if (trie->cflags & kCFBurstTriePrefixCompression)
1384 return advanceCursorMappedPageWithPerfixCompression(page, cursor, bytes, length);
1385 else
1386 return FALSE;
1387 }
1388
1389 static Boolean advanceCursorCompactMappedLevel(CFBurstTrieRef trie, CompactMapCursor *cursor, const UInt8* bytes, CFIndex length)
1390 {
1391 if (!bytes || length < 0)
1392 return FALSE;
1393
1394 CompactMapTrieLevelRef root = (CompactMapTrieLevelRef)DiskNextTrie_GetPtr(trie->mapBase, cursor->next);
1395 if (length == 0) {
1396 cursor->payload = root->payload;
1397 return TRUE;
1398 }
1399
1400 uint8_t slot = bytes[0] / 64;
1401 uint8_t bit = bytes[0] % 64;
1402 uint32_t item = 0;
1403 uint64_t bword = root->bitmap[slot];
1404 if (bword & (1ull << bit)) {
1405 // Count all the set bits up to this bit
1406 for (int i = 0; i < slot; ++i)
1407 item += __builtin_popcountll(root->bitmap[i]);
1408 item += __builtin_popcountll(bword & ((1ull << bit)-1));
1409 cursor->next = root->slots[item];
1410 return advanceMapCursor(trie, cursor, bytes + 1, length - 1);
1411 } else {
1412 return FALSE;
1413 }
1414 }
1415
1416 static Boolean advanceCursorMappedLevel(CFBurstTrieRef trie, CompactMapCursor *cursor, const UInt8* bytes, CFIndex length)
1417 {
1418 if (!bytes || length < 0)
1419 return FALSE;
1420
1421 MapTrieLevelRef root = (MapTrieLevelRef)DiskNextTrie_GetPtr(trie->mapBase, cursor->next);
1422 if (length == 0) {
1423 cursor->payload = root->payload;
1424 return TRUE;
1425 }
1426
1427 cursor->next = root->slots[bytes[0]];
1428 return advanceMapCursor(trie, cursor, bytes + 1, length - 1);
1429 }
1430
1431 static Boolean advanceMapCursor(CFBurstTrieRef trie, CompactMapCursor *cursor, const UInt8* bytes, CFIndex length)
1432 {
1433 bool result = FALSE;
1434 switch (DiskNextTrie_GetKind(cursor->next)) {
1435 case TrieKind:
1436 result = advanceCursorMappedLevel(trie, cursor, bytes, length);
1437 break;
1438 case CompactTrieKind:
1439 result = advanceCursorCompactMappedLevel(trie, cursor, bytes, length);
1440 break;
1441 case ListKind:
1442 result = advanceCursorMappedPage(trie, cursor, bytes, length);
1443 break;
1444 case Nothing: {
1445 TrieHeader *header = (TrieHeader*)trie->mapBase;
1446 if (cursor->next == header->rootOffset)
1447 result = advanceCursorMappedLevel(trie, cursor, bytes, length);
1448 }
1449 }
1450
1451 return result;
1452 }
1453
1454 static void traverseFromMapCursorMappedLevel(CFBurstTrieRef trie, CompactMapCursor *cursor, UInt8* bytes, uint32_t capacity, uint32_t length, Boolean *stop, void *ctx, CFBurstTrieTraversalCallback callback)
1455 {
1456 MapTrieLevelRef root = (MapTrieLevelRef)DiskNextTrie_GetPtr(trie->mapBase, cursor->next);
1457 if (root->payload) {
1458 callback(ctx, bytes, length, root->payload, stop);
1459 if (*stop)
1460 return;
1461 }
1462
1463 if (length >= capacity)
1464 return;
1465
1466 for (int i = 0; i < CHARACTER_SET_SIZE; ++i) {i =
1467 bytes[length] = i;
1468 cursor->next = (uint32_t)root->slots[i];;
1469 cursor->isOnPage = FALSE;
1470 cursor->entryOffsetInPage = 0;
1471 cursor->offsetInEntry = 0;
1472 cursor->payload = 0;
1473 traverseFromMapCursor(trie, cursor, bytes, capacity - 1, length + 1, stop, ctx, callback);
1474 if (*stop)
1475 return;
1476 }
1477 }
1478
1479 static void traverseFromMapCursorCompactMappedLevel(CFBurstTrieRef trie, CompactMapCursor *cursor, UInt8* bytes, uint32_t capacity, uint32_t length, Boolean *stop, void *ctx, CFBurstTrieTraversalCallback callback)
1480 {
1481 CompactMapTrieLevelRef root = (CompactMapTrieLevelRef)DiskNextTrie_GetPtr(trie->mapBase, cursor->next);
1482 if (root->payload) {
1483 callback(ctx, bytes, length, root->payload, stop);
1484 if (*stop)
1485 return;
1486 }
1487
1488 if (length >= capacity)
1489 return;
1490 for (int c = 0; c < CHARACTER_SET_SIZE; ++c) {
1491 bytes[length] = c;
1492 //This could be optimized to remember what the last slot/item was and not count bits over and over.
1493 uint8_t slot = c / 64;
1494 uint8_t bit = c % 64;
1495 uint32_t item = 0;
1496 uint64_t bword = root->bitmap[slot];
1497 if (bword & (1ull << bit)) {
1498 // Count all the set bits up to this bit
1499 for (int i = 0; i < slot; ++i)
1500 item += __builtin_popcountll(root->bitmap[i]);
1501 item += __builtin_popcountll(bword & ((1ull << bit)-1));
1502 cursor->next = root->slots[item];
1503 cursor->isOnPage = FALSE;
1504 cursor->entryOffsetInPage = 0;
1505 cursor->offsetInEntry = 0;
1506 cursor->payload = 0;
1507 traverseFromMapCursor(trie, cursor, bytes, capacity - 1, length + 1, stop, ctx, callback);
1508 if (*stop)
1509 return;
1510 }
1511 }
1512 }
1513
1514 static void traverseFromMapCursorMappedPageWithPrefixCompression(Page *page, CompactMapCursor *cursor, UInt8* bytes, uint32_t capacity, uint32_t length, Boolean *stop, void *ctx, CFBurstTrieTraversalCallback callback)
1515 {
1516 uint32_t pageSize = page->length - sizeof(Page);
1517 uint32_t offset = cursor->entryOffsetInPage;
1518 uint32_t minPrefixLength = 0;
1519 if (cursor->isOnPage) {
1520 PageEntryPacked *entry = (PageEntryPacked*)&page->data[offset];
1521 int32_t remainingLength = entry->strlen - cursor->offsetInEntry - 1;
1522 if (remainingLength >= 0 && remainingLength <= capacity) {
1523 memcpy(bytes + length, entry->string + cursor->offsetInEntry + 1, remainingLength);
1524 callback(ctx, bytes, length + remainingLength, entry->payload, stop);
1525 if (*stop)
1526 return;
1527 }
1528 minPrefixLength = entry->pfxLen + cursor->offsetInEntry;
1529 offset += getPackedPageEntrySize(entry);
1530 }
1531 PageEntryPacked *previousEntry = NULL;
1532 while (offset < pageSize) {
1533 PageEntryPacked *entry = (PageEntryPacked*)&page->data[offset];
1534 if (minPrefixLength > entry->pfxLen)
1535 break;
1536 else if (entry->payload && entry->strlen <= capacity) {
1537 if (previousEntry)
1538 length -= previousEntry->strlen + previousEntry->pfxLen - entry->pfxLen;
1539 memcpy(bytes + length, entry->string, entry->strlen);
1540 callback(ctx, bytes, length + entry->strlen, entry->payload, stop);
1541 length += entry->strlen;
1542 if (*stop)
1543 return;
1544 }
1545 previousEntry = entry;
1546 offset += getPackedPageEntrySize(entry);
1547 }
1548 }
1549
1550 static void traverseFromMapCursorMappedPageSortedByKey(Page *page, CompactMapCursor *cursor, UInt8* bytes, uint32_t capacity, uint32_t length, Boolean *stop, void *ctx, CFBurstTrieTraversalCallback callback)
1551 {
1552 uint32_t pageSize = page->length - sizeof(Page);
1553 uint32_t offset = cursor->entryOffsetInPage;
1554 uint32_t prefixLength = 0;
1555 const UInt8 *prefix = NULL;
1556 if (cursor->isOnPage) {
1557 PageEntry *entry = (PageEntry*)&page->data[offset];
1558 int32_t remainingLength = entry->strlen - cursor->offsetInEntry - 1;
1559 if (remainingLength >= 0 && remainingLength <= capacity) {
1560 memcpy(bytes + length, entry->string + cursor->offsetInEntry, remainingLength);
1561 callback(ctx, bytes, length + remainingLength, entry->payload, stop);
1562 if (*stop)
1563 return;
1564 }
1565 prefixLength = cursor->offsetInEntry + 1;
1566 prefix = entry->string;
1567 offset += getPageEntrySize(entry);
1568 }
1569
1570 while (offset < pageSize) {
1571 PageEntry *entry = (PageEntry*)&page->data[offset];
1572 if (entry->strlen < prefixLength)
1573 break;
1574 else {
1575 int cmpResult = __builtin_memcmp(entry->string, prefix, prefixLength);
1576 if (cmpResult > 0)
1577 break;
1578 else if (entry->payload && entry->strlen <= capacity) {
1579 if (entry->strlen > 0)
1580 memcpy(bytes + length, entry->string + prefixLength, entry->strlen - prefixLength);
1581 callback(ctx, bytes, length + entry->strlen - prefixLength, entry->payload, stop);
1582 if (*stop)
1583 return;
1584 }
1585 offset += getPageEntrySize(entry);
1586 }
1587 }
1588 }
1589
1590 static void traverseFromMapCursorMappedPage(CFBurstTrieRef trie, CompactMapCursor *cursor, UInt8* bytes, uint32_t capacity, uint32_t length, Boolean *stop, void *ctx, CFBurstTrieTraversalCallback callback)
1591 {
1592 Page *page = (Page *)DiskNextTrie_GetPtr(trie->mapBase, cursor->next);
1593 if (trie->cflags & kCFBurstTrieSortByKey)
1594 traverseFromMapCursorMappedPageSortedByKey(page, cursor, bytes, capacity, length, stop, ctx, callback);
1595 else if (trie->cflags & kCFBurstTriePrefixCompression)
1596 traverseFromMapCursorMappedPageWithPrefixCompression(page, cursor, bytes, capacity, length, stop, ctx, callback);
1597 }
1598
1599 void traverseFromMapCursor(CFBurstTrieRef trie, CompactMapCursor *cursor, UInt8* bytes, uint32_t capacity, uint32_t length, Boolean *stop, void *ctx, CFBurstTrieTraversalCallback callback)
1600 {
1601 switch (DiskNextTrie_GetKind(cursor->next)) {
1602 case TrieKind:
1603 traverseFromMapCursorMappedLevel(trie, cursor, bytes, capacity, length, stop, ctx, callback);
1604 break;
1605 case CompactTrieKind:
1606 traverseFromMapCursorCompactMappedLevel(trie, cursor, bytes, capacity, length, stop, ctx, callback);
1607 break;
1608 case ListKind:
1609 traverseFromMapCursorMappedPage(trie, cursor, bytes, capacity, length, stop, ctx, callback);
1610 break;
1611 case Nothing: {
1612 TrieHeader *header = (TrieHeader*)trie->mapBase;
1613 if (cursor->next == header->rootOffset) {
1614 traverseFromMapCursorMappedLevel(trie, cursor, bytes, capacity, length, stop, ctx, callback);
1615 }
1616 break;
1617 }
1618 }
1619 }
1620
1621 void copyMapCursor(const CompactMapCursor *source, CompactMapCursor* destination)
1622 {
1623 destination->next = source->next;
1624 destination->entryOffsetInPage = source->entryOffsetInPage;
1625 destination->offsetInEntry = source->offsetInEntry;
1626 destination->isOnPage = source->isOnPage;
1627 destination->payload = source->payload;
1628 }
1629
1630 Boolean areMapCursorsEqual(const CompactMapCursor *lhs, const CompactMapCursor *rhs)
1631 {
1632 return lhs->entryOffsetInPage == rhs->entryOffsetInPage && lhs->isOnPage == rhs->isOnPage && lhs->next == rhs->next && lhs->offsetInEntry == rhs->offsetInEntry;
1633 }
1634
1635 static Boolean getMapCursorPayloadFromPackedPageEntry(PageEntryPacked *entry, const CompactMapCursor *cursor, uint32_t *payload)
1636 {
1637 if (payload)
1638 *payload = 0;
1639 if (cursor->entryOffsetInPage == 0 && cursor->offsetInEntry == 0 && entry->strlen == 0) {
1640 if (payload)
1641 *payload = entry->payload;
1642 return TRUE;
1643 } else if (cursor->offsetInEntry != entry->strlen - 1)
1644 return FALSE;
1645 else {
1646 if (payload)
1647 *payload = entry->payload;
1648 return TRUE;
1649 }
1650 }
1651
1652 static Boolean getMapCursorPayloadFromPageEntry(PageEntry *entry, const CompactMapCursor *cursor, uint32_t *payload)
1653 {
1654 if (payload)
1655 *payload = 0;
1656 if (cursor->entryOffsetInPage == 0 && cursor->offsetInEntry == 0 && entry->strlen == 0) {
1657 if (payload)
1658 *payload = entry->payload;
1659 return TRUE;
1660 } else if (cursor->offsetInEntry != entry->strlen - 1)
1661 return FALSE;
1662 else {
1663 if (payload)
1664 *payload = entry->payload;
1665 return TRUE;
1666 }
1667 }
1668
1669 Boolean getMapCursorPayload(CFBurstTrieRef trie, const CompactMapCursor *cursor, uint32_t *payload)
1670 {
1671 if (!cursor)
1672 return FALSE;
1673 if (cursor->payload) {
1674 if (payload)
1675 *payload = cursor->payload;
1676 return TRUE;
1677 }
1678 return FALSE;
1679 }
1680
1681 // Legacy
1682
1683 static Boolean burstTrieMappedFind(DiskTrieLevelRef trie, char *map, const UInt8 *key, uint32_t length, uint32_t *payload, bool prefix) {
1684 Boolean success = false;
1685 if (length) {
1686 uint32_t offset = CFSwapInt32LittleToHost((uint32_t)trie->slots[*key]);
1687 if(DiskNextTrie_GetKind(offset) == TrieKind) {
1688 return burstTrieMappedFind((DiskTrieLevelRef)DiskNextTrie_GetPtr(map,offset), map, key+1, length-1, payload, prefix);
1689 } else if(DiskNextTrie_GetKind(offset) == CompactTrieKind) {
1690 return burstTrieCompactTrieMappedFind((CompactDiskTrieLevelRef)DiskNextTrie_GetPtr(map, offset), map, key+1, length-1, payload, prefix);
1691 } else {
1692 if(DiskNextTrie_GetKind(offset) == ListKind) {
1693 return burstTrieMappedPageFind((StringPage *)DiskNextTrie_GetPtr(map, offset), key+1, length-1, payload, prefix);
1694 } else {
1695 return success;
1696 }
1697 }
1698 } else {
1699 if (trie->weight) {
1700 SetPayload(payload, CFSwapInt32LittleToHost(trie->payload));
1701 success = true;
1702 }
1703 }
1704 return success;
1705 }
1706
1707 static Boolean burstTrieCompactTrieMappedFind(CompactDiskTrieLevelRef trie, char *map, const UInt8 *key, uint32_t length, uint32_t *payload, bool prefix) {
1708 Boolean success = false;
1709 if (length) {
1710 uint32_t mykey = *key;
1711 uint32_t slot = mykey / 64;
1712 uint32_t bit = mykey % 64;
1713 uint32_t item = 0;
1714 uint64_t bword = CFSwapInt64LittleToHost(trie->bitmap[slot]);
1715 if (bword & (1ull << bit)) {
1716 /* Count all the set bits up to this bit */
1717 for (int i=0; i < slot; i++) {
1718 item += __builtin_popcountll(trie->bitmap[i]);
1719 }
1720 item += __builtin_popcountll(bword & ((1ull << bit)-1));
1721 uint32_t offset = CFSwapInt32LittleToHost((uint32_t)trie->slots[item]);
1722 if(DiskNextTrie_GetKind(offset) == TrieKind) {
1723 return burstTrieMappedFind((DiskTrieLevelRef)DiskNextTrie_GetPtr(map, offset), map, key+1, length-1, payload, prefix);
1724 } else if(DiskNextTrie_GetKind(offset) == CompactTrieKind) {
1725 return burstTrieCompactTrieMappedFind((CompactDiskTrieLevelRef)DiskNextTrie_GetPtr(map, offset), map, key+1, length-1, payload, prefix);
1726 }
1727 else {
1728 if(DiskNextTrie_GetKind(offset) == ListKind) {
1729 return burstTrieMappedPageFind((StringPage *)DiskNextTrie_GetPtr(map, offset), key+1, length-1, payload, prefix);
1730 } else {
1731 return success;
1732 }
1733 }
1734 }
1735 } else {
1736 if (trie->weight) {
1737 SetPayload(payload, CFSwapInt32LittleToHost(trie->payload));
1738 success = true;
1739 }
1740 }
1741 return success;
1742 }
1743
1744 static Boolean burstTrieMappedPageFind(StringPage *page, const UInt8 *key, uint32_t length, uint32_t *payload, bool prefix) {
1745 Boolean success = false;
1746 uint32_t end = CFSwapInt32LittleToHost(page->length);
1747 uint32_t cur = 0;
1748 if (prefix) {
1749 uint8_t pfx[256];
1750 while (cur < end) {
1751 StringPageEntryPacked *entry = (StringPageEntryPacked *)&page->data[cur];
1752 uint16_t strlen = entry->pfxLen+CFSwapInt16LittleToHost(entry->strlen);
1753 if (strlen == length && __builtin_memcmp(pfx, key, entry->pfxLen) == 0 && __builtin_memcmp(entry->string, key+entry->pfxLen, length-entry->pfxLen) == 0) {
1754 SetPayload(payload, CFSwapInt32LittleToHost(entry->payload));
1755 success = true;
1756 return success;
1757 } else {
1758 memcpy(pfx+entry->pfxLen, entry->string, MIN(255-entry->pfxLen, length-entry->pfxLen));
1759 cur += sizeof(*entry) + strlen - entry->pfxLen;
1760 }
1761 }
1762 } else {
1763 while (cur < end) {
1764 StringPageEntry *entry = (StringPageEntry *)&page->data[cur];
1765 uint16_t strlen = CFSwapInt16LittleToHost(entry->strlen);
1766 if (strlen == length && __builtin_memcmp(entry->string, key, length) == 0) {
1767 SetPayload(payload, CFSwapInt32LittleToHost(entry->payload));
1768 success = true;
1769 return success;
1770 } else {
1771 cur += sizeof(*entry) + strlen;
1772 }
1773 }
1774
1775 }
1776 return success;
1777 }
1778
1779
1780 #if 0
1781 #pragma mark -
1782 #pragma mark Serialization
1783 #endif
1784
1785 static bool serializeCFBurstTrieLevels(CFBurstTrieRef trie, TrieLevelRef root, uint32_t *offset, off_t start_offset, bool dispose, bool isroot, int fd)
1786 {
1787 bool dense = true;
1788 int count = 0;
1789
1790 for (int i=0; i < CHARACTER_SET_SIZE; i++) if (root->slots[i]) count++;
1791
1792 uint32_t this_offset = *offset;
1793
1794 if ((trie->cflags & kCFBurstTrieBitmapCompression) && count < MAX_BITMAP_SIZE && !isroot) {
1795 size_t size = sizeof(CompactMapTrieLevel) + sizeof(uint32_t) * count;
1796 int offsetSlot = 0;
1797
1798 CompactMapTrieLevel *maptrie = (CompactMapTrieLevel *)alloca(size);
1799 bzero(maptrie, size);
1800 *offset += size;
1801
1802 for (int i=0; i < CHARACTER_SET_SIZE; i++) {
1803 NextTrie next = root->slots[i];
1804 if (next) {
1805 uint32_t slot = i / 64;
1806 uint32_t bit = i % 64;
1807 maptrie->bitmap[slot] |= 1ull<<bit;
1808 if (NextTrie_GetKind(next) == TrieKind) {
1809 TrieLevelRef nextLevel = (TrieLevelRef)NextTrie_GetPtr(next);
1810 uint32_t childOffset = *offset;
1811 if (serializeCFBurstTrieLevels(trie, nextLevel, offset, start_offset, true, false, fd)) {
1812 maptrie->slots[offsetSlot] = (TrieKind|childOffset);
1813 } else {
1814 maptrie->slots[offsetSlot] = (CompactTrieKind|childOffset);
1815 }
1816 } else {
1817 maptrie->slots[offsetSlot] = next;
1818 }
1819 offsetSlot++;
1820 }
1821 }
1822 maptrie->payload = root->payload;
1823
1824 int bitcount = 0;
1825 for (int i=0; i < 4; i++) bitcount += __builtin_popcountll(maptrie->bitmap[i]);
1826 assert(bitcount == count);
1827
1828 pwrite(fd, maptrie, size, this_offset+start_offset);
1829 dense = false;
1830 } else {
1831 MapTrieLevel maptrie;
1832 *offset += sizeof(maptrie);
1833
1834 for (int i=0; i < CHARACTER_SET_SIZE; i++) {
1835 NextTrie next = root->slots[i];
1836 if (NextTrie_GetKind(next) == TrieKind) {
1837 TrieLevelRef nextLevel = (TrieLevelRef)NextTrie_GetPtr(next);
1838 uint32_t childOffset = *offset;
1839 if (serializeCFBurstTrieLevels(trie, nextLevel, offset, start_offset, true, false, fd)) {
1840 maptrie.slots[i] = (TrieKind|childOffset);
1841 } else {
1842 maptrie.slots[i] = (CompactTrieKind|childOffset);
1843 }
1844 } else {
1845 maptrie.slots[i] = next;
1846 }
1847 }
1848 maptrie.payload = root->payload;
1849 pwrite(fd, &maptrie, sizeof(maptrie), this_offset+start_offset);
1850 }
1851
1852 if (dispose) free(root);
1853 return dense;
1854 }
1855
1856 static void serializeCFBurstTrieList(CFBurstTrieRef trie, ListNodeRef listNode, int fd)
1857 {
1858 uint32_t listCount;
1859 size_t size = trie->containerSize;
1860
1861 // ** Temp list of nodes to sort
1862 ListNodeRef *nodes = (ListNodeRef *)malloc(sizeof(ListNodeRef) * size);
1863 for (listCount = 0; listNode; listCount++) {
1864 if (listCount >= size) {
1865 size *= 2;
1866 nodes = (ListNodeRef *)realloc(nodes, sizeof(ListNodeRef) * size);
1867 }
1868 nodes[listCount] = listNode;
1869 listNode = listNode->next;
1870 }
1871
1872 char _buffer[MAX_BUFFER_SIZE];
1873 size_t bufferSize = (sizeof(Page) + size * (sizeof(PageEntryPacked) + MAX_STRING_SIZE));
1874 char *buffer = bufferSize < MAX_BUFFER_SIZE ? _buffer : (char *) malloc(bufferSize);
1875
1876 Page *page = (Page *)buffer;
1877 uint32_t current = 0;
1878
1879 if (trie->cflags & kCFBurstTriePrefixCompression) {
1880 qsort(nodes, listCount, sizeof(ListNodeRef), nodeStringCompare);
1881
1882 ListNodeRef last = 0;
1883 for (int i=0; i < listCount; i++) {
1884 listNode = nodes[i];
1885 uint8_t pfxLen = 0;
1886 if (last) {
1887 for ( ;
1888 pfxLen < CHARACTER_SET_SIZE-1 &&
1889 pfxLen < listNode->length &&
1890 pfxLen < last->length &&
1891 listNode->string[pfxLen] == last->string[pfxLen];
1892 pfxLen++);
1893 }
1894
1895 PageEntryPacked *entry = (PageEntryPacked *)(&page->data[current]);
1896 entry->strlen = listNode->length - pfxLen;
1897 entry->payload = listNode->payload;
1898 entry->pfxLen = pfxLen;
1899 memcpy(entry->string, listNode->string+pfxLen, listNode->length-pfxLen);
1900 current += listNode->length - pfxLen + sizeof(PageEntryPacked);
1901 last = listNode;
1902 }
1903 } else {
1904 if (trie->cflags & kCFBurstTrieSortByKey)
1905 qsort(nodes, listCount, sizeof(ListNodeRef), nodeStringCompare);
1906 else
1907 qsort(nodes, listCount, sizeof(ListNodeRef), nodeWeightCompare);
1908
1909 for (int i=0; i < listCount; i++) {
1910 listNode = nodes[i];
1911 PageEntry *entry = (PageEntry *)(&page->data[current]);
1912 entry->strlen = listNode->length;
1913 entry->payload = listNode->payload;
1914 memcpy(entry->string, listNode->string, listNode->length);
1915 current += listNode->length + sizeof(PageEntry);
1916 }
1917 }
1918
1919 size_t len = (sizeof(Page) + current + 3) & ~3;
1920 page->length = current;
1921 write(fd, page, len);
1922
1923 free(nodes);
1924 if (buffer != _buffer) free(buffer);
1925 }
1926
1927 static void serializeCFBurstTrieLists(CFBurstTrieRef trie, TrieLevelRef root, off_t start_offset, int fd)
1928 {
1929 for (int i=0; i < CHARACTER_SET_SIZE; i++) {
1930 NextTrie next = root->slots[i];
1931 uint32_t offset;
1932 if (NextTrie_GetKind(next) == TrieKind) {
1933 TrieLevelRef nextLevel = (TrieLevelRef)NextTrie_GetPtr(next);
1934 serializeCFBurstTrieLists(trie, nextLevel, start_offset, fd);
1935 } else {
1936 if (NextTrie_GetKind(next) == ListKind) {
1937 ListNodeRef listNode = (ListNodeRef)NextTrie_GetPtr(next);
1938 offset = lseek(fd, 0, SEEK_CUR) - start_offset;
1939 serializeCFBurstTrieList(trie, listNode, fd);
1940 finalizeCFBurstTrieList(listNode);
1941 //assert((offset & 3)==0);
1942 root->slots[i] = (offset|ListKind);
1943 }
1944 }
1945 }
1946 }
1947
1948 static size_t serializeCFBurstTrie(CFBurstTrieRef trie, size_t start_offset, int fd)
1949 {
1950 TrieHeader header;
1951 header.signature = 0x0ddba11;
1952 header.rootOffset = 0;
1953 header.count = trie->count;
1954 header.size = 0;
1955 header.flags = trie->cflags;
1956 header.reserved[0] = 0;
1957
1958 uint32_t offset;
1959 lseek(fd, start_offset, SEEK_SET);
1960
1961 size_t header_size = sizeof(header);
1962 write(fd, &header, header_size);
1963
1964 serializeCFBurstTrieLists(trie, &trie->root, start_offset, fd);
1965
1966 offset = lseek(fd, 0, SEEK_CUR) - start_offset;
1967 size_t off = offsetof(TrieHeader, rootOffset);
1968 size_t offsize = sizeof(offset);
1969 pwrite(fd, &offset, offsize, off+start_offset);
1970
1971 serializeCFBurstTrieLevels(trie, &trie->root, &offset, start_offset, false, true, fd);
1972
1973 size_t off2 = offsetof(TrieHeader, size);
1974 offsize = sizeof(offset);
1975 pwrite(fd, &offset, offsize, off2+start_offset);
1976
1977 offset = lseek(fd, 0, SEEK_END);
1978 return (size_t)(offset-start_offset);
1979 }
1980
1981 #if 0
1982 #pragma mark -
1983 #pragma mark Release
1984 #endif
1985
1986 static void destroyCFBurstTrie(CFBurstTrieRef trie) {
1987 if (trie->mapBase) {
1988 #if DEPLOYMENT_TARGET_WINDOWS
1989 UnmapViewOfFile(trie->mapBase);
1990 CloseHandle(trie->mapHandle);
1991 CloseHandle(trie->mappedFileHandle);
1992 #else
1993 munmap(trie->mapBase, trie->mapSize);
1994 #endif
1995 } else {
1996 finalizeCFBurstTrie(&trie->root);
1997 }
1998 free(trie);
1999 return;
2000 }
2001
2002 static void finalizeCFBurstTrie(TrieLevelRef trie) {
2003 for (int i=0; i < CHARACTER_SET_SIZE; i++) {
2004 if (NextTrie_GetKind(trie->slots[i]) == TrieKind) {
2005 finalizeCFBurstTrie((TrieLevelRef)NextTrie_GetPtr(trie->slots[i]));
2006 free((void *)NextTrie_GetPtr(trie->slots[i]));
2007 } else if (NextTrie_GetKind(trie->slots[i]) == ListKind) {
2008 finalizeCFBurstTrieList((ListNodeRef)NextTrie_GetPtr(trie->slots[i]));
2009 }
2010 }
2011 }
2012
2013 static void finalizeCFBurstTrieList(ListNodeRef node) {
2014 do {
2015 ListNodeRef next = node->next;
2016 free(node);
2017 node = next;
2018 } while(node);
2019 }
2020
2021 #if 0
2022 #pragma mark -
2023 #pragma mark Helpers
2024 #endif
2025
2026 static int nodeWeightCompare(const void *a, const void *b) {
2027 ListNodeRef nodeA = *(ListNodeRef *)a;
2028 ListNodeRef nodeB = *(ListNodeRef *)b;
2029 return (nodeB->weight - nodeA->weight);
2030 }
2031
2032 static int nodeStringCompare(const void *a, const void *b) {
2033 ListNodeRef nodeA = *(ListNodeRef *)a;
2034 ListNodeRef nodeB = *(ListNodeRef *)b;
2035 int result = memcmp((char *)nodeA->string, (char *)nodeB->string, MIN(nodeA->length, nodeB->length));
2036 if (result == 0) result = nodeA->length-nodeB->length;
2037 return result;
2038 }
2039
2040 static bool containsKey(void *context, const uint8_t *key, uint32_t payload, bool exact)
2041 {
2042 uint32_t *ctx = (uint32_t *)context;
2043 if (exact) *ctx = payload;
2044 return exact;
2045 }
2046
2047 static CFIndex burstTrieConvertCharactersToUTF8(UniChar *chars, CFIndex numChars, UInt8 *buffer) {
2048 uint32_t i, j;
2049 for (i = j = 0; i < numChars; i++) {
2050 UniChar c = chars[i];
2051 if (CFStringIsSurrogateHighCharacter(c) && i + 1 < numChars && CFStringIsSurrogateLowCharacter(chars[i + 1])) {
2052 UTF32Char lc = CFStringGetLongCharacterForSurrogatePair(c, chars[i + 1]);
2053 buffer[j++] = 0xf0 + (lc >> 18);
2054 buffer[j++] = 0x80 + ((lc & 0x3ffff) >> 12);
2055 buffer[j++] = 0x80 + ((lc & 0xfff) >> 6);
2056 buffer[j++] = 0x80 + (lc & 0x3f);
2057 i++;
2058 } else {
2059 if (c < 0x80) {
2060 buffer[j++] = c;
2061 } else if (c < 0x800) {
2062 buffer[j++] = 0xc0 + (c >> 6);
2063 buffer[j++] = 0x80 + (c & 0x3f);
2064 } else {
2065 buffer[j++] = 0xe0 + (c >> 12);
2066 buffer[j++] = 0x80 + ((c & 0xfff) >> 6);
2067 buffer[j++] = 0x80 + (c & 0x3f);
2068 }
2069 }
2070 }
2071 buffer[j] = 0;
2072 return j;
2073 }
2074