2 * Copyright (c) 2014 Apple Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
21 * @APPLE_LICENSE_HEADER_END@
25 Copyright (c) 2008-2013, Apple Inc. All rights reserved.
26 Responsibility: Jennifer Moore
29 #include "CFInternal.h"
30 #include "CFBurstTrie.h"
31 #include "CFByteOrder.h"
39 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX
41 #include <sys/param.h>
48 #if DEPLOYMENT_TARGET_WINDOWS
50 #define statinfo _stat
51 #define stat(x,y) _NS_stat(x,y)
52 #define __builtin_memcmp(x, y, z) memcmp(x, y, z)
53 #define __builtin_popcountll(x) popcountll(x)
54 #define bzero(dst, size) ZeroMemory(dst, size)
58 static int pwrite(int fd
, const void *buf
, size_t nbyte
, off_t offset
) {
63 _lseek(fd
, offset
, SEEK_SET
);
66 int res
= _write(fd
, buf
, nbyte
);
68 // Return to previous offset
69 _lseek(fd
, pos
, SEEK_SET
);
79 #pragma mark Types and Utilities
82 #define MAX_STRING_ALLOCATION_SIZE 342
83 #define MAX_STRING_SIZE 1024
84 #define MAX_KEY_LENGTH MAX_STRING_SIZE * 4
85 #define CHARACTER_SET_SIZE 256
86 #define MAX_LIST_SIZE 256 // 64
87 #define MAX_BITMAP_SIZE 200
88 #define MAX_BUFFER_SIZE (4096<<2)
90 #define NextTrie_GetPtr(p) (p & ((~(uintptr_t)0)-3))
91 #define NextTrie_GetKind(p) (p & 3)
92 #define NextTrie_SetKind(p, kind) (p |= (3&kind))
94 #define DiskNextTrie_GetPtr(map,offset) (((uintptr_t)map) + (uintptr_t)(offset & ((~(uintptr_t)0)-3)))
95 #define DiskNextTrie_GetKind(p) (p & 3)
96 #define DiskNextTrie_SetKind(p, kind) (p |= (3&kind))
98 // Use this macro to avoid forgetting to check the pointer before assigning value to it.
99 #define SetPayload(pointer, value) do { if (pointer) *pointer = value; } while (0)
101 enum { Nothing
= 0, TrieKind
= 1, ListKind
= 2, CompactTrieKind
= 3 };
102 typedef enum { FailedInsert
= 0, NewTerm
= 1, ExistingTerm
= 2 } CFBTInsertCode
;
105 typedef uintptr_t NextTrie
;
107 typedef struct _TrieLevel
{
108 NextTrie slots
[CHARACTER_SET_SIZE
];
112 typedef TrieLevel
*TrieLevelRef
;
114 typedef struct _MapTrieLevel
{
115 uint32_t slots
[CHARACTER_SET_SIZE
];
118 typedef MapTrieLevel
*MapTrieLevelRef
;
120 typedef struct _CompactMapTrieLevel
{
121 uint64_t bitmap
[CHARACTER_SET_SIZE
/ 64];
124 } CompactMapTrieLevel
;
125 typedef CompactMapTrieLevel
*CompactMapTrieLevelRef
;
127 typedef struct _ListNode
{
128 struct _ListNode
*next
;
135 typedef struct _Page
{
140 typedef struct _PageEntryPacked
{
147 typedef struct _PageEntry
{
153 typedef struct _TrieHeader
{
159 uint64_t reserved
[16];
162 typedef struct _TrieCursor
{
168 const uint8_t *prefix
;
169 uint8_t key
[MAX_KEY_LENGTH
];
172 typedef struct _MapCursor
{
178 const uint8_t *prefix
;
179 uint8_t key
[MAX_STRING_SIZE
*4];
182 typedef struct _CompactMapCursor
{
184 uint32_t entryOffsetInPage
;
185 uint32_t offsetInEntry
;
187 // On a page, the first entry could has 0 strlen. So we need this variable to tell us whether
188 // the cursor is merely pointing at the beginning of the page, or the first entry.
189 // For example, if the trie contains "ab" and "abc", where "a" is stored on an array level,
190 // while "b" and "bc" are stored on a page level. If we creat a cursor for string "a", this cursor
191 // will point at the beginning of the page, but not at any particular key. The both entryOffsetInPage and
192 // offsetInEntry fields of the cursor are set to 0 in this case. Now if we add "a" to the
193 // trie. the page level will actually contains three entries. The first entry corresponds to string "a".
194 // That entry has 0 strlen value. If we creat a cursor for string "a" again, this cursor will
195 // point at the first entry on the page. But the entryOffsetInPage and offsetInEntry fields are still
196 // set to 0s. So we need an additional variable to make distinction between these two situations.
199 typedef struct _CompactMapCursor
*MapCursorRef
;
202 _kCFBurstTrieCursorTrieType
= 0,
203 _kCFBurstTrieCursorMapType
206 typedef struct _CFBurstTrieCursor
{
207 CompactMapCursor mapCursor
;
210 } _CFBurstTrieCursor
;
213 typedef struct _DiskTrieLevel
{
214 uint32_t slots
[CHARACTER_SET_SIZE
];
218 typedef DiskTrieLevel
*DiskTrieLevelRef
;
220 typedef struct _CompactDiskTrieLevel
{
221 uint64_t bitmap
[CHARACTER_SET_SIZE
/ 64]; // CHARACTER_SET_SIZE / 64bits per word
225 } CompactDiskTrieLevel
;
226 typedef CompactDiskTrieLevel
*CompactDiskTrieLevelRef
;
228 typedef struct _StringPage
{
233 typedef struct _StringPageEntryPacked
{
235 uint16_t strlen
; // make uint8_t if possible
238 } StringPageEntryPacked
;
240 typedef struct _StringPageEntry
{
241 uint16_t strlen
; // make uint8_t if possible
246 typedef struct _fileHeader
{
256 struct _CFBurstTrie
{
259 DiskTrieLevel diskRoot
;
260 MapTrieLevel maproot
;
267 uint32_t containerSize
;
269 #if DEPLOYMENT_TARGET_WINDOWS
271 HANDLE mappedFileHandle
;
277 #pragma mark Forward declarations
280 typedef struct _TraverseContext
{
282 void (*callback
)(void*, const UInt8
*, uint32_t, uint32_t);
285 static bool foundKey(void *context
, const uint8_t *key
, uint32_t payload
, bool exact
)
287 if (context
!= NULL
) {
288 TraverseContext
*ctx
= (TraverseContext
*)context
;
289 if (ctx
->context
&& ctx
->callback
) {
290 ctx
->callback(ctx
->context
, key
, 1, payload
);
296 void CFBurstTrieTraverseWithCursor(CFBurstTrieRef trie
, const uint8_t *prefix
, uint32_t prefixLen
, void **cursor
, void *ctx
, bool (*callback
)(void *, const uint8_t *, uint32_t, bool));
298 static CFBTInsertCode
addCFBurstTrieLevel(CFBurstTrieRef trie
, TrieLevelRef root
, const uint8_t *key
, uint32_t keylen
, uint32_t weight
, uint32_t payload
);
300 static void findCFBurstTrieLevel(CFBurstTrieRef trie
, TrieCursor
*cursor
, bool exactmatch
, void *ctx
, bool (*callback
)(void*, const uint8_t*, uint32_t, bool));
301 static void findCFBurstTrieMappedLevel(CFBurstTrieRef trie
, MapCursor
*cursor
, bool exactmatch
, void *ctx
, bool (*callback
)(void*, const uint8_t*, uint32_t, bool));
302 static void traverseCFBurstTrieLevel(CFBurstTrieRef trie
, TrieLevelRef root
, TrieCursor
*cursor
, bool exactmatch
, void *ctx
, bool (*callback
)(void *, const uint8_t *, uint32_t, bool));
303 static void traverseCFBurstTrieMappedLevel(CFBurstTrieRef trie
, MapTrieLevelRef root
, MapCursor
*cursor
, bool exactmatch
, void *ctx
, bool (*callback
)(void *, const uint8_t *, uint32_t, bool));
304 static void traverseCFBurstTrieCompactMappedLevel(CFBurstTrieRef trie
, CompactMapTrieLevelRef root
, MapCursor
*cursor
, bool exactmatch
, void *ctx
, bool (*callback
)(void *, const uint8_t *, uint32_t, bool));
305 static void traverseCFBurstTrieWithCursor(CFBurstTrieRef trie
, const uint8_t *prefix
, uint32_t prefixLen
, void **cursor
, bool exactmatch
, void *ctx
, bool (*callback
)(void *, const uint8_t *, uint32_t, bool));
307 static size_t serializeCFBurstTrie(CFBurstTrieRef trie
, size_t start_offset
, int fd
);
309 static Boolean
burstTrieMappedFind(DiskTrieLevelRef trie
, char *map
, const UInt8
*key
, uint32_t length
, uint32_t *payload
, bool prefix
);
310 static Boolean
burstTrieMappedPageFind(StringPage
*page
, const UInt8
*key
, uint32_t length
, uint32_t *payload
, bool prefix
);
311 static Boolean
burstTrieCompactTrieMappedFind(CompactDiskTrieLevelRef trie
, char *map
, const UInt8
*key
, uint32_t length
, uint32_t *payload
, bool prefix
);
313 static void destroyCFBurstTrie(CFBurstTrieRef trie
);
314 static void finalizeCFBurstTrie(TrieLevelRef trie
);
315 static void finalizeCFBurstTrieList(ListNodeRef node
);
317 static int nodeWeightCompare(const void *a
, const void *b
);
318 static int nodeStringCompare(const void *a
, const void *b
);
320 static bool foundKey(void *context
, const uint8_t *key
, uint32_t payload
, bool exact
);
321 static bool containsKey(void *context
, const uint8_t *key
, uint32_t payload
, bool exact
);
323 static CFIndex
burstTrieConvertCharactersToUTF8(UniChar
*chars
, CFIndex numChars
, UInt8
*buffer
);
325 static Boolean
advanceMapCursor(CFBurstTrieRef trie
, CompactMapCursor
*cursor
, const UInt8
* bytes
, CFIndex length
);
326 static Boolean
getMapCursorPayload(CFBurstTrieRef trie
, const CompactMapCursor
*cursor
, uint32_t *payload
);
327 static void copyMapCursor(const CompactMapCursor
*source
, CompactMapCursor
* destination
);
328 static Boolean
areMapCursorsEqual(const CompactMapCursor
*lhs
, const CompactMapCursor
*rhs
);
329 static void traverseFromMapCursor(CFBurstTrieRef trie
, CompactMapCursor
*cursor
, UInt8
* bytes
, uint32_t capacity
, uint32_t length
, Boolean
*stop
, void *ctx
, CFBurstTrieTraversalCallback callback
);
330 static Boolean
getMapCursorPayloadFromPackedPageEntry(PageEntryPacked
*entry
, const CompactMapCursor
*cursor
, uint32_t *payload
);
331 static Boolean
getMapCursorPayloadFromPageEntry(PageEntry
*entry
, const CompactMapCursor
*cursor
, uint32_t *payload
);
333 CFBurstTrieRef
CFBurstTrieCreateWithOptions(CFDictionaryRef options
) {
334 CFBurstTrieRef trie
= NULL
;
335 trie
= (CFBurstTrieRef
) calloc(1, sizeof(struct _CFBurstTrie
));
336 trie
->containerSize
= MAX_LIST_SIZE
;
338 CFNumberRef valueAsCFNumber
;
339 if (CFDictionaryGetValueIfPresent(options
, kCFBurstTrieCreationOptionNameContainerSize
, (const void **)&valueAsCFNumber
)) {
341 CFNumberGetValue(valueAsCFNumber
, kCFNumberIntType
, &value
);
342 trie
->containerSize
= value
> 2 && value
< 4096 ? value
: MAX_LIST_SIZE
;
348 CFBurstTrieRef
CFBurstTrieCreate() {
349 CFBurstTrieRef trie
= NULL
;
350 int listSize
= MAX_LIST_SIZE
;
351 CFNumberRef value
= CFNumberCreate(kCFAllocatorDefault
, kCFNumberIntType
, &listSize
);
352 CFMutableDictionaryRef options
= CFDictionaryCreateMutable(kCFAllocatorDefault
, 1, NULL
, NULL
);
353 CFDictionarySetValue(options
, kCFBurstTrieCreationOptionNameContainerSize
, value
);
354 trie
= CFBurstTrieCreateWithOptions(options
);
360 CFBurstTrieRef
CFBurstTrieCreateFromFile(CFStringRef path
) {
362 char filename
[PATH_MAX
];
365 /* Check valid path name */
366 if (!CFStringGetCString(path
, filename
, PATH_MAX
, kCFStringEncodingUTF8
)) return NULL
;
368 /* Check if file exists */
369 if (stat(filename
, &sb
) != 0) return NULL
;
371 /* Check if file can be opened */
372 if ((fd
=open(filename
, CF_OPENFLGS
|O_RDONLY
)) < 0) return NULL
;
374 #if DEPLOYMENT_TARGET_WINDOWS
375 HANDLE mappedFileHandle
= (HANDLE
)_get_osfhandle(fd
);
376 if (!mappedFileHandle
) return NULL
;
378 HANDLE mapHandle
= CreateFileMapping(mappedFileHandle
, NULL
, PAGE_READONLY
, 0, 0, NULL
);
379 if (!mapHandle
) return NULL
;
381 char *map
= (char *)MapViewOfFile(mapHandle
, FILE_MAP_READ
, 0, 0, sb
.st_size
);
382 if (!map
) return NULL
;
384 char *map
= mmap(0, sb
.st_size
, PROT_READ
, MAP_FILE
|MAP_SHARED
, fd
, 0);
387 CFBurstTrieRef trie
= NULL
;
388 TrieHeader
*header
= (TrieHeader
*)map
;
390 if (((uint32_t*)map
)[0] == 0xbabeface) {
391 trie
= (CFBurstTrieRef
) calloc(1, sizeof(struct _CFBurstTrie
));
393 trie
->mapSize
= CFSwapInt32LittleToHost(sb
.st_size
);
394 trie
->mapOffset
= CFSwapInt32LittleToHost(((fileHeader
*)trie
->mapBase
)->rootOffset
);
395 trie
->cflags
= CFSwapInt32LittleToHost(((fileHeader
*)trie
->mapBase
)->flags
);
396 trie
->count
= CFSwapInt32LittleToHost(((fileHeader
*)trie
->mapBase
)->count
);
398 #if DEPLOYMENT_TARGET_WINDOWS
399 trie
->mappedFileHandle
= mappedFileHandle
;
400 trie
->mapHandle
= mapHandle
;
402 // On Windows, the file being mapped must stay open as long as the map exists. Don't close it early. Other platforms close it here.
405 } else if (header
->signature
== 0xcafebabe || header
->signature
== 0x0ddba11) {
406 trie
= (CFBurstTrieRef
) calloc(1, sizeof(struct _CFBurstTrie
));
408 trie
->mapSize
= CFSwapInt32LittleToHost(sb
.st_size
);
409 trie
->cflags
= CFSwapInt32LittleToHost(header
->flags
);
410 trie
->count
= CFSwapInt32LittleToHost(header
->count
);
412 #if DEPLOYMENT_TARGET_WINDOWS
413 trie
->mappedFileHandle
= mappedFileHandle
;
414 trie
->mapHandle
= mapHandle
;
416 // On Windows, the file being mapped must stay open as long as the map exists. Don't close it early. Other platforms close it here.
423 CFBurstTrieRef
CFBurstTrieCreateFromMapBytes(char *mapBase
) {
424 CFBurstTrieRef trie
= NULL
;
425 TrieHeader
*header
= (TrieHeader
*)mapBase
;
427 if (mapBase
&& ((uint32_t*)mapBase
)[0] == 0xbabeface) {
428 trie
= (CFBurstTrieRef
) malloc(sizeof(struct _CFBurstTrie
));
429 trie
->mapBase
= mapBase
;
430 trie
->mapSize
= CFSwapInt32LittleToHost(((fileHeader
*)trie
->mapBase
)->size
);
431 trie
->mapOffset
= CFSwapInt32LittleToHost(((fileHeader
*)trie
->mapBase
)->rootOffset
);
432 trie
->cflags
= CFSwapInt32LittleToHost(((fileHeader
*)trie
->mapBase
)->flags
);
433 trie
->count
= CFSwapInt32LittleToHost(((fileHeader
*)trie
->mapBase
)->count
);
435 } else if (mapBase
&& (header
->signature
== 0xcafebabe || header
->signature
== 0x0ddba11)) {
436 trie
= (CFBurstTrieRef
) malloc(sizeof(struct _CFBurstTrie
));
437 trie
->mapBase
= mapBase
;
438 trie
->mapSize
= CFSwapInt32LittleToHost(header
->size
);
439 trie
->cflags
= CFSwapInt32LittleToHost(header
->flags
);
440 trie
->count
= CFSwapInt32LittleToHost(header
->count
);
446 Boolean
CFBurstTrieInsert(CFBurstTrieRef trie
, CFStringRef term
, CFRange termRange
, CFIndex payload
) {
447 return CFBurstTrieAddWithWeight(trie
, term
, termRange
, 1, (uint32_t)payload
);
450 Boolean
CFBurstTrieAdd(CFBurstTrieRef trie
, CFStringRef term
, CFRange termRange
, uint32_t payload
) {
451 return CFBurstTrieAddWithWeight(trie
, term
, termRange
, 1, payload
);
454 Boolean
CFBurstTrieInsertCharacters(CFBurstTrieRef trie
, UniChar
*chars
, CFIndex numChars
, CFIndex payload
) {
455 return CFBurstTrieAddCharactersWithWeight(trie
, chars
, numChars
, 1, (uint32_t)payload
);
458 Boolean
CFBurstTrieAddCharacters(CFBurstTrieRef trie
, UniChar
*chars
, CFIndex numChars
, uint32_t payload
) {
459 return CFBurstTrieAddCharactersWithWeight(trie
, chars
, numChars
, 1, payload
);
462 Boolean
CFBurstTrieInsertUTF8String(CFBurstTrieRef trie
, UInt8
*chars
, CFIndex numChars
, CFIndex payload
) {
463 return CFBurstTrieAddUTF8StringWithWeight(trie
, chars
, numChars
, 1, (uint32_t)payload
);
466 Boolean
CFBurstTrieAddUTF8String(CFBurstTrieRef trie
, UInt8
*chars
, CFIndex numChars
, uint32_t payload
) {
467 return CFBurstTrieAddUTF8StringWithWeight(trie
, chars
, numChars
, 1, payload
);
470 Boolean
CFBurstTrieInsertWithWeight(CFBurstTrieRef trie
, CFStringRef term
, CFRange termRange
, CFIndex weight
, CFIndex payload
) {
471 return CFBurstTrieAddWithWeight(trie
, term
, termRange
, weight
, (uint32_t)payload
);
474 Boolean
CFBurstTrieAddWithWeight(CFBurstTrieRef trie
, CFStringRef term
, CFRange termRange
, uint32_t weight
, uint32_t payload
) {
475 Boolean success
= false;
476 CFIndex size
= MAX_STRING_ALLOCATION_SIZE
;
477 CFIndex bytesize
= termRange
.length
* 4; //** 4-byte max character size
478 if (!trie
->mapBase
&& termRange
.length
< MAX_STRING_SIZE
&& payload
> 0) {
480 UInt8 buffer
[MAX_STRING_ALLOCATION_SIZE
+ 1];
482 if (bytesize
>= size
) {
484 key
= (UInt8
*) malloc(sizeof(UInt8
) * size
+ 1);
486 CFStringGetBytes(term
, termRange
, kCFStringEncodingUTF8
, (UInt8
)'-', (Boolean
)0, key
, size
, &length
);
489 success
= CFBurstTrieAddUTF8StringWithWeight(trie
, key
, length
, weight
, payload
);
490 if (buffer
!= key
) free(key
);
495 Boolean
CFBurstTrieInsertCharactersWithWeight(CFBurstTrieRef trie
, UniChar
*chars
, CFIndex numChars
, CFIndex weight
, CFIndex payload
) {
496 return CFBurstTrieAddCharactersWithWeight(trie
, chars
, numChars
, weight
, (uint32_t)payload
);
499 Boolean
CFBurstTrieAddCharactersWithWeight(CFBurstTrieRef trie
, UniChar
*chars
, CFIndex numChars
, uint32_t weight
, uint32_t payload
) {
500 Boolean success
= false;
501 CFIndex size
= MAX_STRING_ALLOCATION_SIZE
;
502 CFIndex bytesize
= numChars
* 4; //** 4-byte max character size
503 if (!trie
->mapBase
&& numChars
< MAX_STRING_SIZE
&& payload
> 0) {
505 UInt8 buffer
[MAX_STRING_ALLOCATION_SIZE
+ 1];
507 if (bytesize
>= size
) {
509 key
= (UInt8
*) malloc(sizeof(UInt8
) * size
+ 1);
511 length
= burstTrieConvertCharactersToUTF8(chars
, numChars
, key
);
514 success
= CFBurstTrieAddUTF8StringWithWeight(trie
, key
, length
, weight
, payload
);
515 if (buffer
!= key
) free(key
);
520 Boolean
CFBurstTrieInsertUTF8StringWithWeight(CFBurstTrieRef trie
, UInt8
*chars
, CFIndex numChars
, CFIndex weight
, CFIndex payload
) {
521 return CFBurstTrieAddUTF8StringWithWeight(trie
, chars
, numChars
, weight
, (uint32_t)payload
);
524 Boolean
CFBurstTrieAddUTF8StringWithWeight(CFBurstTrieRef trie
, UInt8
*chars
, CFIndex numChars
, uint32_t weight
, uint32_t payload
) {
525 CFBTInsertCode code
= FailedInsert
;
527 if (!trie
->mapBase
&& numChars
< MAX_STRING_SIZE
*4 && payload
> 0) {
528 code
= addCFBurstTrieLevel(trie
, &trie
->root
, chars
, numChars
, weight
, payload
);
529 if (code
== NewTerm
) trie
->count
++;
531 return code
> FailedInsert
;
534 Boolean
CFBurstTrieFind(CFBurstTrieRef trie
, CFStringRef term
, CFRange termRange
, CFIndex
*payload
) {
536 if (CFBurstTrieContains(trie
, term
, termRange
, &p
)) {
537 SetPayload(payload
, p
);
543 Boolean
CFBurstTrieContains(CFBurstTrieRef trie
, CFStringRef term
, CFRange termRange
, uint32_t *payload
) {
544 Boolean success
= false;
545 CFIndex size
= MAX_STRING_ALLOCATION_SIZE
;
546 CFIndex bytesize
= termRange
.length
* 4; //** 4-byte max character size
547 if (termRange
.length
< MAX_STRING_SIZE
) {
549 UInt8 buffer
[MAX_STRING_ALLOCATION_SIZE
+1];
551 if (bytesize
>= size
) {
553 key
= (UInt8
*) malloc(sizeof(UInt8
) * size
+ 1);
555 CFStringGetBytes(term
, termRange
, kCFStringEncodingUTF8
, (UInt8
)'-', (Boolean
)0, key
, size
, &length
);
558 success
= CFBurstTrieContainsUTF8String(trie
, key
, length
, payload
);
559 if (buffer
!= key
) free(key
);
564 Boolean
CFBurstTrieFindCharacters(CFBurstTrieRef trie
, UniChar
*chars
, CFIndex numChars
, CFIndex
*payload
) {
566 if (CFBurstTrieContainsCharacters(trie
, chars
, numChars
, &p
)) {
567 SetPayload(payload
, p
);
573 Boolean
CFBurstTrieContainsCharacters(CFBurstTrieRef trie
, UniChar
*chars
, CFIndex numChars
, uint32_t *payload
) {
574 Boolean success
= false;
575 CFIndex size
= MAX_STRING_ALLOCATION_SIZE
;
576 CFIndex bytesize
= numChars
* 4; //** 4-byte max character size
577 if (numChars
< MAX_STRING_SIZE
) {
579 UInt8 buffer
[MAX_STRING_ALLOCATION_SIZE
+ 1];
581 if (bytesize
>= size
) {
583 key
= (UInt8
*) malloc(sizeof(UInt8
) * size
+ 1);
585 length
= burstTrieConvertCharactersToUTF8(chars
, numChars
, key
);
588 success
= CFBurstTrieContainsUTF8String(trie
, key
, length
, payload
);
589 if (buffer
!= key
) free(key
);
594 Boolean
CFBurstTrieFindUTF8String(CFBurstTrieRef trie
, UInt8
*key
, CFIndex length
, CFIndex
*payload
) {
596 if (CFBurstTrieContainsUTF8String(trie
, key
, length
, &p
)) {
597 SetPayload(payload
, p
);
603 Boolean
CFBurstTrieContainsUTF8String(CFBurstTrieRef trie
, UInt8
*key
, CFIndex length
, uint32_t *payload
) {
604 Boolean success
= false;
605 if (length
< MAX_STRING_SIZE
) {
606 if (trie
->mapBase
&& ((fileHeader
*)trie
->mapBase
)->signature
== 0xbabeface) {
607 bool prefix
= (trie
->cflags
& kCFBurstTriePrefixCompression
);
608 success
= burstTrieMappedFind((DiskTrieLevelRef
)(trie
->mapBase
+CFSwapInt32LittleToHost((((uint32_t*)trie
->mapBase
)[1]))), trie
->mapBase
, key
, length
, payload
, prefix
);
609 } else if (trie
->mapBase
&& trie
->cflags
& (kCFBurstTriePrefixCompression
| kCFBurstTrieSortByKey
)) {
610 _CFBurstTrieCursor cursor
;
611 if (!CFBurstTrieSetCursorForBytes(trie
, &cursor
, key
, length
))
613 return CFBurstTrieCursorGetPayload(&cursor
, payload
);
617 traverseCFBurstTrieWithCursor(trie
, key
, length
, &cursor
, true, &found
, containsKey
);
618 if (found
) SetPayload(payload
, found
);
625 Boolean
CFBurstTrieSerialize(CFBurstTrieRef trie
, CFStringRef path
, CFBurstTrieOpts opts
) {
626 Boolean success
= false;
631 char filename
[PATH_MAX
];
633 /* Check valid path name */
634 if (!CFStringGetCString(path
, filename
, PATH_MAX
, kCFStringEncodingUTF8
)) return success
;
636 /* Check if file can be opened */
637 if ((fd
=open(filename
, CF_OPENFLGS
|O_RDWR
|O_CREAT
|O_TRUNC
, S_IRUSR
|S_IWUSR
)) < 0) return success
;
639 if (CFBurstTrieSerializeWithFileDescriptor(trie
, fd
, opts
)) success
= true;
646 Boolean
CFBurstTrieSerializeWithFileDescriptor(CFBurstTrieRef trie
, int fd
, CFBurstTrieOpts opts
) {
647 Boolean success
= false;
648 if (!trie
->mapBase
&& fd
>= 0) {
649 off_t start_offset
= lseek(fd
, 0, SEEK_END
);
652 trie
->mapSize
= serializeCFBurstTrie(trie
, start_offset
, fd
);
654 #if DEPLOYMENT_TARGET_WINDOWS
655 HANDLE mappedFileHandle
= (HANDLE
)_get_osfhandle(fd
);
656 // We need to make sure we have our own handle to keep this file open as long as the mmap lasts
657 DuplicateHandle(GetCurrentProcess(), mappedFileHandle
, GetCurrentProcess(), &mappedFileHandle
, 0, 0, DUPLICATE_SAME_ACCESS
);
658 HANDLE mapHandle
= CreateFileMapping(mappedFileHandle
, NULL
, PAGE_READONLY
, 0, 0, NULL
);
659 if (!mapHandle
) return NULL
;
660 char *map
= (char *)MapViewOfFile(mapHandle
, FILE_MAP_READ
, 0, start_offset
, trie
->mapSize
);
661 if (!map
) return NULL
;
663 trie
->mapHandle
= mapHandle
;
664 trie
->mappedFileHandle
= mappedFileHandle
;
666 trie
->mapBase
= mmap(0, trie
->mapSize
, PROT_READ
, MAP_FILE
|MAP_SHARED
, fd
, start_offset
);
674 void CFBurstTrieTraverse(CFBurstTrieRef trie
, void *ctx
, void (*callback
)(void*, const UInt8
*, uint32_t, uint32_t)) {
675 TrieHeader
*header
= (TrieHeader
*)trie
->mapBase
;
676 if (!trie
->mapBase
|| (header
->signature
== 0xcafebabe || header
->signature
== 0x0ddba11)) {
678 TraverseContext context
;
679 context
.context
= ctx
;
680 context
.callback
= callback
;
681 traverseCFBurstTrieWithCursor(trie
, (const uint8_t *)"", 0, &cursor
, false, &context
, foundKey
);
686 void CFBurstTrieTraverseWithCursor(CFBurstTrieRef trie
, const uint8_t *prefix
, uint32_t prefixLen
, void **cursor
, void *ctx
, bool (*callback
)(void *, const uint8_t *, uint32_t, bool))
688 traverseCFBurstTrieWithCursor(trie
, prefix
, prefixLen
, cursor
, false, ctx
, callback
);
691 void CFBurstTriePrint(CFBurstTrieRef trie
) {
695 CFIndex
CFBurstTrieGetCount(CFBurstTrieRef trie
) {
699 CFBurstTrieRef
CFBurstTrieRetain(CFBurstTrieRef trie
) {
704 void CFBurstTrieRelease(CFBurstTrieRef trie
) {
706 if (trie
->retain
== 0) destroyCFBurstTrie(trie
);
710 Boolean
CFBurstTrieSetCursorForBytes(CFBurstTrieRef trie
, CFBurstTrieCursorRef cursor
, const UInt8
* bytes
, CFIndex length
)
712 if (!trie
->mapBase
|| !(trie
->cflags
& (kCFBurstTriePrefixCompression
| kCFBurstTrieSortByKey
))) {
713 //fprintf(stderr, "CFBurstTrieCreateCursorForBytes() only support file based trie in prefix compression format.\n");
717 TrieHeader
*header
= (TrieHeader
*)trie
->mapBase
;
718 if (length
< 0 || !trie
)
723 cursor
->cursorType
= _kCFBurstTrieCursorMapType
;
724 cursor
->mapCursor
.next
= header
->rootOffset
;
725 cursor
->mapCursor
.isOnPage
= FALSE
;
726 cursor
->mapCursor
.entryOffsetInPage
= 0;
727 cursor
->mapCursor
.offsetInEntry
= 0;
728 cursor
->mapCursor
.payload
= 0;
732 if (!bytes
|| length
== 0)
735 return CFBurstTrieCursorAdvanceForBytes(cursor
, bytes
, length
);
739 CFBurstTrieCursorRef
CFBurstTrieCreateCursorForBytes(CFBurstTrieRef trie
, const UInt8
* bytes
, CFIndex length
)
741 CFBurstTrieCursorRef cursor
= (CFBurstTrieCursorRef
)calloc(sizeof(_CFBurstTrieCursor
), 1);
742 if (!CFBurstTrieSetCursorForBytes(trie
, cursor
, bytes
, length
)) {
743 CFBurstTrieCursorRelease(cursor
);
749 CFBurstTrieCursorRef
CFBurstTrieCursorCreateByCopy(CFBurstTrieCursorRef cursor
)
754 CFBurstTrieCursorRef newCursor
= (CFBurstTrieCursorRef
)calloc(sizeof(_CFBurstTrieCursor
), 1);
755 switch (cursor
->cursorType
) {
756 case _kCFBurstTrieCursorMapType
:
757 copyMapCursor(&cursor
->mapCursor
, &newCursor
->mapCursor
);
759 case _kCFBurstTrieCursorTrieType
:
763 newCursor
->cursorType
= cursor
->cursorType
;
764 newCursor
->trie
= cursor
->trie
;
768 Boolean
CFBurstTrieCursorIsEqual(CFBurstTrieCursorRef lhs
, CFBurstTrieCursorRef rhs
)
770 if (lhs
->trie
!= rhs
->trie
|| lhs
->cursorType
!= rhs
->cursorType
)
773 if (lhs
->cursorType
== _kCFBurstTrieCursorMapType
)
774 return areMapCursorsEqual(&lhs
->mapCursor
, &rhs
->mapCursor
);
779 Boolean
CFBurstTrieCursorAdvanceForBytes(CFBurstTrieCursorRef cursor
, const UInt8
* bytes
, CFIndex length
)
781 switch (cursor
->cursorType
) {
782 case _kCFBurstTrieCursorMapType
: {
783 CompactMapCursor tempCursor
;
784 copyMapCursor(&cursor
->mapCursor
, &tempCursor
);
785 if (advanceMapCursor(cursor
->trie
, (CompactMapCursor
*)&cursor
->mapCursor
, bytes
, length
))
788 copyMapCursor(&tempCursor
, &cursor
->mapCursor
);
792 case _kCFBurstTrieCursorTrieType
:
798 Boolean
CFBurstTrieCursorGetPayload(CFBurstTrieCursorRef cursor
, uint32_t *payload
)
800 switch (cursor
->cursorType
) {
801 case _kCFBurstTrieCursorMapType
:
802 return getMapCursorPayload(cursor
->trie
, (CompactMapCursor
*)&cursor
->mapCursor
, payload
);
803 case _kCFBurstTrieCursorTrieType
:
809 void CFBurstTrieCursorRelease(CFBurstTrieCursorRef cursor
)
816 void CFBurstTrieTraverseFromCursor(CFBurstTrieCursorRef cursor
, void *ctx
, CFBurstTrieTraversalCallback callback
)
821 UInt8
*bytes
= (UInt8
*)calloc(1, MAX_KEY_LENGTH
);
822 uint32_t capacity
= MAX_KEY_LENGTH
;
824 Boolean stop
= FALSE
;
825 switch (cursor
->cursorType
) {
826 case _kCFBurstTrieCursorMapType
: {
827 CompactMapCursor tempCursor
;
828 copyMapCursor(&cursor
->mapCursor
, &tempCursor
);
829 traverseFromMapCursor(cursor
->trie
, &tempCursor
, bytes
, capacity
,length
, &stop
, ctx
, callback
);
832 case _kCFBurstTrieCursorTrieType
:
840 #pragma mark Insertion
843 static ListNodeRef
makeCFBurstTrieListNode(const uint8_t *key
, uint32_t keylen
, uint32_t weight
, uint32_t payload
) {
844 ListNodeRef node
= (ListNodeRef
) calloc(1, sizeof(*node
) + keylen
+ 1);
845 memcpy(node
->string
, key
, keylen
);
846 node
->string
[keylen
] = 0;
848 node
->length
= keylen
;
849 node
->weight
= weight
;
850 node
->payload
= payload
;
854 static void addCFBurstTrieBurstLevel(CFBurstTrieRef trie
, TrieLevelRef root
, const uint8_t *key
, uint32_t keylen
, uint32_t weight
, uint32_t payload
) {
856 NextTrie next
= root
->slots
[*key
];
857 ListNodeRef newNode
= makeCFBurstTrieListNode(key
+1, keylen
-1, weight
, payload
);
858 newNode
->weight
= weight
;
859 newNode
->next
= (ListNodeRef
) NextTrie_GetPtr(next
);
860 next
= (uintptr_t) newNode
;
861 NextTrie_SetKind(next
, ListKind
);
862 root
->slots
[*key
] = next
;
864 // ** Handle payload.
865 root
->weight
= weight
;
866 root
->payload
= payload
;
870 static TrieLevelRef
burstCFBurstTrieLevel(CFBurstTrieRef trie
, ListNodeRef list
, uint32_t listCount
) {
871 TrieLevelRef newLevel
= (TrieLevelRef
) calloc(1, sizeof(struct _TrieLevel
));
873 addCFBurstTrieBurstLevel(trie
, newLevel
, list
->string
, list
->length
, list
->weight
, list
->payload
);
874 ListNodeRef temp
= list
;
881 static CFBTInsertCode
addCFBurstTrieListNode(CFBurstTrieRef trie
, ListNodeRef list
, const uint8_t *key
, uint32_t keylen
, uint32_t weight
, uint32_t payload
, uint32_t *listCount
)
883 CFBTInsertCode code
= FailedInsert
;
886 ListNodeRef last
= list
;
888 if (list
->length
== keylen
&& memcmp(key
, list
->string
, keylen
) == 0) {
889 list
->weight
+= weight
;
890 list
->payload
= payload
;
901 last
->next
= makeCFBurstTrieListNode(key
, keylen
, weight
, payload
);
909 static CFBTInsertCode
addCFBurstTrieLevel(CFBurstTrieRef trie
, TrieLevelRef root
, const uint8_t *key
, uint32_t keylen
, uint32_t weight
, uint32_t payload
)
911 CFBTInsertCode code
= FailedInsert
;
913 NextTrie next
= root
->slots
[*key
];
914 if (NextTrie_GetKind(next
) == TrieKind
) {
915 TrieLevelRef nextLevel
= (TrieLevelRef
) NextTrie_GetPtr(next
);
916 code
= addCFBurstTrieLevel(trie
, nextLevel
, key
+1, keylen
-1, weight
, payload
);
918 if (NextTrie_GetKind(next
) == ListKind
) {
920 ListNodeRef listNode
= (ListNodeRef
) NextTrie_GetPtr(next
);
921 code
= addCFBurstTrieListNode(trie
, listNode
, key
+1, keylen
-1, weight
, payload
, &listCount
);
922 if (listCount
> trie
->containerSize
) {
923 next
= (uintptr_t) burstCFBurstTrieLevel(trie
, listNode
, listCount
);
924 NextTrie_SetKind(next
, TrieKind
);
927 // ** Make a new list node
928 next
= (uintptr_t) makeCFBurstTrieListNode(key
+1, keylen
-1, weight
, payload
);
929 NextTrie_SetKind(next
, ListKind
);
932 root
->slots
[*key
] = next
;
936 if (!root
->weight
) code
= NewTerm
;
937 else code
= ExistingTerm
;
938 root
->weight
+= weight
;
939 root
->payload
= payload
;
946 #pragma mark Searching
948 static void findCFBurstTrieList(CFBurstTrieRef trie
, TrieCursor
*cursor
, void *ctx
, bool (*callback
)(void*, const uint8_t*, uint32_t, bool))
950 ListNodeRef list
= (ListNodeRef
)NextTrie_GetPtr(cursor
->next
);
951 int len
= cursor
->prefixlen
-cursor
->keylen
;
952 len
= len
<= 0 ? 0 : len
;
954 int lencompare
= list
->length
-len
;
955 if (list
->length
>= len
&&
956 (len
== 0 || memcmp(list
->string
, cursor
->prefix
+cursor
->keylen
, len
) == 0)) {
957 memcpy(cursor
->key
+cursor
->keylen
, list
->string
, list
->length
);
958 cursor
->key
[cursor
->keylen
+list
->length
] = 0;
959 cursor
->next
= (NextTrie
)list
;
960 if (list
->payload
&& callback(ctx
, cursor
->key
, list
->payload
, lencompare
==0)) return;
966 static void findCFBurstTrieMappedPage(CFBurstTrieRef trie
, MapCursor
*cursor
, void *ctx
, bool (*callback
)(void*, const uint8_t*, uint32_t, bool))
968 Page
*page
= (Page
*)DiskNextTrie_GetPtr(trie
->mapBase
, cursor
->next
);
969 uint32_t end
= page
->length
;
971 int len
= cursor
->prefixlen
-cursor
->keylen
;
972 len
= len
<= 0 ? 0 : len
;
973 if (trie
->cflags
& kCFBurstTriePrefixCompression
) {
974 uint8_t pfx
[CHARACTER_SET_SIZE
];
975 PageEntryPacked
*lastEntry
= 0;
977 PageEntryPacked
*entry
= (PageEntryPacked
*)&page
->data
[cur
];
978 int lencompare
= (entry
->strlen
+entry
->pfxLen
)-len
;
979 if (lastEntry
&& entry
->pfxLen
>lastEntry
->pfxLen
) memcpy(pfx
+lastEntry
->pfxLen
, lastEntry
->string
, entry
->pfxLen
-lastEntry
->pfxLen
);
980 if (lencompare
>= 0 &&
981 (len
== 0 || (__builtin_memcmp(pfx
, cursor
->prefix
+cursor
->keylen
, entry
->pfxLen
) == 0 &&
982 __builtin_memcmp(entry
->string
, cursor
->prefix
+cursor
->keylen
+entry
->pfxLen
, cursor
->prefixlen
-cursor
->keylen
-entry
->pfxLen
) == 0))) {
983 memcpy(cursor
->key
+cursor
->keylen
, pfx
, entry
->pfxLen
);
984 memcpy(cursor
->key
+cursor
->keylen
+entry
->pfxLen
, entry
->string
, entry
->strlen
);
985 cursor
->key
[cursor
->keylen
+entry
->pfxLen
+entry
->strlen
] = 0;
986 if (entry
->payload
&& callback(ctx
, (const uint8_t *)cursor
->key
, entry
->payload
, lencompare
==0)) return;
989 cur
+= sizeof(*entry
) + entry
->strlen
;
993 PageEntry
*entry
= (PageEntry
*)&page
->data
[cur
];
994 int lencompare
= entry
->strlen
-len
;
995 if (lencompare
>= 0 && __builtin_memcmp(entry
->string
, cursor
->prefix
+cursor
->keylen
, len
) == 0) {
996 memcpy(cursor
->key
+cursor
->keylen
, entry
->string
, entry
->strlen
);
997 cursor
->key
[cursor
->keylen
+entry
->strlen
] = 0;
998 if (entry
->payload
&& callback(ctx
, (const uint8_t *)cursor
->key
, entry
->payload
, lencompare
==0)) return;
1000 cur
+= sizeof(*entry
) + entry
->strlen
;
1006 static void findCFBurstTrieLevel(CFBurstTrieRef trie
, TrieCursor
*cursor
, bool exactmatch
, void *ctx
, bool (*callback
)(void*, const uint8_t*, uint32_t, bool))
1008 if (cursor
->keylen
< cursor
->prefixlen
) {
1009 cursor
->next
= ((TrieLevelRef
)NextTrie_GetPtr(cursor
->next
))->slots
[cursor
->prefix
[cursor
->keylen
]];
1010 cursor
->key
[cursor
->keylen
++] = cursor
->prefix
[cursor
->keylen
];
1012 if (NextTrie_GetKind(cursor
->next
) == TrieKind
) {
1013 findCFBurstTrieLevel(trie
, cursor
, exactmatch
, ctx
, callback
);
1014 } else if (NextTrie_GetKind(cursor
->next
) == ListKind
) {
1015 findCFBurstTrieList(trie
, cursor
, ctx
, callback
);
1018 TrieLevelRef root
= (TrieLevelRef
)NextTrie_GetPtr(cursor
->next
);
1019 if (root
->payload
&& callback(ctx
, cursor
->key
, root
->payload
, cursor
->prefixlen
==cursor
->keylen
)) return;
1020 if (cursor
->keylen
== cursor
->prefixlen
&& exactmatch
) return;
1021 traverseCFBurstTrieLevel(trie
, root
, cursor
, exactmatch
, ctx
, callback
);
1025 static void findCFBurstTrieCompactMappedLevel(CFBurstTrieRef trie
, MapCursor
*cursor
, bool exactmatch
, void *ctx
, bool (*callback
)(void*, const uint8_t*, uint32_t, bool))
1027 CompactMapTrieLevelRef root
= (CompactMapTrieLevelRef
)DiskNextTrie_GetPtr(trie
->mapBase
, cursor
->next
);
1028 if (cursor
->keylen
< cursor
->prefixlen
) {
1029 uint8_t mykey
= *(cursor
->prefix
+cursor
->keylen
);
1030 cursor
->key
[cursor
->keylen
++] = *(cursor
->prefix
+cursor
->keylen
);
1032 uint8_t slot
= mykey
/ 64;
1033 uint8_t bit
= mykey
% 64;
1035 uint64_t bword
= root
->bitmap
[slot
];
1037 if (bword
& (1ull << bit
)) {
1038 // ** Count all the set bits up to this bit
1039 for (int i
=0; i
< slot
; i
++) item
+= __builtin_popcountll(root
->bitmap
[i
]);
1040 item
+= __builtin_popcountll(bword
& ((1ull << bit
)-1));
1041 uint32_t offset
= root
->slots
[item
];
1042 cursor
->next
= offset
;
1044 if (DiskNextTrie_GetKind(offset
) == TrieKind
) {
1045 findCFBurstTrieMappedLevel(trie
, cursor
, exactmatch
, ctx
, callback
);
1046 } else if (DiskNextTrie_GetKind(offset
) == CompactTrieKind
) {
1047 findCFBurstTrieCompactMappedLevel(trie
, cursor
, exactmatch
, ctx
, callback
);
1048 } else if (DiskNextTrie_GetKind(offset
) == ListKind
) {
1049 findCFBurstTrieMappedPage(trie
, cursor
, ctx
, callback
);
1053 if(root
->payload
&& callback(ctx
, cursor
->key
, root
->payload
, cursor
->prefixlen
==cursor
->keylen
)) return;
1054 if (cursor
->keylen
== cursor
->prefixlen
&& exactmatch
) return;
1055 traverseCFBurstTrieCompactMappedLevel(trie
, root
, cursor
, exactmatch
, ctx
, callback
);
1059 static void findCFBurstTrieMappedLevel(CFBurstTrieRef trie
, MapCursor
*cursor
, bool exactmatch
, void *ctx
, bool (*callback
)(void*, const uint8_t*, uint32_t, bool))
1061 MapTrieLevelRef root
= (MapTrieLevelRef
)DiskNextTrie_GetPtr(trie
->mapBase
, cursor
->next
);
1062 if (cursor
->keylen
< cursor
->prefixlen
) {
1063 uint8_t slot
= *(cursor
->prefix
+cursor
->keylen
);
1064 cursor
->next
= root
->slots
[slot
];
1065 cursor
->key
[cursor
->keylen
++] = slot
;
1067 if (DiskNextTrie_GetKind(cursor
->next
) == TrieKind
) {
1068 findCFBurstTrieMappedLevel(trie
, cursor
, exactmatch
, ctx
, callback
);
1069 } else if (DiskNextTrie_GetKind(cursor
->next
) == CompactTrieKind
) {
1070 findCFBurstTrieCompactMappedLevel(trie
, cursor
, exactmatch
, ctx
, callback
);
1071 } else if (DiskNextTrie_GetKind(cursor
->next
) == ListKind
) {
1072 findCFBurstTrieMappedPage(trie
, cursor
, ctx
, callback
);
1075 if (root
->payload
&& callback(ctx
, cursor
->key
, root
->payload
, cursor
->prefixlen
==cursor
->keylen
)) return;
1076 if (cursor
->keylen
== cursor
->prefixlen
&& exactmatch
) return;
1077 traverseCFBurstTrieMappedLevel(trie
, root
, cursor
, exactmatch
, ctx
, callback
);
1081 static void traverseCFBurstTrieLevel(CFBurstTrieRef trie
, TrieLevelRef root
, TrieCursor
*cursor
, bool exactmatch
, void *ctx
, bool (*callback
)(void *, const uint8_t *, uint32_t, bool))
1083 cursor
->key
[cursor
->keylen
] = 0;
1084 uint32_t len
= cursor
->keylen
;
1085 for (int i
=0; i
< CHARACTER_SET_SIZE
; i
++) {
1086 NextTrie next
= root
->slots
[i
];
1087 cursor
->keylen
= len
;
1088 cursor
->key
[cursor
->keylen
++] = i
;
1090 if (NextTrie_GetKind(next
) == TrieKind
) {
1091 TrieLevelRef level
= (TrieLevelRef
)NextTrie_GetPtr(next
);
1092 if (level
->payload
&& callback(ctx
, cursor
->key
, level
->payload
, cursor
->prefixlen
==cursor
->keylen
)) return;
1093 if (cursor
->keylen
== cursor
->prefixlen
&& exactmatch
) return;
1094 traverseCFBurstTrieLevel(trie
, level
, cursor
, exactmatch
, ctx
, callback
);
1095 } else if (NextTrie_GetKind(next
) == ListKind
) {
1096 cursor
->next
= next
;
1097 cursor
->key
[cursor
->keylen
] = 0;
1098 findCFBurstTrieList(trie
, cursor
, ctx
, callback
);
1103 static void traverseCFBurstTrieMappedLevel(CFBurstTrieRef trie
, MapTrieLevelRef root
, MapCursor
*cursor
, bool exactmatch
, void *ctx
, bool (*callback
)(void *, const uint8_t *, uint32_t, bool))
1105 cursor
->key
[cursor
->keylen
] = 0;
1106 uint32_t len
= cursor
->keylen
;
1108 for (int i
=0; i
< CHARACTER_SET_SIZE
; i
++) {
1109 uint32_t offset
= (uint32_t)root
->slots
[i
];
1110 cursor
->keylen
= len
;
1111 cursor
->key
[cursor
->keylen
++] = i
;
1113 if (DiskNextTrie_GetKind(offset
) == TrieKind
) {
1114 MapTrieLevelRef level
= (MapTrieLevelRef
)DiskNextTrie_GetPtr(trie
->mapBase
, offset
);
1115 if (level
->payload
&& callback(ctx
, cursor
->key
, level
->payload
, cursor
->prefixlen
==cursor
->keylen
)) return;
1116 if (cursor
->keylen
== cursor
->prefixlen
&& exactmatch
) return;
1117 traverseCFBurstTrieMappedLevel(trie
, level
, cursor
, exactmatch
, ctx
, callback
);
1118 } else if (DiskNextTrie_GetKind(offset
) == CompactTrieKind
) {
1119 CompactMapTrieLevelRef level
= (CompactMapTrieLevelRef
)DiskNextTrie_GetPtr(trie
->mapBase
, offset
);
1120 if (level
->payload
&& callback(ctx
, cursor
->key
, level
->payload
, cursor
->prefixlen
==cursor
->keylen
)) return;
1121 if (cursor
->keylen
== cursor
->prefixlen
&& exactmatch
) return;
1122 traverseCFBurstTrieCompactMappedLevel(trie
, level
, cursor
, exactmatch
, ctx
, callback
);
1123 } else if (DiskNextTrie_GetKind(offset
) == ListKind
) {
1124 cursor
->next
= offset
;
1125 cursor
->key
[cursor
->keylen
] = 0;
1126 findCFBurstTrieMappedPage(trie
, cursor
, ctx
, callback
);
1131 static void traverseCFBurstTrieCompactMappedLevel(CFBurstTrieRef trie
, CompactMapTrieLevelRef root
, MapCursor
*cursor
, bool exactmatch
, void *ctx
, bool (*callback
)(void *, const uint8_t *, uint32_t, bool))
1133 cursor
->key
[cursor
->keylen
] = 0;
1134 uint32_t len
= cursor
->keylen
;
1135 for (uint32_t c
=0; c
< CHARACTER_SET_SIZE
; c
++) {
1136 //** This could be optimized to remember what the last slot/item was and not count bits over and over.
1138 uint32_t slot
= mykey
/ 64;
1139 uint32_t bit
= mykey
% 64;
1141 uint64_t bword
= root
->bitmap
[slot
];
1142 cursor
->keylen
= len
;
1144 if (bword
& (1ull << bit
)) {
1145 // ** Count all the set bits up to this bit
1146 for (int i
=0; i
< slot
; i
++) item
+= __builtin_popcountll(root
->bitmap
[i
]);
1147 item
+= __builtin_popcountll(bword
& ((1ull << bit
)-1));
1148 uint32_t offset
= root
->slots
[item
];
1149 cursor
->key
[cursor
->keylen
++] = mykey
;
1151 if(DiskNextTrie_GetKind(offset
) == CompactTrieKind
) {
1152 CompactMapTrieLevelRef level
= (CompactMapTrieLevelRef
)DiskNextTrie_GetPtr(trie
->mapBase
, offset
);
1153 if (level
->payload
&& callback(ctx
, cursor
->key
, level
->payload
, cursor
->prefixlen
==cursor
->keylen
)) return;
1154 if (cursor
->keylen
== cursor
->prefixlen
&& exactmatch
) return;
1155 traverseCFBurstTrieCompactMappedLevel(trie
, level
, cursor
, exactmatch
, ctx
, callback
);
1156 } else if(DiskNextTrie_GetKind(offset
) == TrieKind
) {
1157 traverseCFBurstTrieMappedLevel(trie
, (MapTrieLevelRef
)DiskNextTrie_GetPtr(trie
->mapBase
, offset
), cursor
, exactmatch
, ctx
, callback
);
1158 } else if (DiskNextTrie_GetKind(offset
) == ListKind
) {
1159 cursor
->next
= offset
;
1160 cursor
->key
[cursor
->keylen
] = 0;
1161 findCFBurstTrieMappedPage(trie
, cursor
, ctx
, callback
);
1167 static void traverseCFBurstTrieWithCursor(CFBurstTrieRef trie
, const uint8_t *prefix
, uint32_t prefixLen
, void **cursor
, bool exactmatch
, void *ctx
, bool (*callback
)(void *, const uint8_t *, uint32_t, bool)) {
1168 if (trie
->mapBase
) {
1169 if (trie
->cflags
& kCFBurstTriePrefixCompression
) {
1170 fprintf(stderr
, "Please use CFBurstTrieCursorRef API for file based trie.\n");
1173 TrieHeader
*header
= (TrieHeader
*)trie
->mapBase
;
1175 csr
.next
= header
->rootOffset
;
1176 csr
.prefix
= prefix
;
1177 csr
.prefixlen
= prefixLen
;
1180 findCFBurstTrieMappedLevel(trie
, &csr
, exactmatch
, ctx
, callback
);
1184 csr
.next
= ((unsigned long)&trie
->root
)|TrieKind
;
1185 csr
.prefix
= prefix
;
1186 csr
.prefixlen
= prefixLen
;
1189 findCFBurstTrieLevel(trie
, &csr
, exactmatch
, ctx
, callback
);
1193 CF_INLINE
uint32_t getPackedPageEntrySize(PageEntryPacked
*entry
)
1195 return sizeof(PageEntryPacked
) + entry
->strlen
;
1198 CF_INLINE
uint32_t getPageEntrySize(PageEntry
*entry
)
1200 return sizeof(PageEntry
) + entry
->strlen
;
1204 static void _printPageEntry(PageEntryPacked *entry) {
1205 printf("entry 0x%p:\n", entry);
1206 printf("pfxLen = %d, strLen = %d\n", entry->pfxLen, entry->strlen);
1207 if (entry->strlen > 0) {
1208 printf("string = ");
1209 for (int i = 0; i < entry->strlen; ++i)
1210 printf("%d ", entry->string[i]);
1216 static Boolean
advanceCursorOnMappedPageForByte(Page
*page
, CompactMapCursor
*cursor
, UInt8 byte
) {
1217 PageEntryPacked
*entry
;
1218 Boolean found
= FALSE
;
1219 uint32_t minPrefixLength
= 0;
1221 if (cursor
->isOnPage
) {
1222 entry
= (PageEntryPacked
*)&page
->data
[cursor
->entryOffsetInPage
];
1223 //_printPageEntry(entry);
1224 BOOL shouldContinue
= TRUE
;
1226 if (!(cursor
->entryOffsetInPage
== 0 && entry
->strlen
== 0)) {
1227 if (cursor
->entryOffsetInPage
== entry
->strlen
- 1) {
1228 minPrefixLength
= entry
->pfxLen
+ entry
->strlen
;
1229 cursor
->entryOffsetInPage
+= getPackedPageEntrySize(entry
);
1231 cursor
->offsetInEntry
++;
1232 if (entry
->string
[cursor
->offsetInEntry
] == byte
)
1234 else if (entry
->string
[cursor
->offsetInEntry
] > byte
)
1235 shouldContinue
= FALSE
;
1237 minPrefixLength
= entry
->pfxLen
+ cursor
->offsetInEntry
;
1238 cursor
->entryOffsetInPage
+= getPackedPageEntrySize(entry
);
1244 cursor
->isOnPage
= TRUE
;
1246 } else if (!shouldContinue
)
1249 cursor
->entryOffsetInPage
= 0;
1252 uint32_t pageSize
= page
->length
- sizeof(Page
);
1253 while (cursor
->entryOffsetInPage
< pageSize
) {
1254 entry
= (PageEntryPacked
*)&page
->data
[cursor
->entryOffsetInPage
];
1255 //_printPageEntry(entry);
1256 if (minPrefixLength
> entry
->pfxLen
)
1258 else if (minPrefixLength
< entry
->pfxLen
)
1259 cursor
->entryOffsetInPage
+= getPackedPageEntrySize(entry
);
1261 if (entry
->strlen
== 0)
1262 cursor
->entryOffsetInPage
+= getPackedPageEntrySize(entry
);
1264 if (entry
->string
[0] > byte
)
1265 // Entries are sorted alphabetically
1267 else if (entry
->string
[0] < byte
)
1268 cursor
->entryOffsetInPage
+= getPackedPageEntrySize(entry
);
1270 cursor
->offsetInEntry
= 0;
1279 cursor
->isOnPage
= TRUE
;
1284 static Boolean
advanceCursorMappedPageWithPerfixCompression(Page
*page
, CompactMapCursor
*cursor
, const UInt8
* bytes
, CFIndex length
)
1287 PageEntryPacked
*entry
= (PageEntryPacked
*)&page
->data
[0];
1288 if (!cursor
->isOnPage
) {
1289 cursor
->entryOffsetInPage
= 0;
1290 cursor
->offsetInEntry
= 0;
1291 cursor
->isOnPage
= entry
->pfxLen
== 0 && entry
->strlen
== 0;
1293 getMapCursorPayloadFromPackedPageEntry(entry
, cursor
, &cursor
->payload
);
1297 for (CFIndex i
= 0; i
< length
; ++i
) {
1298 if (!advanceCursorOnMappedPageForByte(page
, cursor
, bytes
[i
]))
1301 PageEntryPacked
*entry
= (PageEntryPacked
*)&page
->data
[cursor
->entryOffsetInPage
];
1302 getMapCursorPayloadFromPackedPageEntry(entry
, cursor
, &cursor
->payload
);
1306 static Boolean
advanceCursorMappedPageSortedByKey(Page
*page
, CompactMapCursor
*cursor
, const UInt8
* bytes
, CFIndex length
)
1309 PageEntry
*entry
= (PageEntry
*)&page
->data
[0];
1310 if (!cursor
->isOnPage
) {
1311 cursor
->entryOffsetInPage
= 0;
1312 cursor
->offsetInEntry
= 0;
1313 cursor
->isOnPage
= entry
->strlen
== 0;
1315 getMapCursorPayloadFromPageEntry(entry
, cursor
, &cursor
->payload
);
1320 uint32_t pageSize
= page
->length
- sizeof(Page
);
1321 const UInt8
* prefix
= NULL
;
1322 uint32_t prefixLength
= 0;
1324 if (cursor
->isOnPage
) {
1325 entry
= (PageEntry
*)&page
->data
[cursor
->entryOffsetInPage
];
1326 prefix
= entry
->string
;
1327 prefixLength
= cursor
->offsetInEntry
+ 1;
1330 while (cursor
->entryOffsetInPage
< pageSize
) {
1331 PageEntry
*entry
= (PageEntry
*)&page
->data
[cursor
->entryOffsetInPage
];
1332 if (entry
->strlen
== 0) {
1333 cursor
->entryOffsetInPage
+= getPageEntrySize(entry
);
1337 if (entry
->strlen
<= prefixLength
)
1341 if (prefixLength
> 0)
1342 cmpResult
= __builtin_memcmp(entry
->string
, prefix
, prefixLength
);
1345 else if (cmpResult
== 0) {
1346 if (entry
->strlen
< prefixLength
+ length
) {
1347 int cmpResult2
= __builtin_memcmp(entry
->string
+ prefixLength
, bytes
, entry
->strlen
- prefixLength
);
1351 cursor
->entryOffsetInPage
+= getPageEntrySize(entry
);
1353 int cmpResult2
= __builtin_memcmp(entry
->string
+ prefixLength
, bytes
, length
);
1356 else if (cmpResult2
== 0) {
1357 cursor
->isOnPage
= TRUE
;
1358 cursor
->offsetInEntry
= prefixLength
+ length
- 1;
1359 getMapCursorPayloadFromPageEntry(entry
, cursor
, &cursor
->payload
);
1362 cursor
->entryOffsetInPage
+= getPageEntrySize(entry
);
1365 cursor
->entryOffsetInPage
+= getPageEntrySize(entry
);
1371 static Boolean
advanceCursorMappedPage(CFBurstTrieRef trie
, CompactMapCursor
*cursor
, const UInt8
* bytes
, CFIndex length
)
1373 if (!bytes
|| length
< 0)
1376 Page
*page
= (Page
*)DiskNextTrie_GetPtr(trie
->mapBase
, cursor
->next
);
1377 uint32_t pageSize
= page
->length
- sizeof(Page
);
1381 if (trie
->cflags
& kCFBurstTrieSortByKey
)
1382 return advanceCursorMappedPageSortedByKey(page
, cursor
, bytes
, length
);
1383 else if (trie
->cflags
& kCFBurstTriePrefixCompression
)
1384 return advanceCursorMappedPageWithPerfixCompression(page
, cursor
, bytes
, length
);
1389 static Boolean
advanceCursorCompactMappedLevel(CFBurstTrieRef trie
, CompactMapCursor
*cursor
, const UInt8
* bytes
, CFIndex length
)
1391 if (!bytes
|| length
< 0)
1394 CompactMapTrieLevelRef root
= (CompactMapTrieLevelRef
)DiskNextTrie_GetPtr(trie
->mapBase
, cursor
->next
);
1396 cursor
->payload
= root
->payload
;
1400 uint8_t slot
= bytes
[0] / 64;
1401 uint8_t bit
= bytes
[0] % 64;
1403 uint64_t bword
= root
->bitmap
[slot
];
1404 if (bword
& (1ull << bit
)) {
1405 // Count all the set bits up to this bit
1406 for (int i
= 0; i
< slot
; ++i
)
1407 item
+= __builtin_popcountll(root
->bitmap
[i
]);
1408 item
+= __builtin_popcountll(bword
& ((1ull << bit
)-1));
1409 cursor
->next
= root
->slots
[item
];
1410 return advanceMapCursor(trie
, cursor
, bytes
+ 1, length
- 1);
1416 static Boolean
advanceCursorMappedLevel(CFBurstTrieRef trie
, CompactMapCursor
*cursor
, const UInt8
* bytes
, CFIndex length
)
1418 if (!bytes
|| length
< 0)
1421 MapTrieLevelRef root
= (MapTrieLevelRef
)DiskNextTrie_GetPtr(trie
->mapBase
, cursor
->next
);
1423 cursor
->payload
= root
->payload
;
1427 cursor
->next
= root
->slots
[bytes
[0]];
1428 return advanceMapCursor(trie
, cursor
, bytes
+ 1, length
- 1);
1431 static Boolean
advanceMapCursor(CFBurstTrieRef trie
, CompactMapCursor
*cursor
, const UInt8
* bytes
, CFIndex length
)
1433 bool result
= FALSE
;
1434 switch (DiskNextTrie_GetKind(cursor
->next
)) {
1436 result
= advanceCursorMappedLevel(trie
, cursor
, bytes
, length
);
1438 case CompactTrieKind
:
1439 result
= advanceCursorCompactMappedLevel(trie
, cursor
, bytes
, length
);
1442 result
= advanceCursorMappedPage(trie
, cursor
, bytes
, length
);
1445 TrieHeader
*header
= (TrieHeader
*)trie
->mapBase
;
1446 if (cursor
->next
== header
->rootOffset
)
1447 result
= advanceCursorMappedLevel(trie
, cursor
, bytes
, length
);
1454 static void traverseFromMapCursorMappedLevel(CFBurstTrieRef trie
, CompactMapCursor
*cursor
, UInt8
* bytes
, uint32_t capacity
, uint32_t length
, Boolean
*stop
, void *ctx
, CFBurstTrieTraversalCallback callback
)
1456 MapTrieLevelRef root
= (MapTrieLevelRef
)DiskNextTrie_GetPtr(trie
->mapBase
, cursor
->next
);
1457 if (root
->payload
) {
1458 callback(ctx
, bytes
, length
, root
->payload
, stop
);
1463 if (length
>= capacity
)
1466 for (int i
= 0; i
< CHARACTER_SET_SIZE
; ++i
) {i
=
1468 cursor
->next
= (uint32_t)root
->slots
[i
];;
1469 cursor
->isOnPage
= FALSE
;
1470 cursor
->entryOffsetInPage
= 0;
1471 cursor
->offsetInEntry
= 0;
1472 cursor
->payload
= 0;
1473 traverseFromMapCursor(trie
, cursor
, bytes
, capacity
- 1, length
+ 1, stop
, ctx
, callback
);
1479 static void traverseFromMapCursorCompactMappedLevel(CFBurstTrieRef trie
, CompactMapCursor
*cursor
, UInt8
* bytes
, uint32_t capacity
, uint32_t length
, Boolean
*stop
, void *ctx
, CFBurstTrieTraversalCallback callback
)
1481 CompactMapTrieLevelRef root
= (CompactMapTrieLevelRef
)DiskNextTrie_GetPtr(trie
->mapBase
, cursor
->next
);
1482 if (root
->payload
) {
1483 callback(ctx
, bytes
, length
, root
->payload
, stop
);
1488 if (length
>= capacity
)
1490 for (int c
= 0; c
< CHARACTER_SET_SIZE
; ++c
) {
1492 //This could be optimized to remember what the last slot/item was and not count bits over and over.
1493 uint8_t slot
= c
/ 64;
1494 uint8_t bit
= c
% 64;
1496 uint64_t bword
= root
->bitmap
[slot
];
1497 if (bword
& (1ull << bit
)) {
1498 // Count all the set bits up to this bit
1499 for (int i
= 0; i
< slot
; ++i
)
1500 item
+= __builtin_popcountll(root
->bitmap
[i
]);
1501 item
+= __builtin_popcountll(bword
& ((1ull << bit
)-1));
1502 cursor
->next
= root
->slots
[item
];
1503 cursor
->isOnPage
= FALSE
;
1504 cursor
->entryOffsetInPage
= 0;
1505 cursor
->offsetInEntry
= 0;
1506 cursor
->payload
= 0;
1507 traverseFromMapCursor(trie
, cursor
, bytes
, capacity
- 1, length
+ 1, stop
, ctx
, callback
);
1514 static void traverseFromMapCursorMappedPageWithPrefixCompression(Page
*page
, CompactMapCursor
*cursor
, UInt8
* bytes
, uint32_t capacity
, uint32_t length
, Boolean
*stop
, void *ctx
, CFBurstTrieTraversalCallback callback
)
1516 uint32_t pageSize
= page
->length
- sizeof(Page
);
1517 uint32_t offset
= cursor
->entryOffsetInPage
;
1518 uint32_t minPrefixLength
= 0;
1519 if (cursor
->isOnPage
) {
1520 PageEntryPacked
*entry
= (PageEntryPacked
*)&page
->data
[offset
];
1521 int32_t remainingLength
= entry
->strlen
- cursor
->offsetInEntry
- 1;
1522 if (remainingLength
>= 0 && remainingLength
<= capacity
) {
1523 memcpy(bytes
+ length
, entry
->string
+ cursor
->offsetInEntry
+ 1, remainingLength
);
1524 callback(ctx
, bytes
, length
+ remainingLength
, entry
->payload
, stop
);
1528 minPrefixLength
= entry
->pfxLen
+ cursor
->offsetInEntry
;
1529 offset
+= getPackedPageEntrySize(entry
);
1531 PageEntryPacked
*previousEntry
= NULL
;
1532 while (offset
< pageSize
) {
1533 PageEntryPacked
*entry
= (PageEntryPacked
*)&page
->data
[offset
];
1534 if (minPrefixLength
> entry
->pfxLen
)
1536 else if (entry
->payload
&& entry
->strlen
<= capacity
) {
1538 length
-= previousEntry
->strlen
+ previousEntry
->pfxLen
- entry
->pfxLen
;
1539 memcpy(bytes
+ length
, entry
->string
, entry
->strlen
);
1540 callback(ctx
, bytes
, length
+ entry
->strlen
, entry
->payload
, stop
);
1541 length
+= entry
->strlen
;
1545 previousEntry
= entry
;
1546 offset
+= getPackedPageEntrySize(entry
);
1550 static void traverseFromMapCursorMappedPageSortedByKey(Page
*page
, CompactMapCursor
*cursor
, UInt8
* bytes
, uint32_t capacity
, uint32_t length
, Boolean
*stop
, void *ctx
, CFBurstTrieTraversalCallback callback
)
1552 uint32_t pageSize
= page
->length
- sizeof(Page
);
1553 uint32_t offset
= cursor
->entryOffsetInPage
;
1554 uint32_t prefixLength
= 0;
1555 const UInt8
*prefix
= NULL
;
1556 if (cursor
->isOnPage
) {
1557 PageEntry
*entry
= (PageEntry
*)&page
->data
[offset
];
1558 int32_t remainingLength
= entry
->strlen
- cursor
->offsetInEntry
- 1;
1559 if (remainingLength
>= 0 && remainingLength
<= capacity
) {
1560 memcpy(bytes
+ length
, entry
->string
+ cursor
->offsetInEntry
, remainingLength
);
1561 callback(ctx
, bytes
, length
+ remainingLength
, entry
->payload
, stop
);
1565 prefixLength
= cursor
->offsetInEntry
+ 1;
1566 prefix
= entry
->string
;
1567 offset
+= getPageEntrySize(entry
);
1570 while (offset
< pageSize
) {
1571 PageEntry
*entry
= (PageEntry
*)&page
->data
[offset
];
1572 if (entry
->strlen
< prefixLength
)
1575 int cmpResult
= __builtin_memcmp(entry
->string
, prefix
, prefixLength
);
1578 else if (entry
->payload
&& entry
->strlen
<= capacity
) {
1579 if (entry
->strlen
> 0)
1580 memcpy(bytes
+ length
, entry
->string
+ prefixLength
, entry
->strlen
- prefixLength
);
1581 callback(ctx
, bytes
, length
+ entry
->strlen
- prefixLength
, entry
->payload
, stop
);
1585 offset
+= getPageEntrySize(entry
);
1590 static void traverseFromMapCursorMappedPage(CFBurstTrieRef trie
, CompactMapCursor
*cursor
, UInt8
* bytes
, uint32_t capacity
, uint32_t length
, Boolean
*stop
, void *ctx
, CFBurstTrieTraversalCallback callback
)
1592 Page
*page
= (Page
*)DiskNextTrie_GetPtr(trie
->mapBase
, cursor
->next
);
1593 if (trie
->cflags
& kCFBurstTrieSortByKey
)
1594 traverseFromMapCursorMappedPageSortedByKey(page
, cursor
, bytes
, capacity
, length
, stop
, ctx
, callback
);
1595 else if (trie
->cflags
& kCFBurstTriePrefixCompression
)
1596 traverseFromMapCursorMappedPageWithPrefixCompression(page
, cursor
, bytes
, capacity
, length
, stop
, ctx
, callback
);
1599 void traverseFromMapCursor(CFBurstTrieRef trie
, CompactMapCursor
*cursor
, UInt8
* bytes
, uint32_t capacity
, uint32_t length
, Boolean
*stop
, void *ctx
, CFBurstTrieTraversalCallback callback
)
1601 switch (DiskNextTrie_GetKind(cursor
->next
)) {
1603 traverseFromMapCursorMappedLevel(trie
, cursor
, bytes
, capacity
, length
, stop
, ctx
, callback
);
1605 case CompactTrieKind
:
1606 traverseFromMapCursorCompactMappedLevel(trie
, cursor
, bytes
, capacity
, length
, stop
, ctx
, callback
);
1609 traverseFromMapCursorMappedPage(trie
, cursor
, bytes
, capacity
, length
, stop
, ctx
, callback
);
1612 TrieHeader
*header
= (TrieHeader
*)trie
->mapBase
;
1613 if (cursor
->next
== header
->rootOffset
) {
1614 traverseFromMapCursorMappedLevel(trie
, cursor
, bytes
, capacity
, length
, stop
, ctx
, callback
);
1621 void copyMapCursor(const CompactMapCursor
*source
, CompactMapCursor
* destination
)
1623 destination
->next
= source
->next
;
1624 destination
->entryOffsetInPage
= source
->entryOffsetInPage
;
1625 destination
->offsetInEntry
= source
->offsetInEntry
;
1626 destination
->isOnPage
= source
->isOnPage
;
1627 destination
->payload
= source
->payload
;
1630 Boolean
areMapCursorsEqual(const CompactMapCursor
*lhs
, const CompactMapCursor
*rhs
)
1632 return lhs
->entryOffsetInPage
== rhs
->entryOffsetInPage
&& lhs
->isOnPage
== rhs
->isOnPage
&& lhs
->next
== rhs
->next
&& lhs
->offsetInEntry
== rhs
->offsetInEntry
;
1635 static Boolean
getMapCursorPayloadFromPackedPageEntry(PageEntryPacked
*entry
, const CompactMapCursor
*cursor
, uint32_t *payload
)
1639 if (cursor
->entryOffsetInPage
== 0 && cursor
->offsetInEntry
== 0 && entry
->strlen
== 0) {
1641 *payload
= entry
->payload
;
1643 } else if (cursor
->offsetInEntry
!= entry
->strlen
- 1)
1647 *payload
= entry
->payload
;
1652 static Boolean
getMapCursorPayloadFromPageEntry(PageEntry
*entry
, const CompactMapCursor
*cursor
, uint32_t *payload
)
1656 if (cursor
->entryOffsetInPage
== 0 && cursor
->offsetInEntry
== 0 && entry
->strlen
== 0) {
1658 *payload
= entry
->payload
;
1660 } else if (cursor
->offsetInEntry
!= entry
->strlen
- 1)
1664 *payload
= entry
->payload
;
1669 Boolean
getMapCursorPayload(CFBurstTrieRef trie
, const CompactMapCursor
*cursor
, uint32_t *payload
)
1673 if (cursor
->payload
) {
1675 *payload
= cursor
->payload
;
1683 static Boolean
burstTrieMappedFind(DiskTrieLevelRef trie
, char *map
, const UInt8
*key
, uint32_t length
, uint32_t *payload
, bool prefix
) {
1684 Boolean success
= false;
1686 uint32_t offset
= CFSwapInt32LittleToHost((uint32_t)trie
->slots
[*key
]);
1687 if(DiskNextTrie_GetKind(offset
) == TrieKind
) {
1688 return burstTrieMappedFind((DiskTrieLevelRef
)DiskNextTrie_GetPtr(map
,offset
), map
, key
+1, length
-1, payload
, prefix
);
1689 } else if(DiskNextTrie_GetKind(offset
) == CompactTrieKind
) {
1690 return burstTrieCompactTrieMappedFind((CompactDiskTrieLevelRef
)DiskNextTrie_GetPtr(map
, offset
), map
, key
+1, length
-1, payload
, prefix
);
1692 if(DiskNextTrie_GetKind(offset
) == ListKind
) {
1693 return burstTrieMappedPageFind((StringPage
*)DiskNextTrie_GetPtr(map
, offset
), key
+1, length
-1, payload
, prefix
);
1700 SetPayload(payload
, CFSwapInt32LittleToHost(trie
->payload
));
1707 static Boolean
burstTrieCompactTrieMappedFind(CompactDiskTrieLevelRef trie
, char *map
, const UInt8
*key
, uint32_t length
, uint32_t *payload
, bool prefix
) {
1708 Boolean success
= false;
1710 uint32_t mykey
= *key
;
1711 uint32_t slot
= mykey
/ 64;
1712 uint32_t bit
= mykey
% 64;
1714 uint64_t bword
= CFSwapInt64LittleToHost(trie
->bitmap
[slot
]);
1715 if (bword
& (1ull << bit
)) {
1716 /* Count all the set bits up to this bit */
1717 for (int i
=0; i
< slot
; i
++) {
1718 item
+= __builtin_popcountll(trie
->bitmap
[i
]);
1720 item
+= __builtin_popcountll(bword
& ((1ull << bit
)-1));
1721 uint32_t offset
= CFSwapInt32LittleToHost((uint32_t)trie
->slots
[item
]);
1722 if(DiskNextTrie_GetKind(offset
) == TrieKind
) {
1723 return burstTrieMappedFind((DiskTrieLevelRef
)DiskNextTrie_GetPtr(map
, offset
), map
, key
+1, length
-1, payload
, prefix
);
1724 } else if(DiskNextTrie_GetKind(offset
) == CompactTrieKind
) {
1725 return burstTrieCompactTrieMappedFind((CompactDiskTrieLevelRef
)DiskNextTrie_GetPtr(map
, offset
), map
, key
+1, length
-1, payload
, prefix
);
1728 if(DiskNextTrie_GetKind(offset
) == ListKind
) {
1729 return burstTrieMappedPageFind((StringPage
*)DiskNextTrie_GetPtr(map
, offset
), key
+1, length
-1, payload
, prefix
);
1737 SetPayload(payload
, CFSwapInt32LittleToHost(trie
->payload
));
1744 static Boolean
burstTrieMappedPageFind(StringPage
*page
, const UInt8
*key
, uint32_t length
, uint32_t *payload
, bool prefix
) {
1745 Boolean success
= false;
1746 uint32_t end
= CFSwapInt32LittleToHost(page
->length
);
1751 StringPageEntryPacked
*entry
= (StringPageEntryPacked
*)&page
->data
[cur
];
1752 uint16_t strlen
= entry
->pfxLen
+CFSwapInt16LittleToHost(entry
->strlen
);
1753 if (strlen
== length
&& __builtin_memcmp(pfx
, key
, entry
->pfxLen
) == 0 && __builtin_memcmp(entry
->string
, key
+entry
->pfxLen
, length
-entry
->pfxLen
) == 0) {
1754 SetPayload(payload
, CFSwapInt32LittleToHost(entry
->payload
));
1758 memcpy(pfx
+entry
->pfxLen
, entry
->string
, MIN(255-entry
->pfxLen
, length
-entry
->pfxLen
));
1759 cur
+= sizeof(*entry
) + strlen
- entry
->pfxLen
;
1764 StringPageEntry
*entry
= (StringPageEntry
*)&page
->data
[cur
];
1765 uint16_t strlen
= CFSwapInt16LittleToHost(entry
->strlen
);
1766 if (strlen
== length
&& __builtin_memcmp(entry
->string
, key
, length
) == 0) {
1767 SetPayload(payload
, CFSwapInt32LittleToHost(entry
->payload
));
1771 cur
+= sizeof(*entry
) + strlen
;
1782 #pragma mark Serialization
1785 static bool serializeCFBurstTrieLevels(CFBurstTrieRef trie
, TrieLevelRef root
, uint32_t *offset
, off_t start_offset
, bool dispose
, bool isroot
, int fd
)
1790 for (int i
=0; i
< CHARACTER_SET_SIZE
; i
++) if (root
->slots
[i
]) count
++;
1792 uint32_t this_offset
= *offset
;
1794 if ((trie
->cflags
& kCFBurstTrieBitmapCompression
) && count
< MAX_BITMAP_SIZE
&& !isroot
) {
1795 size_t size
= sizeof(CompactMapTrieLevel
) + sizeof(uint32_t) * count
;
1798 CompactMapTrieLevel
*maptrie
= (CompactMapTrieLevel
*)alloca(size
);
1799 bzero(maptrie
, size
);
1802 for (int i
=0; i
< CHARACTER_SET_SIZE
; i
++) {
1803 NextTrie next
= root
->slots
[i
];
1805 uint32_t slot
= i
/ 64;
1806 uint32_t bit
= i
% 64;
1807 maptrie
->bitmap
[slot
] |= 1ull<<bit
;
1808 if (NextTrie_GetKind(next
) == TrieKind
) {
1809 TrieLevelRef nextLevel
= (TrieLevelRef
)NextTrie_GetPtr(next
);
1810 uint32_t childOffset
= *offset
;
1811 if (serializeCFBurstTrieLevels(trie
, nextLevel
, offset
, start_offset
, true, false, fd
)) {
1812 maptrie
->slots
[offsetSlot
] = (TrieKind
|childOffset
);
1814 maptrie
->slots
[offsetSlot
] = (CompactTrieKind
|childOffset
);
1817 maptrie
->slots
[offsetSlot
] = next
;
1822 maptrie
->payload
= root
->payload
;
1825 for (int i
=0; i
< 4; i
++) bitcount
+= __builtin_popcountll(maptrie
->bitmap
[i
]);
1826 assert(bitcount
== count
);
1828 pwrite(fd
, maptrie
, size
, this_offset
+start_offset
);
1831 MapTrieLevel maptrie
;
1832 *offset
+= sizeof(maptrie
);
1834 for (int i
=0; i
< CHARACTER_SET_SIZE
; i
++) {
1835 NextTrie next
= root
->slots
[i
];
1836 if (NextTrie_GetKind(next
) == TrieKind
) {
1837 TrieLevelRef nextLevel
= (TrieLevelRef
)NextTrie_GetPtr(next
);
1838 uint32_t childOffset
= *offset
;
1839 if (serializeCFBurstTrieLevels(trie
, nextLevel
, offset
, start_offset
, true, false, fd
)) {
1840 maptrie
.slots
[i
] = (TrieKind
|childOffset
);
1842 maptrie
.slots
[i
] = (CompactTrieKind
|childOffset
);
1845 maptrie
.slots
[i
] = next
;
1848 maptrie
.payload
= root
->payload
;
1849 pwrite(fd
, &maptrie
, sizeof(maptrie
), this_offset
+start_offset
);
1852 if (dispose
) free(root
);
1856 static void serializeCFBurstTrieList(CFBurstTrieRef trie
, ListNodeRef listNode
, int fd
)
1859 size_t size
= trie
->containerSize
;
1861 // ** Temp list of nodes to sort
1862 ListNodeRef
*nodes
= (ListNodeRef
*)malloc(sizeof(ListNodeRef
) * size
);
1863 for (listCount
= 0; listNode
; listCount
++) {
1864 if (listCount
>= size
) {
1866 nodes
= (ListNodeRef
*)realloc(nodes
, sizeof(ListNodeRef
) * size
);
1868 nodes
[listCount
] = listNode
;
1869 listNode
= listNode
->next
;
1872 char _buffer
[MAX_BUFFER_SIZE
];
1873 size_t bufferSize
= (sizeof(Page
) + size
* (sizeof(PageEntryPacked
) + MAX_STRING_SIZE
));
1874 char *buffer
= bufferSize
< MAX_BUFFER_SIZE
? _buffer
: (char *) malloc(bufferSize
);
1876 Page
*page
= (Page
*)buffer
;
1877 uint32_t current
= 0;
1879 if (trie
->cflags
& kCFBurstTriePrefixCompression
) {
1880 qsort(nodes
, listCount
, sizeof(ListNodeRef
), nodeStringCompare
);
1882 ListNodeRef last
= 0;
1883 for (int i
=0; i
< listCount
; i
++) {
1884 listNode
= nodes
[i
];
1888 pfxLen
< CHARACTER_SET_SIZE
-1 &&
1889 pfxLen
< listNode
->length
&&
1890 pfxLen
< last
->length
&&
1891 listNode
->string
[pfxLen
] == last
->string
[pfxLen
];
1895 PageEntryPacked
*entry
= (PageEntryPacked
*)(&page
->data
[current
]);
1896 entry
->strlen
= listNode
->length
- pfxLen
;
1897 entry
->payload
= listNode
->payload
;
1898 entry
->pfxLen
= pfxLen
;
1899 memcpy(entry
->string
, listNode
->string
+pfxLen
, listNode
->length
-pfxLen
);
1900 current
+= listNode
->length
- pfxLen
+ sizeof(PageEntryPacked
);
1904 if (trie
->cflags
& kCFBurstTrieSortByKey
)
1905 qsort(nodes
, listCount
, sizeof(ListNodeRef
), nodeStringCompare
);
1907 qsort(nodes
, listCount
, sizeof(ListNodeRef
), nodeWeightCompare
);
1909 for (int i
=0; i
< listCount
; i
++) {
1910 listNode
= nodes
[i
];
1911 PageEntry
*entry
= (PageEntry
*)(&page
->data
[current
]);
1912 entry
->strlen
= listNode
->length
;
1913 entry
->payload
= listNode
->payload
;
1914 memcpy(entry
->string
, listNode
->string
, listNode
->length
);
1915 current
+= listNode
->length
+ sizeof(PageEntry
);
1919 size_t len
= (sizeof(Page
) + current
+ 3) & ~3;
1920 page
->length
= current
;
1921 write(fd
, page
, len
);
1924 if (buffer
!= _buffer
) free(buffer
);
1927 static void serializeCFBurstTrieLists(CFBurstTrieRef trie
, TrieLevelRef root
, off_t start_offset
, int fd
)
1929 for (int i
=0; i
< CHARACTER_SET_SIZE
; i
++) {
1930 NextTrie next
= root
->slots
[i
];
1932 if (NextTrie_GetKind(next
) == TrieKind
) {
1933 TrieLevelRef nextLevel
= (TrieLevelRef
)NextTrie_GetPtr(next
);
1934 serializeCFBurstTrieLists(trie
, nextLevel
, start_offset
, fd
);
1936 if (NextTrie_GetKind(next
) == ListKind
) {
1937 ListNodeRef listNode
= (ListNodeRef
)NextTrie_GetPtr(next
);
1938 offset
= lseek(fd
, 0, SEEK_CUR
) - start_offset
;
1939 serializeCFBurstTrieList(trie
, listNode
, fd
);
1940 finalizeCFBurstTrieList(listNode
);
1941 //assert((offset & 3)==0);
1942 root
->slots
[i
] = (offset
|ListKind
);
1948 static size_t serializeCFBurstTrie(CFBurstTrieRef trie
, size_t start_offset
, int fd
)
1951 header
.signature
= 0x0ddba11;
1952 header
.rootOffset
= 0;
1953 header
.count
= trie
->count
;
1955 header
.flags
= trie
->cflags
;
1956 header
.reserved
[0] = 0;
1959 lseek(fd
, start_offset
, SEEK_SET
);
1961 size_t header_size
= sizeof(header
);
1962 write(fd
, &header
, header_size
);
1964 serializeCFBurstTrieLists(trie
, &trie
->root
, start_offset
, fd
);
1966 offset
= lseek(fd
, 0, SEEK_CUR
) - start_offset
;
1967 size_t off
= offsetof(TrieHeader
, rootOffset
);
1968 size_t offsize
= sizeof(offset
);
1969 pwrite(fd
, &offset
, offsize
, off
+start_offset
);
1971 serializeCFBurstTrieLevels(trie
, &trie
->root
, &offset
, start_offset
, false, true, fd
);
1973 size_t off2
= offsetof(TrieHeader
, size
);
1974 offsize
= sizeof(offset
);
1975 pwrite(fd
, &offset
, offsize
, off2
+start_offset
);
1977 offset
= lseek(fd
, 0, SEEK_END
);
1978 return (size_t)(offset
-start_offset
);
1983 #pragma mark Release
1986 static void destroyCFBurstTrie(CFBurstTrieRef trie
) {
1987 if (trie
->mapBase
) {
1988 #if DEPLOYMENT_TARGET_WINDOWS
1989 UnmapViewOfFile(trie
->mapBase
);
1990 CloseHandle(trie
->mapHandle
);
1991 CloseHandle(trie
->mappedFileHandle
);
1993 munmap(trie
->mapBase
, trie
->mapSize
);
1996 finalizeCFBurstTrie(&trie
->root
);
2002 static void finalizeCFBurstTrie(TrieLevelRef trie
) {
2003 for (int i
=0; i
< CHARACTER_SET_SIZE
; i
++) {
2004 if (NextTrie_GetKind(trie
->slots
[i
]) == TrieKind
) {
2005 finalizeCFBurstTrie((TrieLevelRef
)NextTrie_GetPtr(trie
->slots
[i
]));
2006 free((void *)NextTrie_GetPtr(trie
->slots
[i
]));
2007 } else if (NextTrie_GetKind(trie
->slots
[i
]) == ListKind
) {
2008 finalizeCFBurstTrieList((ListNodeRef
)NextTrie_GetPtr(trie
->slots
[i
]));
2013 static void finalizeCFBurstTrieList(ListNodeRef node
) {
2015 ListNodeRef next
= node
->next
;
2023 #pragma mark Helpers
2026 static int nodeWeightCompare(const void *a
, const void *b
) {
2027 ListNodeRef nodeA
= *(ListNodeRef
*)a
;
2028 ListNodeRef nodeB
= *(ListNodeRef
*)b
;
2029 return (nodeB
->weight
- nodeA
->weight
);
2032 static int nodeStringCompare(const void *a
, const void *b
) {
2033 ListNodeRef nodeA
= *(ListNodeRef
*)a
;
2034 ListNodeRef nodeB
= *(ListNodeRef
*)b
;
2035 int result
= memcmp((char *)nodeA
->string
, (char *)nodeB
->string
, MIN(nodeA
->length
, nodeB
->length
));
2036 if (result
== 0) result
= nodeA
->length
-nodeB
->length
;
2040 static bool containsKey(void *context
, const uint8_t *key
, uint32_t payload
, bool exact
)
2042 uint32_t *ctx
= (uint32_t *)context
;
2043 if (exact
) *ctx
= payload
;
2047 static CFIndex
burstTrieConvertCharactersToUTF8(UniChar
*chars
, CFIndex numChars
, UInt8
*buffer
) {
2049 for (i
= j
= 0; i
< numChars
; i
++) {
2050 UniChar c
= chars
[i
];
2051 if (CFStringIsSurrogateHighCharacter(c
) && i
+ 1 < numChars
&& CFStringIsSurrogateLowCharacter(chars
[i
+ 1])) {
2052 UTF32Char lc
= CFStringGetLongCharacterForSurrogatePair(c
, chars
[i
+ 1]);
2053 buffer
[j
++] = 0xf0 + (lc
>> 18);
2054 buffer
[j
++] = 0x80 + ((lc
& 0x3ffff) >> 12);
2055 buffer
[j
++] = 0x80 + ((lc
& 0xfff) >> 6);
2056 buffer
[j
++] = 0x80 + (lc
& 0x3f);
2061 } else if (c
< 0x800) {
2062 buffer
[j
++] = 0xc0 + (c
>> 6);
2063 buffer
[j
++] = 0x80 + (c
& 0x3f);
2065 buffer
[j
++] = 0xe0 + (c
>> 12);
2066 buffer
[j
++] = 0x80 + ((c
& 0xfff) >> 6);
2067 buffer
[j
++] = 0x80 + (c
& 0x3f);