2 * Copyright (c) 2014 Apple Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
21 * @APPLE_LICENSE_HEADER_END@
25 Copyright (c) 2008-2014, Apple Inc. All rights reserved.
26 Responsibility: Jennifer Moore
29 #include "CFInternal.h"
30 #include "CFBurstTrie.h"
31 #include "CFByteOrder.h"
39 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX
41 #include <sys/param.h>
48 #if DEPLOYMENT_TARGET_WINDOWS
50 #define statinfo _stat
51 #define stat(x,y) _NS_stat(x,y)
52 #define __builtin_memcmp(x, y, z) memcmp(x, y, z)
53 #define __builtin_popcountll(x) popcountll(x)
54 #define bzero(dst, size) ZeroMemory(dst, size)
58 static int pwrite(int fd
, const void *buf
, size_t nbyte
, off_t offset
) {
63 _lseek(fd
, offset
, SEEK_SET
);
66 int res
= _write(fd
, buf
, nbyte
);
68 // Return to previous offset
69 _lseek(fd
, pos
, SEEK_SET
);
79 #pragma mark Types and Utilities
82 #define MAX_STRING_ALLOCATION_SIZE 342
83 #define MAX_STRING_SIZE 1024
84 #define MAX_KEY_LENGTH MAX_STRING_SIZE * 4
85 #define CHARACTER_SET_SIZE 256
86 #define MAX_LIST_SIZE 256 // 64
87 #define MAX_BITMAP_SIZE 200
88 #define MAX_BUFFER_SIZE (4096<<2)
90 #define NextTrie_GetPtr(p) (p & ((~(uintptr_t)0)-3))
91 #define NextTrie_GetKind(p) (p & 3)
92 #define NextTrie_SetKind(p, kind) (p |= (3&kind))
94 #define DiskNextTrie_GetPtr(map,offset) (((uintptr_t)map) + (uintptr_t)(offset & ((~(uintptr_t)0)-3)))
95 #define DiskNextTrie_GetKind(p) (p & 3)
96 #define DiskNextTrie_SetKind(p, kind) (p |= (3&kind))
98 // Use this macro to avoid forgetting to check the pointer before assigning value to it.
99 #define SetPayload(pointer, value) do { if (pointer) *pointer = value; } while (0)
101 enum { Nothing
= 0, TrieKind
= 1, ListKind
= 2, CompactTrieKind
= 3 };
102 typedef enum { FailedInsert
= 0, NewTerm
= 1, ExistingTerm
= 2 } CFBTInsertCode
;
105 typedef uintptr_t NextTrie
;
107 typedef struct _TrieLevel
{
108 NextTrie slots
[CHARACTER_SET_SIZE
];
112 typedef TrieLevel
*TrieLevelRef
;
114 typedef struct _MapTrieLevel
{
115 uint32_t slots
[CHARACTER_SET_SIZE
];
118 typedef MapTrieLevel
*MapTrieLevelRef
;
120 typedef struct _CompactMapTrieLevel
{
121 uint64_t bitmap
[CHARACTER_SET_SIZE
/ 64];
124 } CompactMapTrieLevel
;
125 typedef CompactMapTrieLevel
*CompactMapTrieLevelRef
;
127 typedef struct _ListNode
{
128 struct _ListNode
*next
;
135 typedef struct _Page
{
140 typedef struct _PageEntryPacked
{
147 typedef struct _PageEntry
{
153 typedef struct _TrieHeader
{
159 uint64_t reserved
[16];
162 typedef struct _TrieCursor
{
168 const uint8_t *prefix
;
169 uint8_t key
[MAX_KEY_LENGTH
];
172 typedef struct _MapCursor
{
178 const uint8_t *prefix
;
179 uint8_t key
[MAX_STRING_SIZE
*4];
182 typedef struct _CompactMapCursor
{
184 uint32_t entryOffsetInPage
;
185 uint32_t offsetInEntry
;
187 // On a page, the first entry could has 0 strlen. So we need this variable to tell us whether
188 // the cursor is merely pointing at the beginning of the page, or the first entry.
189 // For example, if the trie contains "ab" and "abc", where "a" is stored on an array level,
190 // while "b" and "bc" are stored on a page level. If we creat a cursor for string "a", this cursor
191 // will point at the beginning of the page, but not at any particular key. The both entryOffsetInPage and
192 // offsetInEntry fields of the cursor are set to 0 in this case. Now if we add "a" to the
193 // trie. the page level will actually contains three entries. The first entry corresponds to string "a".
194 // That entry has 0 strlen value. If we creat a cursor for string "a" again, this cursor will
195 // point at the first entry on the page. But the entryOffsetInPage and offsetInEntry fields are still
196 // set to 0s. So we need an additional variable to make distinction between these two situations.
199 typedef struct _CompactMapCursor
*MapCursorRef
;
202 _kCFBurstTrieCursorTrieType
= 0,
203 _kCFBurstTrieCursorMapType
206 typedef struct _CFBurstTrieCursor
{
207 CompactMapCursor mapCursor
;
210 } _CFBurstTrieCursor
;
213 typedef struct _DiskTrieLevel
{
214 uint32_t slots
[CHARACTER_SET_SIZE
];
218 typedef DiskTrieLevel
*DiskTrieLevelRef
;
220 typedef struct _CompactDiskTrieLevel
{
221 uint64_t bitmap
[CHARACTER_SET_SIZE
/ 64]; // CHARACTER_SET_SIZE / 64bits per word
225 } CompactDiskTrieLevel
;
226 typedef CompactDiskTrieLevel
*CompactDiskTrieLevelRef
;
228 typedef struct _StringPage
{
233 typedef struct _StringPageEntryPacked
{
235 uint16_t strlen
; // make uint8_t if possible
238 } StringPageEntryPacked
;
240 typedef struct _StringPageEntry
{
241 uint16_t strlen
; // make uint8_t if possible
246 typedef struct _fileHeader
{
256 struct _CFBurstTrie
{
259 DiskTrieLevel diskRoot
;
260 MapTrieLevel maproot
;
267 uint32_t containerSize
;
269 #if DEPLOYMENT_TARGET_WINDOWS
271 HANDLE mappedFileHandle
;
277 #pragma mark Forward declarations
280 typedef struct _TraverseContext
{
282 void (*callback
)(void*, const UInt8
*, uint32_t, uint32_t);
285 static bool foundKey(void *context
, const uint8_t *key
, uint32_t payload
, bool exact
)
287 if (context
!= NULL
) {
288 TraverseContext
*ctx
= (TraverseContext
*)context
;
289 if (ctx
->context
&& ctx
->callback
) {
290 ctx
->callback(ctx
->context
, key
, 1, payload
);
296 void CFBurstTrieTraverseWithCursor(CFBurstTrieRef trie
, const uint8_t *prefix
, uint32_t prefixLen
, void **cursor
, void *ctx
, bool (*callback
)(void *, const uint8_t *, uint32_t, bool));
298 static CFBTInsertCode
addCFBurstTrieLevel(CFBurstTrieRef trie
, TrieLevelRef root
, const uint8_t *key
, uint32_t keylen
, uint32_t weight
, uint32_t payload
);
300 static void findCFBurstTrieLevel(CFBurstTrieRef trie
, TrieCursor
*cursor
, bool exactmatch
, void *ctx
, bool (*callback
)(void*, const uint8_t*, uint32_t, bool));
301 static void findCFBurstTrieMappedLevel(CFBurstTrieRef trie
, MapCursor
*cursor
, bool exactmatch
, void *ctx
, bool (*callback
)(void*, const uint8_t*, uint32_t, bool));
302 static void traverseCFBurstTrieLevel(CFBurstTrieRef trie
, TrieLevelRef root
, TrieCursor
*cursor
, bool exactmatch
, void *ctx
, bool (*callback
)(void *, const uint8_t *, uint32_t, bool));
303 static void traverseCFBurstTrieMappedLevel(CFBurstTrieRef trie
, MapTrieLevelRef root
, MapCursor
*cursor
, bool exactmatch
, void *ctx
, bool (*callback
)(void *, const uint8_t *, uint32_t, bool));
304 static void traverseCFBurstTrieCompactMappedLevel(CFBurstTrieRef trie
, CompactMapTrieLevelRef root
, MapCursor
*cursor
, bool exactmatch
, void *ctx
, bool (*callback
)(void *, const uint8_t *, uint32_t, bool));
305 static void traverseCFBurstTrieWithCursor(CFBurstTrieRef trie
, const uint8_t *prefix
, uint32_t prefixLen
, void **cursor
, bool exactmatch
, void *ctx
, bool (*callback
)(void *, const uint8_t *, uint32_t, bool));
307 static size_t serializeCFBurstTrie(CFBurstTrieRef trie
, size_t start_offset
, int fd
);
309 static Boolean
burstTrieMappedFind(DiskTrieLevelRef trie
, char *map
, const UInt8
*key
, uint32_t length
, uint32_t *payload
, bool prefix
);
310 static Boolean
burstTrieMappedPageFind(StringPage
*page
, const UInt8
*key
, uint32_t length
, uint32_t *payload
, bool prefix
);
311 static Boolean
burstTrieCompactTrieMappedFind(CompactDiskTrieLevelRef trie
, char *map
, const UInt8
*key
, uint32_t length
, uint32_t *payload
, bool prefix
);
313 static void destroyCFBurstTrie(CFBurstTrieRef trie
);
314 static void finalizeCFBurstTrie(TrieLevelRef trie
);
315 static void finalizeCFBurstTrieList(ListNodeRef node
);
317 static int nodeWeightCompare(const void *a
, const void *b
);
318 static int nodeStringCompare(const void *a
, const void *b
);
320 static bool foundKey(void *context
, const uint8_t *key
, uint32_t payload
, bool exact
);
321 static bool containsKey(void *context
, const uint8_t *key
, uint32_t payload
, bool exact
);
323 static CFIndex
burstTrieConvertCharactersToUTF8(UniChar
*chars
, CFIndex numChars
, UInt8
*buffer
);
325 static Boolean
advanceMapCursor(CFBurstTrieRef trie
, CompactMapCursor
*cursor
, const UInt8
* bytes
, CFIndex length
);
326 static Boolean
getMapCursorPayload(CFBurstTrieRef trie
, const CompactMapCursor
*cursor
, uint32_t *payload
);
327 static void copyMapCursor(const CompactMapCursor
*source
, CompactMapCursor
* destination
);
328 static Boolean
areMapCursorsEqual(const CompactMapCursor
*lhs
, const CompactMapCursor
*rhs
);
329 static void traverseFromMapCursor(CFBurstTrieRef trie
, CompactMapCursor
*cursor
, UInt8
* bytes
, uint32_t capacity
, uint32_t length
, Boolean
*stop
, void *ctx
, CFBurstTrieTraversalCallback callback
);
330 static Boolean
getMapCursorPayloadFromPackedPageEntry(PageEntryPacked
*entry
, const CompactMapCursor
*cursor
, uint32_t *payload
);
331 static Boolean
getMapCursorPayloadFromPageEntry(PageEntry
*entry
, const CompactMapCursor
*cursor
, uint32_t *payload
);
333 CFBurstTrieRef
CFBurstTrieCreateWithOptions(CFDictionaryRef options
) {
334 CFBurstTrieRef trie
= NULL
;
335 trie
= (CFBurstTrieRef
) calloc(1, sizeof(struct _CFBurstTrie
));
336 trie
->containerSize
= MAX_LIST_SIZE
;
338 CFNumberRef valueAsCFNumber
;
339 if (CFDictionaryGetValueIfPresent(options
, kCFBurstTrieCreationOptionNameContainerSize
, (const void **)&valueAsCFNumber
)) {
341 CFNumberGetValue(valueAsCFNumber
, kCFNumberIntType
, &value
);
342 trie
->containerSize
= value
> 2 && value
< 4096 ? value
: MAX_LIST_SIZE
;
348 CFBurstTrieRef
CFBurstTrieCreate() {
349 CFBurstTrieRef trie
= NULL
;
350 int listSize
= MAX_LIST_SIZE
;
351 CFNumberRef value
= CFNumberCreate(kCFAllocatorDefault
, kCFNumberIntType
, &listSize
);
352 CFMutableDictionaryRef options
= CFDictionaryCreateMutable(kCFAllocatorDefault
, 1, NULL
, NULL
);
353 CFDictionarySetValue(options
, kCFBurstTrieCreationOptionNameContainerSize
, value
);
354 trie
= CFBurstTrieCreateWithOptions(options
);
360 CFBurstTrieRef
CFBurstTrieCreateFromFile(CFStringRef path
) {
362 char filename
[PATH_MAX
];
365 /* Check valid path name */
366 if (!CFStringGetCString(path
, filename
, PATH_MAX
, kCFStringEncodingUTF8
)) return NULL
;
368 /* Check if file exists */
369 if (stat(filename
, &sb
) != 0) return NULL
;
371 /* Check if file can be opened */
372 if ((fd
=open(filename
, CF_OPENFLGS
|O_RDONLY
)) < 0) return NULL
;
374 #if DEPLOYMENT_TARGET_WINDOWS
375 HANDLE mappedFileHandle
= (HANDLE
)_get_osfhandle(fd
);
376 if (!mappedFileHandle
) return NULL
;
378 HANDLE mapHandle
= CreateFileMapping(mappedFileHandle
, NULL
, PAGE_READONLY
, 0, 0, NULL
);
379 if (!mapHandle
) return NULL
;
381 char *map
= (char *)MapViewOfFile(mapHandle
, FILE_MAP_READ
, 0, 0, sb
.st_size
);
382 if (!map
) return NULL
;
384 char *map
= mmap(0, sb
.st_size
, PROT_READ
, MAP_FILE
|MAP_SHARED
, fd
, 0);
387 CFBurstTrieRef trie
= NULL
;
388 TrieHeader
*header
= (TrieHeader
*)map
;
390 if (((uint32_t*)map
)[0] == 0xbabeface) {
391 trie
= (CFBurstTrieRef
) calloc(1, sizeof(struct _CFBurstTrie
));
393 trie
->mapSize
= CFSwapInt32LittleToHost(sb
.st_size
);
394 trie
->mapOffset
= CFSwapInt32LittleToHost(((fileHeader
*)trie
->mapBase
)->rootOffset
);
395 trie
->cflags
= CFSwapInt32LittleToHost(((fileHeader
*)trie
->mapBase
)->flags
);
396 trie
->count
= CFSwapInt32LittleToHost(((fileHeader
*)trie
->mapBase
)->count
);
398 #if DEPLOYMENT_TARGET_WINDOWS
399 trie
->mappedFileHandle
= mappedFileHandle
;
400 trie
->mapHandle
= mapHandle
;
402 // On Windows, the file being mapped must stay open as long as the map exists. Don't close it early. Other platforms close it here.
405 } else if (header
->signature
== 0xcafebabe || header
->signature
== 0x0ddba11) {
406 trie
= (CFBurstTrieRef
) calloc(1, sizeof(struct _CFBurstTrie
));
408 trie
->mapSize
= CFSwapInt32LittleToHost(sb
.st_size
);
409 trie
->cflags
= CFSwapInt32LittleToHost(header
->flags
);
410 trie
->count
= CFSwapInt32LittleToHost(header
->count
);
412 #if DEPLOYMENT_TARGET_WINDOWS
413 trie
->mappedFileHandle
= mappedFileHandle
;
414 trie
->mapHandle
= mapHandle
;
416 // On Windows, the file being mapped must stay open as long as the map exists. Don't close it early. Other platforms close it here.
425 CFBurstTrieRef
CFBurstTrieCreateFromMapBytes(char *mapBase
) {
426 CFBurstTrieRef trie
= NULL
;
427 TrieHeader
*header
= (TrieHeader
*)mapBase
;
429 if (mapBase
&& ((uint32_t*)mapBase
)[0] == 0xbabeface) {
430 trie
= (CFBurstTrieRef
) malloc(sizeof(struct _CFBurstTrie
));
431 trie
->mapBase
= mapBase
;
432 trie
->mapSize
= CFSwapInt32LittleToHost(((fileHeader
*)trie
->mapBase
)->size
);
433 trie
->mapOffset
= CFSwapInt32LittleToHost(((fileHeader
*)trie
->mapBase
)->rootOffset
);
434 trie
->cflags
= CFSwapInt32LittleToHost(((fileHeader
*)trie
->mapBase
)->flags
);
435 trie
->count
= CFSwapInt32LittleToHost(((fileHeader
*)trie
->mapBase
)->count
);
437 } else if (mapBase
&& (header
->signature
== 0xcafebabe || header
->signature
== 0x0ddba11)) {
438 trie
= (CFBurstTrieRef
) malloc(sizeof(struct _CFBurstTrie
));
439 trie
->mapBase
= mapBase
;
440 trie
->mapSize
= CFSwapInt32LittleToHost(header
->size
);
441 trie
->cflags
= CFSwapInt32LittleToHost(header
->flags
);
442 trie
->count
= CFSwapInt32LittleToHost(header
->count
);
448 Boolean
CFBurstTrieInsert(CFBurstTrieRef trie
, CFStringRef term
, CFRange termRange
, CFIndex payload
) {
449 return CFBurstTrieAddWithWeight(trie
, term
, termRange
, 1, (uint32_t)payload
);
452 Boolean
CFBurstTrieAdd(CFBurstTrieRef trie
, CFStringRef term
, CFRange termRange
, uint32_t payload
) {
453 return CFBurstTrieAddWithWeight(trie
, term
, termRange
, 1, payload
);
456 Boolean
CFBurstTrieInsertCharacters(CFBurstTrieRef trie
, UniChar
*chars
, CFIndex numChars
, CFIndex payload
) {
457 return CFBurstTrieAddCharactersWithWeight(trie
, chars
, numChars
, 1, (uint32_t)payload
);
460 Boolean
CFBurstTrieAddCharacters(CFBurstTrieRef trie
, UniChar
*chars
, CFIndex numChars
, uint32_t payload
) {
461 return CFBurstTrieAddCharactersWithWeight(trie
, chars
, numChars
, 1, payload
);
464 Boolean
CFBurstTrieInsertUTF8String(CFBurstTrieRef trie
, UInt8
*chars
, CFIndex numChars
, CFIndex payload
) {
465 return CFBurstTrieAddUTF8StringWithWeight(trie
, chars
, numChars
, 1, (uint32_t)payload
);
468 Boolean
CFBurstTrieAddUTF8String(CFBurstTrieRef trie
, UInt8
*chars
, CFIndex numChars
, uint32_t payload
) {
469 return CFBurstTrieAddUTF8StringWithWeight(trie
, chars
, numChars
, 1, payload
);
472 Boolean
CFBurstTrieInsertWithWeight(CFBurstTrieRef trie
, CFStringRef term
, CFRange termRange
, CFIndex weight
, CFIndex payload
) {
473 return CFBurstTrieAddWithWeight(trie
, term
, termRange
, weight
, (uint32_t)payload
);
476 Boolean
CFBurstTrieAddWithWeight(CFBurstTrieRef trie
, CFStringRef term
, CFRange termRange
, uint32_t weight
, uint32_t payload
) {
477 Boolean success
= false;
478 CFIndex size
= MAX_STRING_ALLOCATION_SIZE
;
479 CFIndex bytesize
= termRange
.length
* 4; //** 4-byte max character size
480 if (!trie
->mapBase
&& termRange
.length
< MAX_STRING_SIZE
&& payload
> 0) {
482 UInt8 buffer
[MAX_STRING_ALLOCATION_SIZE
+ 1];
484 if (bytesize
>= size
) {
486 key
= (UInt8
*) malloc(sizeof(UInt8
) * size
+ 1);
488 CFStringGetBytes(term
, termRange
, kCFStringEncodingUTF8
, (UInt8
)'-', (Boolean
)0, key
, size
, &length
);
491 success
= CFBurstTrieAddUTF8StringWithWeight(trie
, key
, length
, weight
, payload
);
492 if (buffer
!= key
) free(key
);
497 Boolean
CFBurstTrieInsertCharactersWithWeight(CFBurstTrieRef trie
, UniChar
*chars
, CFIndex numChars
, CFIndex weight
, CFIndex payload
) {
498 return CFBurstTrieAddCharactersWithWeight(trie
, chars
, numChars
, weight
, (uint32_t)payload
);
501 Boolean
CFBurstTrieAddCharactersWithWeight(CFBurstTrieRef trie
, UniChar
*chars
, CFIndex numChars
, uint32_t weight
, uint32_t payload
) {
502 Boolean success
= false;
503 CFIndex size
= MAX_STRING_ALLOCATION_SIZE
;
504 CFIndex bytesize
= numChars
* 4; //** 4-byte max character size
505 if (!trie
->mapBase
&& numChars
< MAX_STRING_SIZE
&& payload
> 0) {
507 UInt8 buffer
[MAX_STRING_ALLOCATION_SIZE
+ 1];
509 if (bytesize
>= size
) {
511 key
= (UInt8
*) malloc(sizeof(UInt8
) * size
+ 1);
513 length
= burstTrieConvertCharactersToUTF8(chars
, numChars
, key
);
516 success
= CFBurstTrieAddUTF8StringWithWeight(trie
, key
, length
, weight
, payload
);
517 if (buffer
!= key
) free(key
);
522 Boolean
CFBurstTrieInsertUTF8StringWithWeight(CFBurstTrieRef trie
, UInt8
*chars
, CFIndex numChars
, CFIndex weight
, CFIndex payload
) {
523 return CFBurstTrieAddUTF8StringWithWeight(trie
, chars
, numChars
, weight
, (uint32_t)payload
);
526 Boolean
CFBurstTrieAddUTF8StringWithWeight(CFBurstTrieRef trie
, UInt8
*chars
, CFIndex numChars
, uint32_t weight
, uint32_t payload
) {
527 CFBTInsertCode code
= FailedInsert
;
529 if (!trie
->mapBase
&& numChars
< MAX_STRING_SIZE
*4 && payload
> 0) {
530 code
= addCFBurstTrieLevel(trie
, &trie
->root
, chars
, numChars
, weight
, payload
);
531 if (code
== NewTerm
) trie
->count
++;
533 return code
> FailedInsert
;
536 Boolean
CFBurstTrieFind(CFBurstTrieRef trie
, CFStringRef term
, CFRange termRange
, CFIndex
*payload
) {
538 if (CFBurstTrieContains(trie
, term
, termRange
, &p
)) {
539 SetPayload(payload
, p
);
545 Boolean
CFBurstTrieContains(CFBurstTrieRef trie
, CFStringRef term
, CFRange termRange
, uint32_t *payload
) {
546 Boolean success
= false;
547 CFIndex size
= MAX_STRING_ALLOCATION_SIZE
;
548 CFIndex bytesize
= termRange
.length
* 4; //** 4-byte max character size
549 if (termRange
.length
< MAX_STRING_SIZE
) {
551 UInt8 buffer
[MAX_STRING_ALLOCATION_SIZE
+1];
553 if (bytesize
>= size
) {
555 key
= (UInt8
*) malloc(sizeof(UInt8
) * size
+ 1);
557 CFStringGetBytes(term
, termRange
, kCFStringEncodingUTF8
, (UInt8
)'-', (Boolean
)0, key
, size
, &length
);
560 success
= CFBurstTrieContainsUTF8String(trie
, key
, length
, payload
);
561 if (buffer
!= key
) free(key
);
566 Boolean
CFBurstTrieFindCharacters(CFBurstTrieRef trie
, UniChar
*chars
, CFIndex numChars
, CFIndex
*payload
) {
568 if (CFBurstTrieContainsCharacters(trie
, chars
, numChars
, &p
)) {
569 SetPayload(payload
, p
);
575 Boolean
CFBurstTrieContainsCharacters(CFBurstTrieRef trie
, UniChar
*chars
, CFIndex numChars
, uint32_t *payload
) {
576 Boolean success
= false;
577 CFIndex size
= MAX_STRING_ALLOCATION_SIZE
;
578 CFIndex bytesize
= numChars
* 4; //** 4-byte max character size
579 if (numChars
< MAX_STRING_SIZE
) {
581 UInt8 buffer
[MAX_STRING_ALLOCATION_SIZE
+ 1];
583 if (bytesize
>= size
) {
585 key
= (UInt8
*) malloc(sizeof(UInt8
) * size
+ 1);
587 length
= burstTrieConvertCharactersToUTF8(chars
, numChars
, key
);
590 success
= CFBurstTrieContainsUTF8String(trie
, key
, length
, payload
);
591 if (buffer
!= key
) free(key
);
596 Boolean
CFBurstTrieFindUTF8String(CFBurstTrieRef trie
, UInt8
*key
, CFIndex length
, CFIndex
*payload
) {
598 if (CFBurstTrieContainsUTF8String(trie
, key
, length
, &p
)) {
599 SetPayload(payload
, p
);
605 Boolean
CFBurstTrieContainsUTF8String(CFBurstTrieRef trie
, UInt8
*key
, CFIndex length
, uint32_t *payload
) {
606 Boolean success
= false;
607 if (length
< MAX_STRING_SIZE
) {
608 if (trie
->mapBase
&& ((fileHeader
*)trie
->mapBase
)->signature
== 0xbabeface) {
609 bool prefix
= (trie
->cflags
& kCFBurstTriePrefixCompression
);
610 success
= burstTrieMappedFind((DiskTrieLevelRef
)(trie
->mapBase
+CFSwapInt32LittleToHost((((uint32_t*)trie
->mapBase
)[1]))), trie
->mapBase
, key
, length
, payload
, prefix
);
611 } else if (trie
->mapBase
&& trie
->cflags
& (kCFBurstTriePrefixCompression
| kCFBurstTrieSortByKey
)) {
612 _CFBurstTrieCursor cursor
;
613 if (!CFBurstTrieSetCursorForBytes(trie
, &cursor
, key
, length
))
615 return CFBurstTrieCursorGetPayload(&cursor
, payload
);
619 traverseCFBurstTrieWithCursor(trie
, key
, length
, &cursor
, true, &found
, containsKey
);
620 if (found
) SetPayload(payload
, found
);
627 Boolean
CFBurstTrieSerialize(CFBurstTrieRef trie
, CFStringRef path
, CFBurstTrieOpts opts
) {
628 Boolean success
= false;
633 char filename
[PATH_MAX
];
635 /* Check valid path name */
636 if (!CFStringGetCString(path
, filename
, PATH_MAX
, kCFStringEncodingUTF8
)) return success
;
638 /* Check if file can be opened */
639 if ((fd
=open(filename
, CF_OPENFLGS
|O_RDWR
|O_CREAT
|O_TRUNC
, S_IRUSR
|S_IWUSR
)) < 0) return success
;
641 if (CFBurstTrieSerializeWithFileDescriptor(trie
, fd
, opts
)) success
= true;
648 Boolean
CFBurstTrieSerializeWithFileDescriptor(CFBurstTrieRef trie
, int fd
, CFBurstTrieOpts opts
) {
649 Boolean success
= false;
650 if (!trie
->mapBase
&& fd
>= 0) {
651 off_t start_offset
= lseek(fd
, 0, SEEK_END
);
654 trie
->mapSize
= serializeCFBurstTrie(trie
, start_offset
, fd
);
656 #if DEPLOYMENT_TARGET_WINDOWS
657 HANDLE mappedFileHandle
= (HANDLE
)_get_osfhandle(fd
);
658 // We need to make sure we have our own handle to keep this file open as long as the mmap lasts
659 DuplicateHandle(GetCurrentProcess(), mappedFileHandle
, GetCurrentProcess(), &mappedFileHandle
, 0, 0, DUPLICATE_SAME_ACCESS
);
660 HANDLE mapHandle
= CreateFileMapping(mappedFileHandle
, NULL
, PAGE_READONLY
, 0, 0, NULL
);
661 if (!mapHandle
) return NULL
;
662 char *map
= (char *)MapViewOfFile(mapHandle
, FILE_MAP_READ
, 0, start_offset
, trie
->mapSize
);
663 if (!map
) return NULL
;
665 trie
->mapHandle
= mapHandle
;
666 trie
->mappedFileHandle
= mappedFileHandle
;
668 trie
->mapBase
= mmap(0, trie
->mapSize
, PROT_READ
, MAP_FILE
|MAP_SHARED
, fd
, start_offset
);
676 void CFBurstTrieTraverse(CFBurstTrieRef trie
, void *ctx
, void (*callback
)(void*, const UInt8
*, uint32_t, uint32_t)) {
677 TrieHeader
*header
= (TrieHeader
*)trie
->mapBase
;
678 if (!trie
->mapBase
|| (header
->signature
== 0xcafebabe || header
->signature
== 0x0ddba11)) {
680 TraverseContext context
;
681 context
.context
= ctx
;
682 context
.callback
= callback
;
683 traverseCFBurstTrieWithCursor(trie
, (const uint8_t *)"", 0, &cursor
, false, &context
, foundKey
);
688 void CFBurstTrieTraverseWithCursor(CFBurstTrieRef trie
, const uint8_t *prefix
, uint32_t prefixLen
, void **cursor
, void *ctx
, bool (*callback
)(void *, const uint8_t *, uint32_t, bool))
690 traverseCFBurstTrieWithCursor(trie
, prefix
, prefixLen
, cursor
, false, ctx
, callback
);
693 void CFBurstTriePrint(CFBurstTrieRef trie
) {
697 CFIndex
CFBurstTrieGetCount(CFBurstTrieRef trie
) {
701 CFBurstTrieRef
CFBurstTrieRetain(CFBurstTrieRef trie
) {
706 void CFBurstTrieRelease(CFBurstTrieRef trie
) {
708 if (trie
->retain
== 0) destroyCFBurstTrie(trie
);
712 Boolean
CFBurstTrieSetCursorForBytes(CFBurstTrieRef trie
, CFBurstTrieCursorRef cursor
, const UInt8
* bytes
, CFIndex length
)
714 if (!trie
->mapBase
|| !(trie
->cflags
& (kCFBurstTriePrefixCompression
| kCFBurstTrieSortByKey
))) {
715 //fprintf(stderr, "CFBurstTrieCreateCursorForBytes() only support file based trie in prefix compression format.\n");
719 TrieHeader
*header
= (TrieHeader
*)trie
->mapBase
;
720 if (length
< 0 || !trie
)
725 cursor
->cursorType
= _kCFBurstTrieCursorMapType
;
726 cursor
->mapCursor
.next
= header
->rootOffset
;
727 cursor
->mapCursor
.isOnPage
= FALSE
;
728 cursor
->mapCursor
.entryOffsetInPage
= 0;
729 cursor
->mapCursor
.offsetInEntry
= 0;
730 cursor
->mapCursor
.payload
= 0;
734 if (!bytes
|| length
== 0)
737 return CFBurstTrieCursorAdvanceForBytes(cursor
, bytes
, length
);
741 CFBurstTrieCursorRef
CFBurstTrieCreateCursorForBytes(CFBurstTrieRef trie
, const UInt8
* bytes
, CFIndex length
)
743 CFBurstTrieCursorRef cursor
= (CFBurstTrieCursorRef
)calloc(sizeof(_CFBurstTrieCursor
), 1);
744 if (!CFBurstTrieSetCursorForBytes(trie
, cursor
, bytes
, length
)) {
745 CFBurstTrieCursorRelease(cursor
);
751 CFBurstTrieCursorRef
CFBurstTrieCursorCreateByCopy(CFBurstTrieCursorRef cursor
)
756 CFBurstTrieCursorRef newCursor
= (CFBurstTrieCursorRef
)calloc(sizeof(_CFBurstTrieCursor
), 1);
757 switch (cursor
->cursorType
) {
758 case _kCFBurstTrieCursorMapType
:
759 copyMapCursor(&cursor
->mapCursor
, &newCursor
->mapCursor
);
761 case _kCFBurstTrieCursorTrieType
:
765 newCursor
->cursorType
= cursor
->cursorType
;
766 newCursor
->trie
= cursor
->trie
;
770 Boolean
CFBurstTrieCursorIsEqual(CFBurstTrieCursorRef lhs
, CFBurstTrieCursorRef rhs
)
772 if (lhs
->trie
!= rhs
->trie
|| lhs
->cursorType
!= rhs
->cursorType
)
775 if (lhs
->cursorType
== _kCFBurstTrieCursorMapType
)
776 return areMapCursorsEqual(&lhs
->mapCursor
, &rhs
->mapCursor
);
781 Boolean
CFBurstTrieCursorAdvanceForBytes(CFBurstTrieCursorRef cursor
, const UInt8
* bytes
, CFIndex length
)
783 switch (cursor
->cursorType
) {
784 case _kCFBurstTrieCursorMapType
: {
785 CompactMapCursor tempCursor
;
786 copyMapCursor(&cursor
->mapCursor
, &tempCursor
);
787 if (advanceMapCursor(cursor
->trie
, (CompactMapCursor
*)&cursor
->mapCursor
, bytes
, length
))
790 copyMapCursor(&tempCursor
, &cursor
->mapCursor
);
794 case _kCFBurstTrieCursorTrieType
:
800 Boolean
CFBurstTrieCursorGetPayload(CFBurstTrieCursorRef cursor
, uint32_t *payload
)
802 switch (cursor
->cursorType
) {
803 case _kCFBurstTrieCursorMapType
:
804 return getMapCursorPayload(cursor
->trie
, (CompactMapCursor
*)&cursor
->mapCursor
, payload
);
805 case _kCFBurstTrieCursorTrieType
:
811 void CFBurstTrieCursorRelease(CFBurstTrieCursorRef cursor
)
818 void CFBurstTrieTraverseFromCursor(CFBurstTrieCursorRef cursor
, void *ctx
, CFBurstTrieTraversalCallback callback
)
823 UInt8
*bytes
= (UInt8
*)calloc(1, MAX_KEY_LENGTH
);
824 uint32_t capacity
= MAX_KEY_LENGTH
;
826 Boolean stop
= FALSE
;
827 switch (cursor
->cursorType
) {
828 case _kCFBurstTrieCursorMapType
: {
829 CompactMapCursor tempCursor
;
830 copyMapCursor(&cursor
->mapCursor
, &tempCursor
);
831 traverseFromMapCursor(cursor
->trie
, &tempCursor
, bytes
, capacity
,length
, &stop
, ctx
, callback
);
834 case _kCFBurstTrieCursorTrieType
:
842 #pragma mark Insertion
845 static ListNodeRef
makeCFBurstTrieListNode(const uint8_t *key
, uint32_t keylen
, uint32_t weight
, uint32_t payload
) {
846 ListNodeRef node
= (ListNodeRef
) calloc(1, sizeof(*node
) + keylen
+ 1);
847 memcpy(node
->string
, key
, keylen
);
848 node
->string
[keylen
] = 0;
850 node
->length
= keylen
;
851 node
->weight
= weight
;
852 node
->payload
= payload
;
856 static void addCFBurstTrieBurstLevel(CFBurstTrieRef trie
, TrieLevelRef root
, const uint8_t *key
, uint32_t keylen
, uint32_t weight
, uint32_t payload
) {
858 NextTrie next
= root
->slots
[*key
];
859 ListNodeRef newNode
= makeCFBurstTrieListNode(key
+1, keylen
-1, weight
, payload
);
860 newNode
->weight
= weight
;
861 newNode
->next
= (ListNodeRef
) NextTrie_GetPtr(next
);
862 next
= (uintptr_t) newNode
;
863 NextTrie_SetKind(next
, ListKind
);
864 root
->slots
[*key
] = next
;
866 // ** Handle payload.
867 root
->weight
= weight
;
868 root
->payload
= payload
;
872 static TrieLevelRef
burstCFBurstTrieLevel(CFBurstTrieRef trie
, ListNodeRef list
, uint32_t listCount
) {
873 TrieLevelRef newLevel
= (TrieLevelRef
) calloc(1, sizeof(struct _TrieLevel
));
875 addCFBurstTrieBurstLevel(trie
, newLevel
, list
->string
, list
->length
, list
->weight
, list
->payload
);
876 ListNodeRef temp
= list
;
883 static CFBTInsertCode
addCFBurstTrieListNode(CFBurstTrieRef trie
, ListNodeRef list
, const uint8_t *key
, uint32_t keylen
, uint32_t weight
, uint32_t payload
, uint32_t *listCount
)
885 CFBTInsertCode code
= FailedInsert
;
888 ListNodeRef last
= list
;
890 if (list
->length
== keylen
&& memcmp(key
, list
->string
, keylen
) == 0) {
891 list
->weight
+= weight
;
892 list
->payload
= payload
;
903 last
->next
= makeCFBurstTrieListNode(key
, keylen
, weight
, payload
);
911 static CFBTInsertCode
addCFBurstTrieLevel(CFBurstTrieRef trie
, TrieLevelRef root
, const uint8_t *key
, uint32_t keylen
, uint32_t weight
, uint32_t payload
)
913 CFBTInsertCode code
= FailedInsert
;
915 NextTrie next
= root
->slots
[*key
];
916 if (NextTrie_GetKind(next
) == TrieKind
) {
917 TrieLevelRef nextLevel
= (TrieLevelRef
) NextTrie_GetPtr(next
);
918 code
= addCFBurstTrieLevel(trie
, nextLevel
, key
+1, keylen
-1, weight
, payload
);
920 if (NextTrie_GetKind(next
) == ListKind
) {
922 ListNodeRef listNode
= (ListNodeRef
) NextTrie_GetPtr(next
);
923 code
= addCFBurstTrieListNode(trie
, listNode
, key
+1, keylen
-1, weight
, payload
, &listCount
);
924 if (listCount
> trie
->containerSize
) {
925 next
= (uintptr_t) burstCFBurstTrieLevel(trie
, listNode
, listCount
);
926 NextTrie_SetKind(next
, TrieKind
);
929 // ** Make a new list node
930 next
= (uintptr_t) makeCFBurstTrieListNode(key
+1, keylen
-1, weight
, payload
);
931 NextTrie_SetKind(next
, ListKind
);
934 root
->slots
[*key
] = next
;
938 if (!root
->weight
) code
= NewTerm
;
939 else code
= ExistingTerm
;
940 root
->weight
+= weight
;
941 root
->payload
= payload
;
948 #pragma mark Searching
950 static void findCFBurstTrieList(CFBurstTrieRef trie
, TrieCursor
*cursor
, void *ctx
, bool (*callback
)(void*, const uint8_t*, uint32_t, bool))
952 ListNodeRef list
= (ListNodeRef
)NextTrie_GetPtr(cursor
->next
);
953 int len
= cursor
->prefixlen
-cursor
->keylen
;
954 len
= len
<= 0 ? 0 : len
;
956 int lencompare
= list
->length
-len
;
957 if (list
->length
>= len
&&
958 (len
== 0 || memcmp(list
->string
, cursor
->prefix
+cursor
->keylen
, len
) == 0)) {
959 memcpy(cursor
->key
+cursor
->keylen
, list
->string
, list
->length
);
960 cursor
->key
[cursor
->keylen
+list
->length
] = 0;
961 cursor
->next
= (NextTrie
)list
;
962 if (list
->payload
&& callback(ctx
, cursor
->key
, list
->payload
, lencompare
==0)) return;
968 static void findCFBurstTrieMappedPage(CFBurstTrieRef trie
, MapCursor
*cursor
, void *ctx
, bool (*callback
)(void*, const uint8_t*, uint32_t, bool))
970 Page
*page
= (Page
*)DiskNextTrie_GetPtr(trie
->mapBase
, cursor
->next
);
971 uint32_t end
= page
->length
;
973 int len
= cursor
->prefixlen
-cursor
->keylen
;
974 len
= len
<= 0 ? 0 : len
;
975 if (trie
->cflags
& kCFBurstTriePrefixCompression
) {
976 uint8_t pfx
[CHARACTER_SET_SIZE
];
977 PageEntryPacked
*lastEntry
= 0;
979 PageEntryPacked
*entry
= (PageEntryPacked
*)&page
->data
[cur
];
980 int lencompare
= (entry
->strlen
+entry
->pfxLen
)-len
;
981 if (lastEntry
&& entry
->pfxLen
>lastEntry
->pfxLen
) memcpy(pfx
+lastEntry
->pfxLen
, lastEntry
->string
, entry
->pfxLen
-lastEntry
->pfxLen
);
982 if (lencompare
>= 0 &&
983 (len
== 0 || (__builtin_memcmp(pfx
, cursor
->prefix
+cursor
->keylen
, entry
->pfxLen
) == 0 &&
984 __builtin_memcmp(entry
->string
, cursor
->prefix
+cursor
->keylen
+entry
->pfxLen
, cursor
->prefixlen
-cursor
->keylen
-entry
->pfxLen
) == 0))) {
985 memcpy(cursor
->key
+cursor
->keylen
, pfx
, entry
->pfxLen
);
986 memcpy(cursor
->key
+cursor
->keylen
+entry
->pfxLen
, entry
->string
, entry
->strlen
);
987 cursor
->key
[cursor
->keylen
+entry
->pfxLen
+entry
->strlen
] = 0;
988 if (entry
->payload
&& callback(ctx
, (const uint8_t *)cursor
->key
, entry
->payload
, lencompare
==0)) return;
991 cur
+= sizeof(*entry
) + entry
->strlen
;
995 PageEntry
*entry
= (PageEntry
*)&page
->data
[cur
];
996 int lencompare
= entry
->strlen
-len
;
997 if (lencompare
>= 0 && __builtin_memcmp(entry
->string
, cursor
->prefix
+cursor
->keylen
, len
) == 0) {
998 memcpy(cursor
->key
+cursor
->keylen
, entry
->string
, entry
->strlen
);
999 cursor
->key
[cursor
->keylen
+entry
->strlen
] = 0;
1000 if (entry
->payload
&& callback(ctx
, (const uint8_t *)cursor
->key
, entry
->payload
, lencompare
==0)) return;
1002 cur
+= sizeof(*entry
) + entry
->strlen
;
1008 static void findCFBurstTrieLevel(CFBurstTrieRef trie
, TrieCursor
*cursor
, bool exactmatch
, void *ctx
, bool (*callback
)(void*, const uint8_t*, uint32_t, bool))
1010 if (cursor
->keylen
< cursor
->prefixlen
) {
1011 cursor
->next
= ((TrieLevelRef
)NextTrie_GetPtr(cursor
->next
))->slots
[cursor
->prefix
[cursor
->keylen
]];
1012 cursor
->key
[cursor
->keylen
++] = cursor
->prefix
[cursor
->keylen
];
1014 if (NextTrie_GetKind(cursor
->next
) == TrieKind
) {
1015 findCFBurstTrieLevel(trie
, cursor
, exactmatch
, ctx
, callback
);
1016 } else if (NextTrie_GetKind(cursor
->next
) == ListKind
) {
1017 findCFBurstTrieList(trie
, cursor
, ctx
, callback
);
1020 TrieLevelRef root
= (TrieLevelRef
)NextTrie_GetPtr(cursor
->next
);
1021 if (root
->payload
&& callback(ctx
, cursor
->key
, root
->payload
, cursor
->prefixlen
==cursor
->keylen
)) return;
1022 if (cursor
->keylen
== cursor
->prefixlen
&& exactmatch
) return;
1023 traverseCFBurstTrieLevel(trie
, root
, cursor
, exactmatch
, ctx
, callback
);
1027 static void findCFBurstTrieCompactMappedLevel(CFBurstTrieRef trie
, MapCursor
*cursor
, bool exactmatch
, void *ctx
, bool (*callback
)(void*, const uint8_t*, uint32_t, bool))
1029 CompactMapTrieLevelRef root
= (CompactMapTrieLevelRef
)DiskNextTrie_GetPtr(trie
->mapBase
, cursor
->next
);
1030 if (cursor
->keylen
< cursor
->prefixlen
) {
1031 uint8_t mykey
= *(cursor
->prefix
+cursor
->keylen
);
1032 cursor
->key
[cursor
->keylen
++] = *(cursor
->prefix
+cursor
->keylen
);
1034 uint8_t slot
= mykey
/ 64;
1035 uint8_t bit
= mykey
% 64;
1037 uint64_t bword
= root
->bitmap
[slot
];
1039 if (bword
& (1ull << bit
)) {
1040 // ** Count all the set bits up to this bit
1041 for (int i
=0; i
< slot
; i
++) item
+= __builtin_popcountll(root
->bitmap
[i
]);
1042 item
+= __builtin_popcountll(bword
& ((1ull << bit
)-1));
1043 uint32_t offset
= root
->slots
[item
];
1044 cursor
->next
= offset
;
1046 if (DiskNextTrie_GetKind(offset
) == TrieKind
) {
1047 findCFBurstTrieMappedLevel(trie
, cursor
, exactmatch
, ctx
, callback
);
1048 } else if (DiskNextTrie_GetKind(offset
) == CompactTrieKind
) {
1049 findCFBurstTrieCompactMappedLevel(trie
, cursor
, exactmatch
, ctx
, callback
);
1050 } else if (DiskNextTrie_GetKind(offset
) == ListKind
) {
1051 findCFBurstTrieMappedPage(trie
, cursor
, ctx
, callback
);
1055 if(root
->payload
&& callback(ctx
, cursor
->key
, root
->payload
, cursor
->prefixlen
==cursor
->keylen
)) return;
1056 if (cursor
->keylen
== cursor
->prefixlen
&& exactmatch
) return;
1057 traverseCFBurstTrieCompactMappedLevel(trie
, root
, cursor
, exactmatch
, ctx
, callback
);
1061 static void findCFBurstTrieMappedLevel(CFBurstTrieRef trie
, MapCursor
*cursor
, bool exactmatch
, void *ctx
, bool (*callback
)(void*, const uint8_t*, uint32_t, bool))
1063 MapTrieLevelRef root
= (MapTrieLevelRef
)DiskNextTrie_GetPtr(trie
->mapBase
, cursor
->next
);
1064 if (cursor
->keylen
< cursor
->prefixlen
) {
1065 uint8_t slot
= *(cursor
->prefix
+cursor
->keylen
);
1066 cursor
->next
= root
->slots
[slot
];
1067 cursor
->key
[cursor
->keylen
++] = slot
;
1069 if (DiskNextTrie_GetKind(cursor
->next
) == TrieKind
) {
1070 findCFBurstTrieMappedLevel(trie
, cursor
, exactmatch
, ctx
, callback
);
1071 } else if (DiskNextTrie_GetKind(cursor
->next
) == CompactTrieKind
) {
1072 findCFBurstTrieCompactMappedLevel(trie
, cursor
, exactmatch
, ctx
, callback
);
1073 } else if (DiskNextTrie_GetKind(cursor
->next
) == ListKind
) {
1074 findCFBurstTrieMappedPage(trie
, cursor
, ctx
, callback
);
1077 if (root
->payload
&& callback(ctx
, cursor
->key
, root
->payload
, cursor
->prefixlen
==cursor
->keylen
)) return;
1078 if (cursor
->keylen
== cursor
->prefixlen
&& exactmatch
) return;
1079 traverseCFBurstTrieMappedLevel(trie
, root
, cursor
, exactmatch
, ctx
, callback
);
1083 static void traverseCFBurstTrieLevel(CFBurstTrieRef trie
, TrieLevelRef root
, TrieCursor
*cursor
, bool exactmatch
, void *ctx
, bool (*callback
)(void *, const uint8_t *, uint32_t, bool))
1085 cursor
->key
[cursor
->keylen
] = 0;
1086 uint32_t len
= cursor
->keylen
;
1087 for (int i
=0; i
< CHARACTER_SET_SIZE
; i
++) {
1088 NextTrie next
= root
->slots
[i
];
1089 cursor
->keylen
= len
;
1090 cursor
->key
[cursor
->keylen
++] = i
;
1092 if (NextTrie_GetKind(next
) == TrieKind
) {
1093 TrieLevelRef level
= (TrieLevelRef
)NextTrie_GetPtr(next
);
1094 if (level
->payload
&& callback(ctx
, cursor
->key
, level
->payload
, cursor
->prefixlen
==cursor
->keylen
)) return;
1095 if (cursor
->keylen
== cursor
->prefixlen
&& exactmatch
) return;
1096 traverseCFBurstTrieLevel(trie
, level
, cursor
, exactmatch
, ctx
, callback
);
1097 } else if (NextTrie_GetKind(next
) == ListKind
) {
1098 cursor
->next
= next
;
1099 cursor
->key
[cursor
->keylen
] = 0;
1100 findCFBurstTrieList(trie
, cursor
, ctx
, callback
);
1105 static void traverseCFBurstTrieMappedLevel(CFBurstTrieRef trie
, MapTrieLevelRef root
, MapCursor
*cursor
, bool exactmatch
, void *ctx
, bool (*callback
)(void *, const uint8_t *, uint32_t, bool))
1107 cursor
->key
[cursor
->keylen
] = 0;
1108 uint32_t len
= cursor
->keylen
;
1110 for (int i
=0; i
< CHARACTER_SET_SIZE
; i
++) {
1111 uint32_t offset
= (uint32_t)root
->slots
[i
];
1112 cursor
->keylen
= len
;
1113 cursor
->key
[cursor
->keylen
++] = i
;
1115 if (DiskNextTrie_GetKind(offset
) == TrieKind
) {
1116 MapTrieLevelRef level
= (MapTrieLevelRef
)DiskNextTrie_GetPtr(trie
->mapBase
, offset
);
1117 if (level
->payload
&& callback(ctx
, cursor
->key
, level
->payload
, cursor
->prefixlen
==cursor
->keylen
)) return;
1118 if (cursor
->keylen
== cursor
->prefixlen
&& exactmatch
) return;
1119 traverseCFBurstTrieMappedLevel(trie
, level
, cursor
, exactmatch
, ctx
, callback
);
1120 } else if (DiskNextTrie_GetKind(offset
) == CompactTrieKind
) {
1121 CompactMapTrieLevelRef level
= (CompactMapTrieLevelRef
)DiskNextTrie_GetPtr(trie
->mapBase
, offset
);
1122 if (level
->payload
&& callback(ctx
, cursor
->key
, level
->payload
, cursor
->prefixlen
==cursor
->keylen
)) return;
1123 if (cursor
->keylen
== cursor
->prefixlen
&& exactmatch
) return;
1124 traverseCFBurstTrieCompactMappedLevel(trie
, level
, cursor
, exactmatch
, ctx
, callback
);
1125 } else if (DiskNextTrie_GetKind(offset
) == ListKind
) {
1126 cursor
->next
= offset
;
1127 cursor
->key
[cursor
->keylen
] = 0;
1128 findCFBurstTrieMappedPage(trie
, cursor
, ctx
, callback
);
1133 static void traverseCFBurstTrieCompactMappedLevel(CFBurstTrieRef trie
, CompactMapTrieLevelRef root
, MapCursor
*cursor
, bool exactmatch
, void *ctx
, bool (*callback
)(void *, const uint8_t *, uint32_t, bool))
1135 cursor
->key
[cursor
->keylen
] = 0;
1136 uint32_t len
= cursor
->keylen
;
1137 for (uint32_t c
=0; c
< CHARACTER_SET_SIZE
; c
++) {
1138 //** This could be optimized to remember what the last slot/item was and not count bits over and over.
1140 uint32_t slot
= mykey
/ 64;
1141 uint32_t bit
= mykey
% 64;
1143 uint64_t bword
= root
->bitmap
[slot
];
1144 cursor
->keylen
= len
;
1146 if (bword
& (1ull << bit
)) {
1147 // ** Count all the set bits up to this bit
1148 for (int i
=0; i
< slot
; i
++) item
+= __builtin_popcountll(root
->bitmap
[i
]);
1149 item
+= __builtin_popcountll(bword
& ((1ull << bit
)-1));
1150 uint32_t offset
= root
->slots
[item
];
1151 cursor
->key
[cursor
->keylen
++] = mykey
;
1153 if(DiskNextTrie_GetKind(offset
) == CompactTrieKind
) {
1154 CompactMapTrieLevelRef level
= (CompactMapTrieLevelRef
)DiskNextTrie_GetPtr(trie
->mapBase
, offset
);
1155 if (level
->payload
&& callback(ctx
, cursor
->key
, level
->payload
, cursor
->prefixlen
==cursor
->keylen
)) return;
1156 if (cursor
->keylen
== cursor
->prefixlen
&& exactmatch
) return;
1157 traverseCFBurstTrieCompactMappedLevel(trie
, level
, cursor
, exactmatch
, ctx
, callback
);
1158 } else if(DiskNextTrie_GetKind(offset
) == TrieKind
) {
1159 traverseCFBurstTrieMappedLevel(trie
, (MapTrieLevelRef
)DiskNextTrie_GetPtr(trie
->mapBase
, offset
), cursor
, exactmatch
, ctx
, callback
);
1160 } else if (DiskNextTrie_GetKind(offset
) == ListKind
) {
1161 cursor
->next
= offset
;
1162 cursor
->key
[cursor
->keylen
] = 0;
1163 findCFBurstTrieMappedPage(trie
, cursor
, ctx
, callback
);
1169 static void traverseCFBurstTrieWithCursor(CFBurstTrieRef trie
, const uint8_t *prefix
, uint32_t prefixLen
, void **cursor
, bool exactmatch
, void *ctx
, bool (*callback
)(void *, const uint8_t *, uint32_t, bool)) {
1170 if (trie
->mapBase
) {
1171 if (trie
->cflags
& kCFBurstTriePrefixCompression
) {
1172 fprintf(stderr
, "Please use CFBurstTrieCursorRef API for file based trie.\n");
1175 TrieHeader
*header
= (TrieHeader
*)trie
->mapBase
;
1177 csr
.next
= header
->rootOffset
;
1178 csr
.prefix
= prefix
;
1179 csr
.prefixlen
= prefixLen
;
1182 findCFBurstTrieMappedLevel(trie
, &csr
, exactmatch
, ctx
, callback
);
1186 csr
.next
= ((uintptr_t)&trie
->root
)|TrieKind
;
1187 csr
.prefix
= prefix
;
1188 csr
.prefixlen
= prefixLen
;
1191 findCFBurstTrieLevel(trie
, &csr
, exactmatch
, ctx
, callback
);
1195 CF_INLINE
uint32_t getPackedPageEntrySize(PageEntryPacked
*entry
)
1197 return sizeof(PageEntryPacked
) + entry
->strlen
;
1200 CF_INLINE
uint32_t getPageEntrySize(PageEntry
*entry
)
1202 return sizeof(PageEntry
) + entry
->strlen
;
1206 static void _printPageEntry(PageEntryPacked *entry) {
1207 printf("entry 0x%p:\n", entry);
1208 printf("pfxLen = %d, strLen = %d\n", entry->pfxLen, entry->strlen);
1209 if (entry->strlen > 0) {
1210 printf("string = ");
1211 for (int i = 0; i < entry->strlen; ++i)
1212 printf("%d ", entry->string[i]);
1218 static Boolean
advanceCursorOnMappedPageForByte(Page
*page
, CompactMapCursor
*cursor
, UInt8 byte
) {
1219 PageEntryPacked
*entry
;
1220 Boolean found
= FALSE
;
1221 uint32_t minPrefixLength
= 0;
1223 if (cursor
->isOnPage
) {
1224 entry
= (PageEntryPacked
*)&page
->data
[cursor
->entryOffsetInPage
];
1225 //_printPageEntry(entry);
1226 BOOL shouldContinue
= TRUE
;
1228 if (!(cursor
->entryOffsetInPage
== 0 && entry
->strlen
== 0)) {
1229 if (cursor
->entryOffsetInPage
== entry
->strlen
- 1) {
1230 minPrefixLength
= entry
->pfxLen
+ entry
->strlen
;
1231 cursor
->entryOffsetInPage
+= getPackedPageEntrySize(entry
);
1233 cursor
->offsetInEntry
++;
1234 if (entry
->string
[cursor
->offsetInEntry
] == byte
)
1236 else if (entry
->string
[cursor
->offsetInEntry
] > byte
)
1237 shouldContinue
= FALSE
;
1239 minPrefixLength
= entry
->pfxLen
+ cursor
->offsetInEntry
;
1240 cursor
->entryOffsetInPage
+= getPackedPageEntrySize(entry
);
1246 cursor
->isOnPage
= TRUE
;
1248 } else if (!shouldContinue
)
1251 cursor
->entryOffsetInPage
= 0;
1254 uint32_t pageSize
= page
->length
- sizeof(Page
);
1255 while (cursor
->entryOffsetInPage
< pageSize
) {
1256 entry
= (PageEntryPacked
*)&page
->data
[cursor
->entryOffsetInPage
];
1257 //_printPageEntry(entry);
1258 if (minPrefixLength
> entry
->pfxLen
)
1260 else if (minPrefixLength
< entry
->pfxLen
)
1261 cursor
->entryOffsetInPage
+= getPackedPageEntrySize(entry
);
1263 if (entry
->strlen
== 0)
1264 cursor
->entryOffsetInPage
+= getPackedPageEntrySize(entry
);
1266 if (entry
->string
[0] > byte
)
1267 // Entries are sorted alphabetically
1269 else if (entry
->string
[0] < byte
)
1270 cursor
->entryOffsetInPage
+= getPackedPageEntrySize(entry
);
1272 cursor
->offsetInEntry
= 0;
1281 cursor
->isOnPage
= TRUE
;
1286 static Boolean
advanceCursorMappedPageWithPerfixCompression(Page
*page
, CompactMapCursor
*cursor
, const UInt8
* bytes
, CFIndex length
)
1289 PageEntryPacked
*entry
= (PageEntryPacked
*)&page
->data
[0];
1290 if (!cursor
->isOnPage
) {
1291 cursor
->entryOffsetInPage
= 0;
1292 cursor
->offsetInEntry
= 0;
1293 cursor
->isOnPage
= entry
->pfxLen
== 0 && entry
->strlen
== 0;
1295 getMapCursorPayloadFromPackedPageEntry(entry
, cursor
, &cursor
->payload
);
1299 for (CFIndex i
= 0; i
< length
; ++i
) {
1300 if (!advanceCursorOnMappedPageForByte(page
, cursor
, bytes
[i
]))
1303 PageEntryPacked
*entry
= (PageEntryPacked
*)&page
->data
[cursor
->entryOffsetInPage
];
1304 getMapCursorPayloadFromPackedPageEntry(entry
, cursor
, &cursor
->payload
);
1308 static Boolean
advanceCursorMappedPageSortedByKey(Page
*page
, CompactMapCursor
*cursor
, const UInt8
* bytes
, CFIndex length
)
1311 PageEntry
*entry
= (PageEntry
*)&page
->data
[0];
1312 if (!cursor
->isOnPage
) {
1313 cursor
->entryOffsetInPage
= 0;
1314 cursor
->offsetInEntry
= 0;
1315 cursor
->isOnPage
= entry
->strlen
== 0;
1317 getMapCursorPayloadFromPageEntry(entry
, cursor
, &cursor
->payload
);
1322 uint32_t pageSize
= page
->length
- sizeof(Page
);
1323 const UInt8
* prefix
= NULL
;
1324 uint32_t prefixLength
= 0;
1326 if (cursor
->isOnPage
) {
1327 entry
= (PageEntry
*)&page
->data
[cursor
->entryOffsetInPage
];
1328 prefix
= entry
->string
;
1329 prefixLength
= cursor
->offsetInEntry
+ 1;
1332 while (cursor
->entryOffsetInPage
< pageSize
) {
1333 PageEntry
*entry
= (PageEntry
*)&page
->data
[cursor
->entryOffsetInPage
];
1334 if (entry
->strlen
== 0) {
1335 cursor
->entryOffsetInPage
+= getPageEntrySize(entry
);
1339 if (entry
->strlen
<= prefixLength
)
1343 if (prefixLength
> 0)
1344 cmpResult
= __builtin_memcmp(entry
->string
, prefix
, prefixLength
);
1347 else if (cmpResult
== 0) {
1348 if (entry
->strlen
< prefixLength
+ length
) {
1349 int cmpResult2
= __builtin_memcmp(entry
->string
+ prefixLength
, bytes
, entry
->strlen
- prefixLength
);
1353 cursor
->entryOffsetInPage
+= getPageEntrySize(entry
);
1355 int cmpResult2
= __builtin_memcmp(entry
->string
+ prefixLength
, bytes
, length
);
1358 else if (cmpResult2
== 0) {
1359 cursor
->isOnPage
= TRUE
;
1360 cursor
->offsetInEntry
= prefixLength
+ length
- 1;
1361 getMapCursorPayloadFromPageEntry(entry
, cursor
, &cursor
->payload
);
1364 cursor
->entryOffsetInPage
+= getPageEntrySize(entry
);
1367 cursor
->entryOffsetInPage
+= getPageEntrySize(entry
);
1373 static Boolean
advanceCursorMappedPage(CFBurstTrieRef trie
, CompactMapCursor
*cursor
, const UInt8
* bytes
, CFIndex length
)
1375 if (!bytes
|| length
< 0)
1378 Page
*page
= (Page
*)DiskNextTrie_GetPtr(trie
->mapBase
, cursor
->next
);
1379 uint32_t pageSize
= page
->length
- sizeof(Page
);
1383 if (trie
->cflags
& kCFBurstTrieSortByKey
)
1384 return advanceCursorMappedPageSortedByKey(page
, cursor
, bytes
, length
);
1385 else if (trie
->cflags
& kCFBurstTriePrefixCompression
)
1386 return advanceCursorMappedPageWithPerfixCompression(page
, cursor
, bytes
, length
);
1391 static Boolean
advanceCursorCompactMappedLevel(CFBurstTrieRef trie
, CompactMapCursor
*cursor
, const UInt8
* bytes
, CFIndex length
)
1393 if (!bytes
|| length
< 0)
1396 CompactMapTrieLevelRef root
= (CompactMapTrieLevelRef
)DiskNextTrie_GetPtr(trie
->mapBase
, cursor
->next
);
1398 cursor
->payload
= root
->payload
;
1402 uint8_t slot
= bytes
[0] / 64;
1403 uint8_t bit
= bytes
[0] % 64;
1405 uint64_t bword
= root
->bitmap
[slot
];
1406 if (bword
& (1ull << bit
)) {
1407 // Count all the set bits up to this bit
1408 for (int i
= 0; i
< slot
; ++i
)
1409 item
+= __builtin_popcountll(root
->bitmap
[i
]);
1410 item
+= __builtin_popcountll(bword
& ((1ull << bit
)-1));
1411 cursor
->next
= root
->slots
[item
];
1412 return advanceMapCursor(trie
, cursor
, bytes
+ 1, length
- 1);
1418 static Boolean
advanceCursorMappedLevel(CFBurstTrieRef trie
, CompactMapCursor
*cursor
, const UInt8
* bytes
, CFIndex length
)
1420 if (!bytes
|| length
< 0)
1423 MapTrieLevelRef root
= (MapTrieLevelRef
)DiskNextTrie_GetPtr(trie
->mapBase
, cursor
->next
);
1425 cursor
->payload
= root
->payload
;
1429 cursor
->next
= root
->slots
[bytes
[0]];
1430 return advanceMapCursor(trie
, cursor
, bytes
+ 1, length
- 1);
1433 static Boolean
advanceMapCursor(CFBurstTrieRef trie
, CompactMapCursor
*cursor
, const UInt8
* bytes
, CFIndex length
)
1435 bool result
= FALSE
;
1436 switch (DiskNextTrie_GetKind(cursor
->next
)) {
1438 result
= advanceCursorMappedLevel(trie
, cursor
, bytes
, length
);
1440 case CompactTrieKind
:
1441 result
= advanceCursorCompactMappedLevel(trie
, cursor
, bytes
, length
);
1444 result
= advanceCursorMappedPage(trie
, cursor
, bytes
, length
);
1447 TrieHeader
*header
= (TrieHeader
*)trie
->mapBase
;
1448 if (cursor
->next
== header
->rootOffset
)
1449 result
= advanceCursorMappedLevel(trie
, cursor
, bytes
, length
);
1456 static void traverseFromMapCursorMappedLevel(CFBurstTrieRef trie
, CompactMapCursor
*cursor
, UInt8
* bytes
, uint32_t capacity
, uint32_t length
, Boolean
*stop
, void *ctx
, CFBurstTrieTraversalCallback callback
)
1458 MapTrieLevelRef root
= (MapTrieLevelRef
)DiskNextTrie_GetPtr(trie
->mapBase
, cursor
->next
);
1459 if (root
->payload
) {
1460 callback(ctx
, bytes
, length
, root
->payload
, stop
);
1465 if (length
>= capacity
)
1468 for (int i
= 0; i
< CHARACTER_SET_SIZE
; ++i
) {i
=
1470 cursor
->next
= (uint32_t)root
->slots
[i
];;
1471 cursor
->isOnPage
= FALSE
;
1472 cursor
->entryOffsetInPage
= 0;
1473 cursor
->offsetInEntry
= 0;
1474 cursor
->payload
= 0;
1475 traverseFromMapCursor(trie
, cursor
, bytes
, capacity
- 1, length
+ 1, stop
, ctx
, callback
);
1481 static void traverseFromMapCursorCompactMappedLevel(CFBurstTrieRef trie
, CompactMapCursor
*cursor
, UInt8
* bytes
, uint32_t capacity
, uint32_t length
, Boolean
*stop
, void *ctx
, CFBurstTrieTraversalCallback callback
)
1483 CompactMapTrieLevelRef root
= (CompactMapTrieLevelRef
)DiskNextTrie_GetPtr(trie
->mapBase
, cursor
->next
);
1484 if (root
->payload
) {
1485 callback(ctx
, bytes
, length
, root
->payload
, stop
);
1490 if (length
>= capacity
)
1492 for (int c
= 0; c
< CHARACTER_SET_SIZE
; ++c
) {
1494 //This could be optimized to remember what the last slot/item was and not count bits over and over.
1495 uint8_t slot
= c
/ 64;
1496 uint8_t bit
= c
% 64;
1498 uint64_t bword
= root
->bitmap
[slot
];
1499 if (bword
& (1ull << bit
)) {
1500 // Count all the set bits up to this bit
1501 for (int i
= 0; i
< slot
; ++i
)
1502 item
+= __builtin_popcountll(root
->bitmap
[i
]);
1503 item
+= __builtin_popcountll(bword
& ((1ull << bit
)-1));
1504 cursor
->next
= root
->slots
[item
];
1505 cursor
->isOnPage
= FALSE
;
1506 cursor
->entryOffsetInPage
= 0;
1507 cursor
->offsetInEntry
= 0;
1508 cursor
->payload
= 0;
1509 traverseFromMapCursor(trie
, cursor
, bytes
, capacity
- 1, length
+ 1, stop
, ctx
, callback
);
1516 static void traverseFromMapCursorMappedPageWithPrefixCompression(Page
*page
, CompactMapCursor
*cursor
, UInt8
* bytes
, uint32_t capacity
, uint32_t length
, Boolean
*stop
, void *ctx
, CFBurstTrieTraversalCallback callback
)
1518 uint32_t pageSize
= page
->length
- sizeof(Page
);
1519 uint32_t offset
= cursor
->entryOffsetInPage
;
1520 uint32_t minPrefixLength
= 0;
1521 if (cursor
->isOnPage
) {
1522 PageEntryPacked
*entry
= (PageEntryPacked
*)&page
->data
[offset
];
1523 int32_t remainingLength
= entry
->strlen
- cursor
->offsetInEntry
- 1;
1524 if (remainingLength
>= 0 && remainingLength
<= capacity
) {
1525 memcpy(bytes
+ length
, entry
->string
+ cursor
->offsetInEntry
+ 1, remainingLength
);
1526 callback(ctx
, bytes
, length
+ remainingLength
, entry
->payload
, stop
);
1530 minPrefixLength
= entry
->pfxLen
+ cursor
->offsetInEntry
;
1531 offset
+= getPackedPageEntrySize(entry
);
1533 PageEntryPacked
*previousEntry
= NULL
;
1534 while (offset
< pageSize
) {
1535 PageEntryPacked
*entry
= (PageEntryPacked
*)&page
->data
[offset
];
1536 if (minPrefixLength
> entry
->pfxLen
)
1538 else if (entry
->payload
&& entry
->strlen
<= capacity
) {
1540 length
-= previousEntry
->strlen
+ previousEntry
->pfxLen
- entry
->pfxLen
;
1541 memcpy(bytes
+ length
, entry
->string
, entry
->strlen
);
1542 callback(ctx
, bytes
, length
+ entry
->strlen
, entry
->payload
, stop
);
1543 length
+= entry
->strlen
;
1547 previousEntry
= entry
;
1548 offset
+= getPackedPageEntrySize(entry
);
1552 static void traverseFromMapCursorMappedPageSortedByKey(Page
*page
, CompactMapCursor
*cursor
, UInt8
* bytes
, uint32_t capacity
, uint32_t length
, Boolean
*stop
, void *ctx
, CFBurstTrieTraversalCallback callback
)
1554 uint32_t pageSize
= page
->length
- sizeof(Page
);
1555 uint32_t offset
= cursor
->entryOffsetInPage
;
1556 uint32_t prefixLength
= 0;
1557 const UInt8
*prefix
= NULL
;
1558 if (cursor
->isOnPage
) {
1559 PageEntry
*entry
= (PageEntry
*)&page
->data
[offset
];
1560 int32_t remainingLength
= entry
->strlen
- cursor
->offsetInEntry
- 1;
1561 if (remainingLength
>= 0 && remainingLength
<= capacity
) {
1562 memcpy(bytes
+ length
, entry
->string
+ cursor
->offsetInEntry
, remainingLength
);
1563 callback(ctx
, bytes
, length
+ remainingLength
, entry
->payload
, stop
);
1567 prefixLength
= cursor
->offsetInEntry
+ 1;
1568 prefix
= entry
->string
;
1569 offset
+= getPageEntrySize(entry
);
1572 while (offset
< pageSize
) {
1573 PageEntry
*entry
= (PageEntry
*)&page
->data
[offset
];
1574 if (entry
->strlen
< prefixLength
)
1577 int cmpResult
= __builtin_memcmp(entry
->string
, prefix
, prefixLength
);
1580 else if (entry
->payload
&& entry
->strlen
<= capacity
) {
1581 if (entry
->strlen
> 0)
1582 memcpy(bytes
+ length
, entry
->string
+ prefixLength
, entry
->strlen
- prefixLength
);
1583 callback(ctx
, bytes
, length
+ entry
->strlen
- prefixLength
, entry
->payload
, stop
);
1587 offset
+= getPageEntrySize(entry
);
1592 static void traverseFromMapCursorMappedPage(CFBurstTrieRef trie
, CompactMapCursor
*cursor
, UInt8
* bytes
, uint32_t capacity
, uint32_t length
, Boolean
*stop
, void *ctx
, CFBurstTrieTraversalCallback callback
)
1594 Page
*page
= (Page
*)DiskNextTrie_GetPtr(trie
->mapBase
, cursor
->next
);
1595 if (trie
->cflags
& kCFBurstTrieSortByKey
)
1596 traverseFromMapCursorMappedPageSortedByKey(page
, cursor
, bytes
, capacity
, length
, stop
, ctx
, callback
);
1597 else if (trie
->cflags
& kCFBurstTriePrefixCompression
)
1598 traverseFromMapCursorMappedPageWithPrefixCompression(page
, cursor
, bytes
, capacity
, length
, stop
, ctx
, callback
);
1601 void traverseFromMapCursor(CFBurstTrieRef trie
, CompactMapCursor
*cursor
, UInt8
* bytes
, uint32_t capacity
, uint32_t length
, Boolean
*stop
, void *ctx
, CFBurstTrieTraversalCallback callback
)
1603 switch (DiskNextTrie_GetKind(cursor
->next
)) {
1605 traverseFromMapCursorMappedLevel(trie
, cursor
, bytes
, capacity
, length
, stop
, ctx
, callback
);
1607 case CompactTrieKind
:
1608 traverseFromMapCursorCompactMappedLevel(trie
, cursor
, bytes
, capacity
, length
, stop
, ctx
, callback
);
1611 traverseFromMapCursorMappedPage(trie
, cursor
, bytes
, capacity
, length
, stop
, ctx
, callback
);
1614 TrieHeader
*header
= (TrieHeader
*)trie
->mapBase
;
1615 if (cursor
->next
== header
->rootOffset
) {
1616 traverseFromMapCursorMappedLevel(trie
, cursor
, bytes
, capacity
, length
, stop
, ctx
, callback
);
1623 void copyMapCursor(const CompactMapCursor
*source
, CompactMapCursor
* destination
)
1625 destination
->next
= source
->next
;
1626 destination
->entryOffsetInPage
= source
->entryOffsetInPage
;
1627 destination
->offsetInEntry
= source
->offsetInEntry
;
1628 destination
->isOnPage
= source
->isOnPage
;
1629 destination
->payload
= source
->payload
;
1632 Boolean
areMapCursorsEqual(const CompactMapCursor
*lhs
, const CompactMapCursor
*rhs
)
1634 return lhs
->entryOffsetInPage
== rhs
->entryOffsetInPage
&& lhs
->isOnPage
== rhs
->isOnPage
&& lhs
->next
== rhs
->next
&& lhs
->offsetInEntry
== rhs
->offsetInEntry
;
1637 static Boolean
getMapCursorPayloadFromPackedPageEntry(PageEntryPacked
*entry
, const CompactMapCursor
*cursor
, uint32_t *payload
)
1641 if (cursor
->entryOffsetInPage
== 0 && cursor
->offsetInEntry
== 0 && entry
->strlen
== 0) {
1643 *payload
= entry
->payload
;
1645 } else if (cursor
->offsetInEntry
!= entry
->strlen
- 1)
1649 *payload
= entry
->payload
;
1654 static Boolean
getMapCursorPayloadFromPageEntry(PageEntry
*entry
, const CompactMapCursor
*cursor
, uint32_t *payload
)
1658 if (cursor
->entryOffsetInPage
== 0 && cursor
->offsetInEntry
== 0 && entry
->strlen
== 0) {
1660 *payload
= entry
->payload
;
1662 } else if (cursor
->offsetInEntry
!= entry
->strlen
- 1)
1666 *payload
= entry
->payload
;
1671 Boolean
getMapCursorPayload(CFBurstTrieRef trie
, const CompactMapCursor
*cursor
, uint32_t *payload
)
1675 if (cursor
->payload
) {
1677 *payload
= cursor
->payload
;
1685 static Boolean
burstTrieMappedFind(DiskTrieLevelRef trie
, char *map
, const UInt8
*key
, uint32_t length
, uint32_t *payload
, bool prefix
) {
1686 Boolean success
= false;
1688 uint32_t offset
= CFSwapInt32LittleToHost((uint32_t)trie
->slots
[*key
]);
1689 if(DiskNextTrie_GetKind(offset
) == TrieKind
) {
1690 return burstTrieMappedFind((DiskTrieLevelRef
)DiskNextTrie_GetPtr(map
,offset
), map
, key
+1, length
-1, payload
, prefix
);
1691 } else if(DiskNextTrie_GetKind(offset
) == CompactTrieKind
) {
1692 return burstTrieCompactTrieMappedFind((CompactDiskTrieLevelRef
)DiskNextTrie_GetPtr(map
, offset
), map
, key
+1, length
-1, payload
, prefix
);
1694 if(DiskNextTrie_GetKind(offset
) == ListKind
) {
1695 return burstTrieMappedPageFind((StringPage
*)DiskNextTrie_GetPtr(map
, offset
), key
+1, length
-1, payload
, prefix
);
1702 SetPayload(payload
, CFSwapInt32LittleToHost(trie
->payload
));
1709 static Boolean
burstTrieCompactTrieMappedFind(CompactDiskTrieLevelRef trie
, char *map
, const UInt8
*key
, uint32_t length
, uint32_t *payload
, bool prefix
) {
1710 Boolean success
= false;
1712 uint32_t mykey
= *key
;
1713 uint32_t slot
= mykey
/ 64;
1714 uint32_t bit
= mykey
% 64;
1716 uint64_t bword
= CFSwapInt64LittleToHost(trie
->bitmap
[slot
]);
1717 if (bword
& (1ull << bit
)) {
1718 /* Count all the set bits up to this bit */
1719 for (int i
=0; i
< slot
; i
++) {
1720 item
+= __builtin_popcountll(trie
->bitmap
[i
]);
1722 item
+= __builtin_popcountll(bword
& ((1ull << bit
)-1));
1723 uint32_t offset
= CFSwapInt32LittleToHost((uint32_t)trie
->slots
[item
]);
1724 if(DiskNextTrie_GetKind(offset
) == TrieKind
) {
1725 return burstTrieMappedFind((DiskTrieLevelRef
)DiskNextTrie_GetPtr(map
, offset
), map
, key
+1, length
-1, payload
, prefix
);
1726 } else if(DiskNextTrie_GetKind(offset
) == CompactTrieKind
) {
1727 return burstTrieCompactTrieMappedFind((CompactDiskTrieLevelRef
)DiskNextTrie_GetPtr(map
, offset
), map
, key
+1, length
-1, payload
, prefix
);
1730 if(DiskNextTrie_GetKind(offset
) == ListKind
) {
1731 return burstTrieMappedPageFind((StringPage
*)DiskNextTrie_GetPtr(map
, offset
), key
+1, length
-1, payload
, prefix
);
1739 SetPayload(payload
, CFSwapInt32LittleToHost(trie
->payload
));
1746 static Boolean
burstTrieMappedPageFind(StringPage
*page
, const UInt8
*key
, uint32_t length
, uint32_t *payload
, bool prefix
) {
1747 Boolean success
= false;
1748 uint32_t end
= CFSwapInt32LittleToHost(page
->length
);
1753 StringPageEntryPacked
*entry
= (StringPageEntryPacked
*)&page
->data
[cur
];
1754 uint16_t strlen
= entry
->pfxLen
+CFSwapInt16LittleToHost(entry
->strlen
);
1755 if (strlen
== length
&& __builtin_memcmp(pfx
, key
, entry
->pfxLen
) == 0 && __builtin_memcmp(entry
->string
, key
+entry
->pfxLen
, length
-entry
->pfxLen
) == 0) {
1756 SetPayload(payload
, CFSwapInt32LittleToHost(entry
->payload
));
1760 memcpy(pfx
+entry
->pfxLen
, entry
->string
, MIN(255-entry
->pfxLen
, length
-entry
->pfxLen
));
1761 cur
+= sizeof(*entry
) + strlen
- entry
->pfxLen
;
1766 StringPageEntry
*entry
= (StringPageEntry
*)&page
->data
[cur
];
1767 uint16_t strlen
= CFSwapInt16LittleToHost(entry
->strlen
);
1768 if (strlen
== length
&& __builtin_memcmp(entry
->string
, key
, length
) == 0) {
1769 SetPayload(payload
, CFSwapInt32LittleToHost(entry
->payload
));
1773 cur
+= sizeof(*entry
) + strlen
;
1784 #pragma mark Serialization
1787 static bool serializeCFBurstTrieLevels(CFBurstTrieRef trie
, TrieLevelRef root
, uint32_t *offset
, off_t start_offset
, bool dispose
, bool isroot
, int fd
)
1792 for (int i
=0; i
< CHARACTER_SET_SIZE
; i
++) if (root
->slots
[i
]) count
++;
1794 uint32_t this_offset
= *offset
;
1796 if ((trie
->cflags
& kCFBurstTrieBitmapCompression
) && count
< MAX_BITMAP_SIZE
&& !isroot
) {
1797 size_t size
= sizeof(CompactMapTrieLevel
) + sizeof(uint32_t) * count
;
1800 CompactMapTrieLevel
*maptrie
= (CompactMapTrieLevel
*)alloca(size
);
1801 bzero(maptrie
, size
);
1804 for (int i
=0; i
< CHARACTER_SET_SIZE
; i
++) {
1805 NextTrie next
= root
->slots
[i
];
1807 uint32_t slot
= i
/ 64;
1808 uint32_t bit
= i
% 64;
1809 maptrie
->bitmap
[slot
] |= 1ull<<bit
;
1810 if (NextTrie_GetKind(next
) == TrieKind
) {
1811 TrieLevelRef nextLevel
= (TrieLevelRef
)NextTrie_GetPtr(next
);
1812 uint32_t childOffset
= *offset
;
1813 if (serializeCFBurstTrieLevels(trie
, nextLevel
, offset
, start_offset
, true, false, fd
)) {
1814 maptrie
->slots
[offsetSlot
] = (TrieKind
|childOffset
);
1816 maptrie
->slots
[offsetSlot
] = (CompactTrieKind
|childOffset
);
1819 maptrie
->slots
[offsetSlot
] = next
;
1824 maptrie
->payload
= root
->payload
;
1827 for (int i
=0; i
< 4; i
++) bitcount
+= __builtin_popcountll(maptrie
->bitmap
[i
]);
1828 assert(bitcount
== count
);
1830 pwrite(fd
, maptrie
, size
, this_offset
+start_offset
);
1833 MapTrieLevel maptrie
;
1834 *offset
+= sizeof(maptrie
);
1836 for (int i
=0; i
< CHARACTER_SET_SIZE
; i
++) {
1837 NextTrie next
= root
->slots
[i
];
1838 if (NextTrie_GetKind(next
) == TrieKind
) {
1839 TrieLevelRef nextLevel
= (TrieLevelRef
)NextTrie_GetPtr(next
);
1840 uint32_t childOffset
= *offset
;
1841 if (serializeCFBurstTrieLevels(trie
, nextLevel
, offset
, start_offset
, true, false, fd
)) {
1842 maptrie
.slots
[i
] = (TrieKind
|childOffset
);
1844 maptrie
.slots
[i
] = (CompactTrieKind
|childOffset
);
1847 maptrie
.slots
[i
] = next
;
1850 maptrie
.payload
= root
->payload
;
1851 pwrite(fd
, &maptrie
, sizeof(maptrie
), this_offset
+start_offset
);
1854 if (dispose
) free(root
);
1858 static void serializeCFBurstTrieList(CFBurstTrieRef trie
, ListNodeRef listNode
, int fd
)
1861 size_t size
= trie
->containerSize
;
1863 // ** Temp list of nodes to sort
1864 ListNodeRef
*nodes
= (ListNodeRef
*)malloc(sizeof(ListNodeRef
) * size
);
1865 for (listCount
= 0; listNode
; listCount
++) {
1866 if (listCount
>= size
) {
1868 nodes
= (ListNodeRef
*)realloc(nodes
, sizeof(ListNodeRef
) * size
);
1870 nodes
[listCount
] = listNode
;
1871 listNode
= listNode
->next
;
1874 char _buffer
[MAX_BUFFER_SIZE
];
1875 size_t bufferSize
= (sizeof(Page
) + size
* (sizeof(PageEntryPacked
) + MAX_STRING_SIZE
));
1876 char *buffer
= bufferSize
< MAX_BUFFER_SIZE
? _buffer
: (char *) malloc(bufferSize
);
1878 Page
*page
= (Page
*)buffer
;
1879 uint32_t current
= 0;
1881 if (trie
->cflags
& kCFBurstTriePrefixCompression
) {
1882 qsort(nodes
, listCount
, sizeof(ListNodeRef
), nodeStringCompare
);
1884 ListNodeRef last
= 0;
1885 for (int i
=0; i
< listCount
; i
++) {
1886 listNode
= nodes
[i
];
1890 pfxLen
< CHARACTER_SET_SIZE
-1 &&
1891 pfxLen
< listNode
->length
&&
1892 pfxLen
< last
->length
&&
1893 listNode
->string
[pfxLen
] == last
->string
[pfxLen
];
1897 PageEntryPacked
*entry
= (PageEntryPacked
*)(&page
->data
[current
]);
1898 entry
->strlen
= listNode
->length
- pfxLen
;
1899 entry
->payload
= listNode
->payload
;
1900 entry
->pfxLen
= pfxLen
;
1901 memcpy(entry
->string
, listNode
->string
+pfxLen
, listNode
->length
-pfxLen
);
1902 current
+= listNode
->length
- pfxLen
+ sizeof(PageEntryPacked
);
1906 if (trie
->cflags
& kCFBurstTrieSortByKey
)
1907 qsort(nodes
, listCount
, sizeof(ListNodeRef
), nodeStringCompare
);
1909 qsort(nodes
, listCount
, sizeof(ListNodeRef
), nodeWeightCompare
);
1911 for (int i
=0; i
< listCount
; i
++) {
1912 listNode
= nodes
[i
];
1913 PageEntry
*entry
= (PageEntry
*)(&page
->data
[current
]);
1914 entry
->strlen
= listNode
->length
;
1915 entry
->payload
= listNode
->payload
;
1916 memcpy(entry
->string
, listNode
->string
, listNode
->length
);
1917 current
+= listNode
->length
+ sizeof(PageEntry
);
1921 size_t len
= (sizeof(Page
) + current
+ 3) & ~3;
1922 page
->length
= current
;
1923 write(fd
, page
, len
);
1926 if (buffer
!= _buffer
) free(buffer
);
1929 static void serializeCFBurstTrieLists(CFBurstTrieRef trie
, TrieLevelRef root
, off_t start_offset
, int fd
)
1931 for (int i
=0; i
< CHARACTER_SET_SIZE
; i
++) {
1932 NextTrie next
= root
->slots
[i
];
1934 if (NextTrie_GetKind(next
) == TrieKind
) {
1935 TrieLevelRef nextLevel
= (TrieLevelRef
)NextTrie_GetPtr(next
);
1936 serializeCFBurstTrieLists(trie
, nextLevel
, start_offset
, fd
);
1938 if (NextTrie_GetKind(next
) == ListKind
) {
1939 ListNodeRef listNode
= (ListNodeRef
)NextTrie_GetPtr(next
);
1940 offset
= lseek(fd
, 0, SEEK_CUR
) - start_offset
;
1941 serializeCFBurstTrieList(trie
, listNode
, fd
);
1942 finalizeCFBurstTrieList(listNode
);
1943 //assert((offset & 3)==0);
1944 root
->slots
[i
] = (offset
|ListKind
);
1950 static size_t serializeCFBurstTrie(CFBurstTrieRef trie
, size_t start_offset
, int fd
)
1953 header
.signature
= 0x0ddba11;
1954 header
.rootOffset
= 0;
1955 header
.count
= trie
->count
;
1957 header
.flags
= trie
->cflags
;
1958 header
.reserved
[0] = 0;
1961 lseek(fd
, start_offset
, SEEK_SET
);
1963 size_t header_size
= sizeof(header
);
1964 write(fd
, &header
, header_size
);
1966 serializeCFBurstTrieLists(trie
, &trie
->root
, start_offset
, fd
);
1968 offset
= lseek(fd
, 0, SEEK_CUR
) - start_offset
;
1969 size_t off
= offsetof(TrieHeader
, rootOffset
);
1970 size_t offsize
= sizeof(offset
);
1971 pwrite(fd
, &offset
, offsize
, off
+start_offset
);
1973 serializeCFBurstTrieLevels(trie
, &trie
->root
, &offset
, start_offset
, false, true, fd
);
1975 size_t off2
= offsetof(TrieHeader
, size
);
1976 offsize
= sizeof(offset
);
1977 pwrite(fd
, &offset
, offsize
, off2
+start_offset
);
1979 offset
= lseek(fd
, 0, SEEK_END
);
1980 return (size_t)(offset
-start_offset
);
1985 #pragma mark Release
1988 static void destroyCFBurstTrie(CFBurstTrieRef trie
) {
1989 if (trie
->mapBase
) {
1990 #if DEPLOYMENT_TARGET_WINDOWS
1991 UnmapViewOfFile(trie
->mapBase
);
1992 CloseHandle(trie
->mapHandle
);
1993 CloseHandle(trie
->mappedFileHandle
);
1995 munmap(trie
->mapBase
, trie
->mapSize
);
1998 finalizeCFBurstTrie(&trie
->root
);
2004 static void finalizeCFBurstTrie(TrieLevelRef trie
) {
2005 for (int i
=0; i
< CHARACTER_SET_SIZE
; i
++) {
2006 if (NextTrie_GetKind(trie
->slots
[i
]) == TrieKind
) {
2007 finalizeCFBurstTrie((TrieLevelRef
)NextTrie_GetPtr(trie
->slots
[i
]));
2008 free((void *)NextTrie_GetPtr(trie
->slots
[i
]));
2009 } else if (NextTrie_GetKind(trie
->slots
[i
]) == ListKind
) {
2010 finalizeCFBurstTrieList((ListNodeRef
)NextTrie_GetPtr(trie
->slots
[i
]));
2015 static void finalizeCFBurstTrieList(ListNodeRef node
) {
2017 ListNodeRef next
= node
->next
;
2025 #pragma mark Helpers
2028 static int nodeWeightCompare(const void *a
, const void *b
) {
2029 ListNodeRef nodeA
= *(ListNodeRef
*)a
;
2030 ListNodeRef nodeB
= *(ListNodeRef
*)b
;
2031 return (nodeB
->weight
- nodeA
->weight
);
2034 static int nodeStringCompare(const void *a
, const void *b
) {
2035 ListNodeRef nodeA
= *(ListNodeRef
*)a
;
2036 ListNodeRef nodeB
= *(ListNodeRef
*)b
;
2037 int result
= memcmp((char *)nodeA
->string
, (char *)nodeB
->string
, MIN(nodeA
->length
, nodeB
->length
));
2038 if (result
== 0) result
= nodeA
->length
-nodeB
->length
;
2042 static bool containsKey(void *context
, const uint8_t *key
, uint32_t payload
, bool exact
)
2044 uint32_t *ctx
= (uint32_t *)context
;
2045 if (exact
) *ctx
= payload
;
2049 static CFIndex
burstTrieConvertCharactersToUTF8(UniChar
*chars
, CFIndex numChars
, UInt8
*buffer
) {
2051 for (i
= j
= 0; i
< numChars
; i
++) {
2052 UniChar c
= chars
[i
];
2053 if (CFStringIsSurrogateHighCharacter(c
) && i
+ 1 < numChars
&& CFStringIsSurrogateLowCharacter(chars
[i
+ 1])) {
2054 UTF32Char lc
= CFStringGetLongCharacterForSurrogatePair(c
, chars
[i
+ 1]);
2055 buffer
[j
++] = 0xf0 + (lc
>> 18);
2056 buffer
[j
++] = 0x80 + ((lc
& 0x3ffff) >> 12);
2057 buffer
[j
++] = 0x80 + ((lc
& 0xfff) >> 6);
2058 buffer
[j
++] = 0x80 + (lc
& 0x3f);
2063 } else if (c
< 0x800) {
2064 buffer
[j
++] = 0xc0 + (c
>> 6);
2065 buffer
[j
++] = 0x80 + (c
& 0x3f);
2067 buffer
[j
++] = 0xe0 + (c
>> 12);
2068 buffer
[j
++] = 0x80 + ((c
& 0xfff) >> 6);
2069 buffer
[j
++] = 0x80 + (c
& 0x3f);