2 * Copyright (c) 2012 Apple Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
21 * @APPLE_LICENSE_HEADER_END@
25 Copyright (c) 2008-2012, Apple Inc. All rights reserved.
26 Responsibility: Jennifer Moore
29 #include "CFInternal.h"
30 #include "CFBurstTrie.h"
31 #include "CFByteOrder.h"
39 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI
41 #include <sys/param.h>
48 #if DEPLOYMENT_TARGET_WINDOWS
50 #define statinfo _stat
51 #define stat(x,y) _NS_stat(x,y)
52 #define __builtin_memcmp(x, y, z) memcmp(x, y, z)
53 #define __builtin_popcountll(x) popcountll(x)
54 #define bzero(dst, size) ZeroMemory(dst, size)
58 static int pwrite(int fd
, const void *buf
, size_t nbyte
, off_t offset
) {
63 _lseek(fd
, offset
, SEEK_SET
);
66 int res
= _write(fd
, buf
, nbyte
);
68 // Return to previous offset
69 _lseek(fd
, pos
, SEEK_SET
);
79 #pragma mark Types and Utilities
82 #define MAX_STRING_ALLOCATION_SIZE 342
83 #define MAX_STRING_SIZE 1024
84 #define MAX_KEY_LENGTH MAX_STRING_SIZE * 4
85 #define CHARACTER_SET_SIZE 256
86 #define MAX_LIST_SIZE 256 // 64
87 #define MAX_BITMAP_SIZE 200
88 #define MAX_BUFFER_SIZE (4096<<2)
90 #define NextTrie_GetPtr(p) (p & ((~(uintptr_t)0)-3))
91 #define NextTrie_GetKind(p) (p & 3)
92 #define NextTrie_SetKind(p, kind) (p |= (3&kind))
94 #define DiskNextTrie_GetPtr(map,offset) (((uintptr_t)map) + (uintptr_t)(offset & ((~(uintptr_t)0)-3)))
95 #define DiskNextTrie_GetKind(p) (p & 3)
96 #define DiskNextTrie_SetKind(p, kind) (p |= (3&kind))
98 // Use this macro to avoid forgetting to check the pointer before assigning value to it.
99 #define SetPayload(pointer, value) do { if (pointer) *pointer = value; } while (0)
101 enum { Nothing
= 0, TrieKind
= 1, ListKind
= 2, CompactTrieKind
= 3 };
102 typedef enum { FailedInsert
= 0, NewTerm
= 1, ExistingTerm
= 2 } CFBTInsertCode
;
105 typedef uintptr_t NextTrie
;
107 typedef struct _TrieLevel
{
108 NextTrie slots
[CHARACTER_SET_SIZE
];
112 typedef TrieLevel
*TrieLevelRef
;
114 typedef struct _MapTrieLevel
{
115 uint32_t slots
[CHARACTER_SET_SIZE
];
118 typedef MapTrieLevel
*MapTrieLevelRef
;
120 typedef struct _CompactMapTrieLevel
{
121 uint64_t bitmap
[CHARACTER_SET_SIZE
/ 64];
124 } CompactMapTrieLevel
;
125 typedef CompactMapTrieLevel
*CompactMapTrieLevelRef
;
127 typedef struct _ListNode
{
128 struct _ListNode
*next
;
135 typedef struct _Page
{
140 typedef struct _PageEntryPacked
{
147 typedef struct _PageEntry
{
153 typedef struct _TrieHeader
{
159 uint64_t reserved
[16];
162 typedef struct _TrieCursor
{
168 const uint8_t *prefix
;
169 uint8_t key
[MAX_KEY_LENGTH
];
172 typedef struct _MapCursor
{
178 const uint8_t *prefix
;
179 uint8_t key
[MAX_STRING_SIZE
*4];
182 typedef struct _CompactMapCursor
{
184 uint32_t entryOffsetInPage
;
185 uint32_t offsetInEntry
;
187 // On a page, the first entry could has 0 strlen. So we need this variable to tell us whether
188 // the cursor is merely pointing at the beginning of the page, or the first entry.
189 // For example, if the trie contains "ab" and "abc", where "a" is stored on an array level,
190 // while "b" and "bc" are stored on a page level. If we creat a cursor for string "a", this cursor
191 // will point at the beginning of the page, but not at any particular key. The both entryOffsetInPage and
192 // offsetInEntry fields of the cursor are set to 0 in this case. Now if we add "a" to the
193 // trie. the page level will actually contains three entries. The first entry corresponds to string "a".
194 // That entry has 0 strlen value. If we creat a cursor for string "a" again, this cursor will
195 // point at the first entry on the page. But the entryOffsetInPage and offsetInEntry fields are still
196 // set to 0s. So we need an additional variable to make distinction between these two situations.
199 typedef struct _CompactMapCursor
*MapCursorRef
;
202 _kCFBurstTrieCursorTrieType
= 0,
203 _kCFBurstTrieCursorMapType
206 typedef struct _CFBurstTrieCursor
{
207 CompactMapCursor mapCursor
;
210 } _CFBurstTrieCursor
;
213 typedef struct _DiskTrieLevel
{
214 uint32_t slots
[CHARACTER_SET_SIZE
];
218 typedef DiskTrieLevel
*DiskTrieLevelRef
;
220 typedef struct _CompactDiskTrieLevel
{
221 uint64_t bitmap
[CHARACTER_SET_SIZE
/ 64]; // CHARACTER_SET_SIZE / 64bits per word
225 } CompactDiskTrieLevel
;
226 typedef CompactDiskTrieLevel
*CompactDiskTrieLevelRef
;
228 typedef struct _StringPage
{
233 typedef struct _StringPageEntryPacked
{
235 uint16_t strlen
; // make uint8_t if possible
238 } StringPageEntryPacked
;
240 typedef struct _StringPageEntry
{
241 uint16_t strlen
; // make uint8_t if possible
246 typedef struct _fileHeader
{
256 struct _CFBurstTrie
{
259 DiskTrieLevel diskRoot
;
260 MapTrieLevel maproot
;
267 uint32_t containerSize
;
269 #if DEPLOYMENT_TARGET_WINDOWS
271 HANDLE mappedFileHandle
;
277 #pragma mark Forward declarations
280 typedef struct _TraverseContext
{
282 void (*callback
)(void*, const UInt8
*, uint32_t, uint32_t);
285 bool foundKey(void *context
, const uint8_t *key
, uint32_t payload
, bool exact
)
287 if (context
!= NULL
) {
288 TraverseContext
*ctx
= (TraverseContext
*)context
;
289 if (ctx
->context
&& ctx
->callback
) {
290 ctx
->callback(ctx
->context
, key
, 1, payload
);
296 void CFBurstTrieTraverseWithCursor(CFBurstTrieRef trie
, const uint8_t *prefix
, uint32_t prefixLen
, void **cursor
, void *ctx
, bool (*callback
)(void *, const uint8_t *, uint32_t, bool));
298 static CFBTInsertCode
addCFBurstTrieLevel(CFBurstTrieRef trie
, TrieLevelRef root
, const uint8_t *key
, uint32_t keylen
, uint32_t weight
, uint32_t payload
);
300 static void findCFBurstTrieLevel(CFBurstTrieRef trie
, TrieCursor
*cursor
, bool exactmatch
, void *ctx
, bool (*callback
)(void*, const uint8_t*, uint32_t, bool));
301 static void findCFBurstTrieMappedLevel(CFBurstTrieRef trie
, MapCursor
*cursor
, bool exactmatch
, void *ctx
, bool (*callback
)(void*, const uint8_t*, uint32_t, bool));
302 static void traverseCFBurstTrieLevel(CFBurstTrieRef trie
, TrieLevelRef root
, TrieCursor
*cursor
, bool exactmatch
, void *ctx
, bool (*callback
)(void *, const uint8_t *, uint32_t, bool));
303 static void traverseCFBurstTrieMappedLevel(CFBurstTrieRef trie
, MapTrieLevelRef root
, MapCursor
*cursor
, bool exactmatch
, void *ctx
, bool (*callback
)(void *, const uint8_t *, uint32_t, bool));
304 static void traverseCFBurstTrieCompactMappedLevel(CFBurstTrieRef trie
, CompactMapTrieLevelRef root
, MapCursor
*cursor
, bool exactmatch
, void *ctx
, bool (*callback
)(void *, const uint8_t *, uint32_t, bool));
305 static void traverseCFBurstTrieWithCursor(CFBurstTrieRef trie
, const uint8_t *prefix
, uint32_t prefixLen
, void **cursor
, bool exactmatch
, void *ctx
, bool (*callback
)(void *, const uint8_t *, uint32_t, bool));
307 static size_t serializeCFBurstTrie(CFBurstTrieRef trie
, size_t start_offset
, int fd
);
309 static Boolean
burstTrieMappedFind(DiskTrieLevelRef trie
, char *map
, const UInt8
*key
, uint32_t length
, uint32_t *payload
, bool prefix
);
310 static Boolean
burstTrieMappedPageFind(StringPage
*page
, const UInt8
*key
, uint32_t length
, uint32_t *payload
, bool prefix
);
311 static Boolean
burstTrieCompactTrieMappedFind(CompactDiskTrieLevelRef trie
, char *map
, const UInt8
*key
, uint32_t length
, uint32_t *payload
, bool prefix
);
313 static void destroyCFBurstTrie(CFBurstTrieRef trie
);
314 static void finalizeCFBurstTrie(TrieLevelRef trie
);
315 static void finalizeCFBurstTrieList(ListNodeRef node
);
317 static int nodeWeightCompare(const void *a
, const void *b
);
318 static int nodeStringCompare(const void *a
, const void *b
);
320 bool foundKey(void *context
, const uint8_t *key
, uint32_t payload
, bool exact
);
321 bool containsKey(void *context
, const uint8_t *key
, uint32_t payload
, bool exact
);
323 static CFIndex
burstTrieConvertCharactersToUTF8(UniChar
*chars
, CFIndex numChars
, UInt8
*buffer
);
325 static Boolean
advanceMapCursor(CFBurstTrieRef trie
, CompactMapCursor
*cursor
, const UInt8
* bytes
, CFIndex length
);
326 static Boolean
getMapCursorPayload(CFBurstTrieRef trie
, const CompactMapCursor
*cursor
, uint32_t *payload
);
327 static void copyMapCursor(const CompactMapCursor
*source
, CompactMapCursor
* destination
);
328 static Boolean
areMapCursorsEqual(const CompactMapCursor
*lhs
, const CompactMapCursor
*rhs
);
329 static void traverseFromMapCursor(CFBurstTrieRef trie
, CompactMapCursor
*cursor
, UInt8
* bytes
, uint32_t capacity
, uint32_t length
, Boolean
*stop
, void *ctx
, CFBurstTrieTraversalCallback callback
);
330 static Boolean
getMapCursorPayloadFromPackedPageEntry(PageEntryPacked
*entry
, const CompactMapCursor
*cursor
, uint32_t *payload
);
331 static Boolean
getMapCursorPayloadFromPageEntry(PageEntry
*entry
, const CompactMapCursor
*cursor
, uint32_t *payload
);
335 #pragma mark Core Foundation boilerplate
338 static const void *_CFBurstTrieRetainCallback(CFAllocatorRef allocator
, const void *value
) {
339 CFBurstTrieRetain((CFBurstTrieRef
)value
);
343 static void _CFBurstTrieReleaseCallback(CFAllocatorRef allocator
, const void *value
) {
344 CFBurstTrieRelease((CFBurstTrieRef
)value
);
347 const CFDictionaryValueCallBacks kCFBurstTrieValueCallbacks
= {0, _CFBurstTrieRetainCallback
, _CFBurstTrieReleaseCallback
, NULL
, NULL
};
351 #pragma mark Public Interface
354 CFBurstTrieRef
CFBurstTrieCreateWithOptions(CFDictionaryRef options
) {
355 CFBurstTrieRef trie
= NULL
;
356 trie
= (CFBurstTrieRef
) calloc(1, sizeof(struct _CFBurstTrie
));
357 trie
->containerSize
= MAX_LIST_SIZE
;
359 CFNumberRef valueAsCFNumber
;
360 if (CFDictionaryGetValueIfPresent(options
, kCFBurstTrieCreationOptionNameContainerSize
, (const void **)&valueAsCFNumber
)) {
362 CFNumberGetValue(valueAsCFNumber
, kCFNumberIntType
, &value
);
363 trie
->containerSize
= value
> 2 && value
< 4096 ? value
: MAX_LIST_SIZE
;
369 CFBurstTrieRef
CFBurstTrieCreate() {
370 CFBurstTrieRef trie
= NULL
;
371 int listSize
= MAX_LIST_SIZE
;
372 CFNumberRef value
= CFNumberCreate(kCFAllocatorDefault
, kCFNumberIntType
, &listSize
);
373 CFMutableDictionaryRef options
= CFDictionaryCreateMutable(kCFAllocatorDefault
, 1, NULL
, NULL
);
374 CFDictionarySetValue(options
, kCFBurstTrieCreationOptionNameContainerSize
, value
);
375 trie
= CFBurstTrieCreateWithOptions(options
);
381 CFBurstTrieRef
CFBurstTrieCreateFromFile(CFStringRef path
) {
383 char filename
[PATH_MAX
];
386 /* Check valid path name */
387 if (!CFStringGetCString(path
, filename
, PATH_MAX
, kCFStringEncodingUTF8
)) return NULL
;
389 /* Check if file exists */
390 if (stat(filename
, &sb
) != 0) return NULL
;
392 /* Check if file can be opened */
393 if ((fd
=open(filename
, CF_OPENFLGS
|O_RDONLY
)) < 0) return NULL
;
395 #if DEPLOYMENT_TARGET_WINDOWS
396 HANDLE mappedFileHandle
= (HANDLE
)_get_osfhandle(fd
);
397 if (!mappedFileHandle
) return NULL
;
399 HANDLE mapHandle
= CreateFileMapping(mappedFileHandle
, NULL
, PAGE_READONLY
, 0, 0, NULL
);
400 if (!mapHandle
) return NULL
;
402 char *map
= (char *)MapViewOfFile(mapHandle
, FILE_MAP_READ
, 0, 0, sb
.st_size
);
403 if (!map
) return NULL
;
405 char *map
= mmap(0, sb
.st_size
, PROT_READ
, MAP_FILE
|MAP_SHARED
, fd
, 0);
408 CFBurstTrieRef trie
= NULL
;
409 TrieHeader
*header
= (TrieHeader
*)map
;
411 if (((uint32_t*)map
)[0] == 0xbabeface) {
412 trie
= (CFBurstTrieRef
) calloc(1, sizeof(struct _CFBurstTrie
));
414 trie
->mapSize
= CFSwapInt32LittleToHost(sb
.st_size
);
415 trie
->mapOffset
= CFSwapInt32LittleToHost(((fileHeader
*)trie
->mapBase
)->rootOffset
);
416 trie
->cflags
= CFSwapInt32LittleToHost(((fileHeader
*)trie
->mapBase
)->flags
);
417 trie
->count
= CFSwapInt32LittleToHost(((fileHeader
*)trie
->mapBase
)->count
);
419 #if DEPLOYMENT_TARGET_WINDOWS
420 trie
->mappedFileHandle
= mappedFileHandle
;
421 trie
->mapHandle
= mapHandle
;
423 // On Windows, the file being mapped must stay open as long as the map exists. Don't close it early. Other platforms close it here.
426 } else if (header
->signature
== 0xcafebabe || header
->signature
== 0x0ddba11) {
427 trie
= (CFBurstTrieRef
) calloc(1, sizeof(struct _CFBurstTrie
));
429 trie
->mapSize
= CFSwapInt32LittleToHost(sb
.st_size
);
430 trie
->cflags
= CFSwapInt32LittleToHost(header
->flags
);
431 trie
->count
= CFSwapInt32LittleToHost(header
->count
);
433 #if DEPLOYMENT_TARGET_WINDOWS
434 trie
->mappedFileHandle
= mappedFileHandle
;
435 trie
->mapHandle
= mapHandle
;
437 // On Windows, the file being mapped must stay open as long as the map exists. Don't close it early. Other platforms close it here.
444 CFBurstTrieRef
CFBurstTrieCreateFromMapBytes(char *mapBase
) {
445 CFBurstTrieRef trie
= NULL
;
446 TrieHeader
*header
= (TrieHeader
*)mapBase
;
448 if (mapBase
&& ((uint32_t*)mapBase
)[0] == 0xbabeface) {
449 trie
= (CFBurstTrieRef
) malloc(sizeof(struct _CFBurstTrie
));
450 trie
->mapBase
= mapBase
;
451 trie
->mapSize
= CFSwapInt32LittleToHost(((fileHeader
*)trie
->mapBase
)->size
);
452 trie
->mapOffset
= CFSwapInt32LittleToHost(((fileHeader
*)trie
->mapBase
)->rootOffset
);
453 trie
->cflags
= CFSwapInt32LittleToHost(((fileHeader
*)trie
->mapBase
)->flags
);
454 trie
->count
= CFSwapInt32LittleToHost(((fileHeader
*)trie
->mapBase
)->count
);
456 } else if (mapBase
&& (header
->signature
== 0xcafebabe || header
->signature
== 0x0ddba11)) {
457 trie
= (CFBurstTrieRef
) malloc(sizeof(struct _CFBurstTrie
));
458 trie
->mapBase
= mapBase
;
459 trie
->mapSize
= CFSwapInt32LittleToHost(header
->size
);
460 trie
->cflags
= CFSwapInt32LittleToHost(header
->flags
);
461 trie
->count
= CFSwapInt32LittleToHost(header
->count
);
467 Boolean
CFBurstTrieInsert(CFBurstTrieRef trie
, CFStringRef term
, CFRange termRange
, CFIndex payload
) {
468 return CFBurstTrieAddWithWeight(trie
, term
, termRange
, 1, (uint32_t)payload
);
471 Boolean
CFBurstTrieAdd(CFBurstTrieRef trie
, CFStringRef term
, CFRange termRange
, uint32_t payload
) {
472 return CFBurstTrieAddWithWeight(trie
, term
, termRange
, 1, payload
);
475 Boolean
CFBurstTrieInsertCharacters(CFBurstTrieRef trie
, UniChar
*chars
, CFIndex numChars
, CFIndex payload
) {
476 return CFBurstTrieAddCharactersWithWeight(trie
, chars
, numChars
, 1, (uint32_t)payload
);
479 Boolean
CFBurstTrieAddCharacters(CFBurstTrieRef trie
, UniChar
*chars
, CFIndex numChars
, uint32_t payload
) {
480 return CFBurstTrieAddCharactersWithWeight(trie
, chars
, numChars
, 1, payload
);
483 Boolean
CFBurstTrieInsertUTF8String(CFBurstTrieRef trie
, UInt8
*chars
, CFIndex numChars
, CFIndex payload
) {
484 return CFBurstTrieAddUTF8StringWithWeight(trie
, chars
, numChars
, 1, (uint32_t)payload
);
487 Boolean
CFBurstTrieAddUTF8String(CFBurstTrieRef trie
, UInt8
*chars
, CFIndex numChars
, uint32_t payload
) {
488 return CFBurstTrieAddUTF8StringWithWeight(trie
, chars
, numChars
, 1, payload
);
491 Boolean
CFBurstTrieInsertWithWeight(CFBurstTrieRef trie
, CFStringRef term
, CFRange termRange
, CFIndex weight
, CFIndex payload
) {
492 return CFBurstTrieAddWithWeight(trie
, term
, termRange
, weight
, (uint32_t)payload
);
495 Boolean
CFBurstTrieAddWithWeight(CFBurstTrieRef trie
, CFStringRef term
, CFRange termRange
, uint32_t weight
, uint32_t payload
) {
496 Boolean success
= false;
497 CFIndex size
= MAX_STRING_ALLOCATION_SIZE
;
498 CFIndex bytesize
= termRange
.length
* 4; //** 4-byte max character size
499 if (!trie
->mapBase
&& termRange
.length
< MAX_STRING_SIZE
&& payload
> 0) {
501 UInt8 buffer
[MAX_STRING_ALLOCATION_SIZE
+ 1];
503 if (bytesize
>= size
) {
505 key
= (UInt8
*) malloc(sizeof(UInt8
) * size
+ 1);
507 CFStringGetBytes(term
, termRange
, kCFStringEncodingUTF8
, (UInt8
)'-', (Boolean
)0, key
, size
, &length
);
510 success
= CFBurstTrieAddUTF8StringWithWeight(trie
, key
, length
, weight
, payload
);
511 if (buffer
!= key
) free(key
);
516 Boolean
CFBurstTrieInsertCharactersWithWeight(CFBurstTrieRef trie
, UniChar
*chars
, CFIndex numChars
, CFIndex weight
, CFIndex payload
) {
517 return CFBurstTrieAddCharactersWithWeight(trie
, chars
, numChars
, weight
, (uint32_t)payload
);
520 Boolean
CFBurstTrieAddCharactersWithWeight(CFBurstTrieRef trie
, UniChar
*chars
, CFIndex numChars
, uint32_t weight
, uint32_t payload
) {
521 Boolean success
= false;
522 CFIndex size
= MAX_STRING_ALLOCATION_SIZE
;
523 CFIndex bytesize
= numChars
* 4; //** 4-byte max character size
524 if (!trie
->mapBase
&& numChars
< MAX_STRING_SIZE
&& payload
> 0) {
526 UInt8 buffer
[MAX_STRING_ALLOCATION_SIZE
+ 1];
528 if (bytesize
>= size
) {
530 key
= (UInt8
*) malloc(sizeof(UInt8
) * size
+ 1);
532 length
= burstTrieConvertCharactersToUTF8(chars
, numChars
, key
);
535 success
= CFBurstTrieAddUTF8StringWithWeight(trie
, key
, length
, weight
, payload
);
536 if (buffer
!= key
) free(key
);
541 Boolean
CFBurstTrieInsertUTF8StringWithWeight(CFBurstTrieRef trie
, UInt8
*chars
, CFIndex numChars
, CFIndex weight
, CFIndex payload
) {
542 return CFBurstTrieAddUTF8StringWithWeight(trie
, chars
, numChars
, weight
, (uint32_t)payload
);
545 Boolean
CFBurstTrieAddUTF8StringWithWeight(CFBurstTrieRef trie
, UInt8
*chars
, CFIndex numChars
, uint32_t weight
, uint32_t payload
) {
546 CFBTInsertCode code
= FailedInsert
;
548 if (!trie
->mapBase
&& numChars
< MAX_STRING_SIZE
*4 && payload
> 0) {
549 code
= addCFBurstTrieLevel(trie
, &trie
->root
, chars
, numChars
, weight
, payload
);
550 if (code
== NewTerm
) trie
->count
++;
552 return code
> FailedInsert
;
555 Boolean
CFBurstTrieFind(CFBurstTrieRef trie
, CFStringRef term
, CFRange termRange
, CFIndex
*payload
) {
557 if (CFBurstTrieContains(trie
, term
, termRange
, &p
)) {
558 SetPayload(payload
, p
);
564 Boolean
CFBurstTrieContains(CFBurstTrieRef trie
, CFStringRef term
, CFRange termRange
, uint32_t *payload
) {
565 Boolean success
= false;
566 CFIndex size
= MAX_STRING_ALLOCATION_SIZE
;
567 CFIndex bytesize
= termRange
.length
* 4; //** 4-byte max character size
568 if (termRange
.length
< MAX_STRING_SIZE
) {
570 UInt8 buffer
[MAX_STRING_ALLOCATION_SIZE
+1];
572 if (bytesize
>= size
) {
574 key
= (UInt8
*) malloc(sizeof(UInt8
) * size
+ 1);
576 CFStringGetBytes(term
, termRange
, kCFStringEncodingUTF8
, (UInt8
)'-', (Boolean
)0, key
, size
, &length
);
579 success
= CFBurstTrieContainsUTF8String(trie
, key
, length
, payload
);
580 if (buffer
!= key
) free(key
);
585 Boolean
CFBurstTrieFindCharacters(CFBurstTrieRef trie
, UniChar
*chars
, CFIndex numChars
, CFIndex
*payload
) {
587 if (CFBurstTrieContainsCharacters(trie
, chars
, numChars
, &p
)) {
588 SetPayload(payload
, p
);
594 Boolean
CFBurstTrieContainsCharacters(CFBurstTrieRef trie
, UniChar
*chars
, CFIndex numChars
, uint32_t *payload
) {
595 Boolean success
= false;
596 CFIndex size
= MAX_STRING_ALLOCATION_SIZE
;
597 CFIndex bytesize
= numChars
* 4; //** 4-byte max character size
598 if (numChars
< MAX_STRING_SIZE
) {
600 UInt8 buffer
[MAX_STRING_ALLOCATION_SIZE
+ 1];
602 if (bytesize
>= size
) {
604 key
= (UInt8
*) malloc(sizeof(UInt8
) * size
+ 1);
606 length
= burstTrieConvertCharactersToUTF8(chars
, numChars
, key
);
609 success
= CFBurstTrieContainsUTF8String(trie
, key
, length
, payload
);
610 if (buffer
!= key
) free(key
);
615 Boolean
CFBurstTrieFindUTF8String(CFBurstTrieRef trie
, UInt8
*key
, CFIndex length
, CFIndex
*payload
) {
617 if (CFBurstTrieContainsUTF8String(trie
, key
, length
, &p
)) {
618 SetPayload(payload
, p
);
624 Boolean
CFBurstTrieContainsUTF8String(CFBurstTrieRef trie
, UInt8
*key
, CFIndex length
, uint32_t *payload
) {
625 Boolean success
= false;
626 if (length
< MAX_STRING_SIZE
) {
627 if (trie
->mapBase
&& ((fileHeader
*)trie
->mapBase
)->signature
== 0xbabeface) {
628 bool prefix
= (trie
->cflags
& kCFBurstTriePrefixCompression
);
629 success
= burstTrieMappedFind((DiskTrieLevelRef
)(trie
->mapBase
+CFSwapInt32LittleToHost((((uint32_t*)trie
->mapBase
)[1]))), trie
->mapBase
, key
, length
, payload
, prefix
);
630 } else if (trie
->mapBase
&& trie
->cflags
& (kCFBurstTriePrefixCompression
| kCFBurstTrieSortByKey
)) {
631 _CFBurstTrieCursor cursor
;
632 if (!CFBurstTrieSetCursorForBytes(trie
, &cursor
, key
, length
))
634 return CFBurstTrieCursorGetPayload(&cursor
, payload
);
638 traverseCFBurstTrieWithCursor(trie
, key
, length
, &cursor
, true, &found
, containsKey
);
639 if (found
) SetPayload(payload
, found
);
646 Boolean
CFBurstTrieSerialize(CFBurstTrieRef trie
, CFStringRef path
, CFBurstTrieOpts opts
) {
647 Boolean success
= false;
652 char filename
[PATH_MAX
];
654 /* Check valid path name */
655 if (!CFStringGetCString(path
, filename
, PATH_MAX
, kCFStringEncodingUTF8
)) return success
;
657 /* Check if file can be opened */
658 if ((fd
=open(filename
, CF_OPENFLGS
|O_RDWR
|O_CREAT
|O_TRUNC
, S_IRUSR
|S_IWUSR
)) < 0) return success
;
660 if (CFBurstTrieSerializeWithFileDescriptor(trie
, fd
, opts
)) success
= true;
667 Boolean
CFBurstTrieSerializeWithFileDescriptor(CFBurstTrieRef trie
, int fd
, CFBurstTrieOpts opts
) {
668 Boolean success
= false;
669 if (!trie
->mapBase
&& fd
>= 0) {
670 off_t start_offset
= lseek(fd
, 0, SEEK_END
);
673 trie
->mapSize
= serializeCFBurstTrie(trie
, start_offset
, fd
);
675 #if DEPLOYMENT_TARGET_WINDOWS
676 HANDLE mappedFileHandle
= (HANDLE
)_get_osfhandle(fd
);
677 // We need to make sure we have our own handle to keep this file open as long as the mmap lasts
678 DuplicateHandle(GetCurrentProcess(), mappedFileHandle
, GetCurrentProcess(), &mappedFileHandle
, 0, 0, DUPLICATE_SAME_ACCESS
);
679 HANDLE mapHandle
= CreateFileMapping(mappedFileHandle
, NULL
, PAGE_READONLY
, 0, 0, NULL
);
680 if (!mapHandle
) return NULL
;
681 char *map
= (char *)MapViewOfFile(mapHandle
, FILE_MAP_READ
, 0, start_offset
, trie
->mapSize
);
682 if (!map
) return NULL
;
684 trie
->mapHandle
= mapHandle
;
685 trie
->mappedFileHandle
= mappedFileHandle
;
687 trie
->mapBase
= mmap(0, trie
->mapSize
, PROT_READ
, MAP_FILE
|MAP_SHARED
, fd
, start_offset
);
695 void CFBurstTrieTraverse(CFBurstTrieRef trie
, void *ctx
, void (*callback
)(void*, const UInt8
*, uint32_t, uint32_t)) {
696 TrieHeader
*header
= (TrieHeader
*)trie
->mapBase
;
697 if (!trie
->mapBase
|| (header
->signature
== 0xcafebabe || header
->signature
== 0x0ddba11)) {
699 TraverseContext context
;
700 context
.context
= ctx
;
701 context
.callback
= callback
;
702 traverseCFBurstTrieWithCursor(trie
, (const uint8_t *)"", 0, &cursor
, false, &context
, foundKey
);
707 void CFBurstTrieTraverseWithCursor(CFBurstTrieRef trie
, const uint8_t *prefix
, uint32_t prefixLen
, void **cursor
, void *ctx
, bool (*callback
)(void *, const uint8_t *, uint32_t, bool))
709 traverseCFBurstTrieWithCursor(trie
, prefix
, prefixLen
, cursor
, false, ctx
, callback
);
712 void CFBurstTriePrint(CFBurstTrieRef trie
) {
716 CFIndex
CFBurstTrieGetCount(CFBurstTrieRef trie
) {
720 CFBurstTrieRef
CFBurstTrieRetain(CFBurstTrieRef trie
) {
725 void CFBurstTrieRelease(CFBurstTrieRef trie
) {
727 if (trie
->retain
== 0) destroyCFBurstTrie(trie
);
731 Boolean
CFBurstTrieSetCursorForBytes(CFBurstTrieRef trie
, CFBurstTrieCursorRef cursor
, const UInt8
* bytes
, CFIndex length
)
733 if (!trie
->mapBase
|| !(trie
->cflags
& (kCFBurstTriePrefixCompression
| kCFBurstTrieSortByKey
))) {
734 //fprintf(stderr, "CFBurstTrieCreateCursorForBytes() only support file based trie in prefix compression format.\n");
738 TrieHeader
*header
= (TrieHeader
*)trie
->mapBase
;
739 if (length
< 0 || !trie
)
744 cursor
->cursorType
= _kCFBurstTrieCursorMapType
;
745 cursor
->mapCursor
.next
= header
->rootOffset
;
746 cursor
->mapCursor
.isOnPage
= FALSE
;
747 cursor
->mapCursor
.entryOffsetInPage
= 0;
748 cursor
->mapCursor
.offsetInEntry
= 0;
749 cursor
->mapCursor
.payload
= 0;
753 if (!bytes
|| length
== 0)
756 return CFBurstTrieCursorAdvanceForBytes(cursor
, bytes
, length
);
760 CFBurstTrieCursorRef
CFBurstTrieCreateCursorForBytes(CFBurstTrieRef trie
, const UInt8
* bytes
, CFIndex length
)
762 CFBurstTrieCursorRef cursor
= (CFBurstTrieCursorRef
)calloc(sizeof(_CFBurstTrieCursor
), 1);
763 if (!CFBurstTrieSetCursorForBytes(trie
, cursor
, bytes
, length
)) {
764 CFBurstTrieCursorRelease(cursor
);
770 CFBurstTrieCursorRef
CFBurstTrieCursorCreateByCopy(CFBurstTrieCursorRef cursor
)
775 CFBurstTrieCursorRef newCursor
= (CFBurstTrieCursorRef
)calloc(sizeof(_CFBurstTrieCursor
), 1);
776 switch (cursor
->cursorType
) {
777 case _kCFBurstTrieCursorMapType
:
778 copyMapCursor(&cursor
->mapCursor
, &newCursor
->mapCursor
);
780 case _kCFBurstTrieCursorTrieType
:
784 newCursor
->cursorType
= cursor
->cursorType
;
785 newCursor
->trie
= cursor
->trie
;
789 Boolean
CFBurstTrieCursorIsEqual(CFBurstTrieCursorRef lhs
, CFBurstTrieCursorRef rhs
)
791 if (lhs
->trie
!= rhs
->trie
|| lhs
->cursorType
!= rhs
->cursorType
)
794 if (lhs
->cursorType
== _kCFBurstTrieCursorMapType
)
795 return areMapCursorsEqual(&lhs
->mapCursor
, &rhs
->mapCursor
);
800 Boolean
CFBurstTrieCursorAdvanceForBytes(CFBurstTrieCursorRef cursor
, const UInt8
* bytes
, CFIndex length
)
802 switch (cursor
->cursorType
) {
803 case _kCFBurstTrieCursorMapType
: {
804 CompactMapCursor tempCursor
;
805 copyMapCursor(&cursor
->mapCursor
, &tempCursor
);
806 if (advanceMapCursor(cursor
->trie
, (CompactMapCursor
*)&cursor
->mapCursor
, bytes
, length
))
809 copyMapCursor(&tempCursor
, &cursor
->mapCursor
);
813 case _kCFBurstTrieCursorTrieType
:
819 Boolean
CFBurstTrieCursorGetPayload(CFBurstTrieCursorRef cursor
, uint32_t *payload
)
821 switch (cursor
->cursorType
) {
822 case _kCFBurstTrieCursorMapType
:
823 return getMapCursorPayload(cursor
->trie
, (CompactMapCursor
*)&cursor
->mapCursor
, payload
);
824 case _kCFBurstTrieCursorTrieType
:
830 void CFBurstTrieCursorRelease(CFBurstTrieCursorRef cursor
)
837 void CFBurstTrieTraverseFromCursor(CFBurstTrieCursorRef cursor
, void *ctx
, CFBurstTrieTraversalCallback callback
)
842 UInt8
*bytes
= (UInt8
*)calloc(1, MAX_KEY_LENGTH
);
843 uint32_t capacity
= MAX_KEY_LENGTH
;
845 Boolean stop
= FALSE
;
846 switch (cursor
->cursorType
) {
847 case _kCFBurstTrieCursorMapType
: {
848 CompactMapCursor tempCursor
;
849 copyMapCursor(&cursor
->mapCursor
, &tempCursor
);
850 traverseFromMapCursor(cursor
->trie
, &tempCursor
, bytes
, capacity
,length
, &stop
, ctx
, callback
);
853 case _kCFBurstTrieCursorTrieType
:
861 #pragma mark Insertion
864 static ListNodeRef
makeCFBurstTrieListNode(const uint8_t *key
, uint32_t keylen
, uint32_t weight
, uint32_t payload
) {
865 ListNodeRef node
= (ListNodeRef
) calloc(1, sizeof(*node
) + keylen
+ 1);
866 memcpy(node
->string
, key
, keylen
);
867 node
->string
[keylen
] = 0;
869 node
->length
= keylen
;
870 node
->weight
= weight
;
871 node
->payload
= payload
;
875 static void addCFBurstTrieBurstLevel(CFBurstTrieRef trie
, TrieLevelRef root
, const uint8_t *key
, uint32_t keylen
, uint32_t weight
, uint32_t payload
) {
877 NextTrie next
= root
->slots
[*key
];
878 ListNodeRef newNode
= makeCFBurstTrieListNode(key
+1, keylen
-1, weight
, payload
);
879 newNode
->weight
= weight
;
880 newNode
->next
= (ListNodeRef
) NextTrie_GetPtr(next
);
881 next
= (uintptr_t) newNode
;
882 NextTrie_SetKind(next
, ListKind
);
883 root
->slots
[*key
] = next
;
885 // ** Handle payload.
886 root
->weight
= weight
;
887 root
->payload
= payload
;
891 static TrieLevelRef
burstCFBurstTrieLevel(CFBurstTrieRef trie
, ListNodeRef list
, uint32_t listCount
) {
892 TrieLevelRef newLevel
= (TrieLevelRef
) calloc(1, sizeof(struct _TrieLevel
));
894 addCFBurstTrieBurstLevel(trie
, newLevel
, list
->string
, list
->length
, list
->weight
, list
->payload
);
895 ListNodeRef temp
= list
;
902 static CFBTInsertCode
addCFBurstTrieListNode(CFBurstTrieRef trie
, ListNodeRef list
, const uint8_t *key
, uint32_t keylen
, uint32_t weight
, uint32_t payload
, uint32_t *listCount
)
904 CFBTInsertCode code
= FailedInsert
;
907 ListNodeRef last
= list
;
909 if (list
->length
== keylen
&& memcmp(key
, list
->string
, keylen
) == 0) {
910 list
->weight
+= weight
;
911 list
->payload
= payload
;
922 last
->next
= makeCFBurstTrieListNode(key
, keylen
, weight
, payload
);
930 static CFBTInsertCode
addCFBurstTrieLevel(CFBurstTrieRef trie
, TrieLevelRef root
, const uint8_t *key
, uint32_t keylen
, uint32_t weight
, uint32_t payload
)
932 CFBTInsertCode code
= FailedInsert
;
934 NextTrie next
= root
->slots
[*key
];
935 if (NextTrie_GetKind(next
) == TrieKind
) {
936 TrieLevelRef nextLevel
= (TrieLevelRef
) NextTrie_GetPtr(next
);
937 code
= addCFBurstTrieLevel(trie
, nextLevel
, key
+1, keylen
-1, weight
, payload
);
939 if (NextTrie_GetKind(next
) == ListKind
) {
941 ListNodeRef listNode
= (ListNodeRef
) NextTrie_GetPtr(next
);
942 code
= addCFBurstTrieListNode(trie
, listNode
, key
+1, keylen
-1, weight
, payload
, &listCount
);
943 if (listCount
> trie
->containerSize
) {
944 next
= (uintptr_t) burstCFBurstTrieLevel(trie
, listNode
, listCount
);
945 NextTrie_SetKind(next
, TrieKind
);
948 // ** Make a new list node
949 next
= (uintptr_t) makeCFBurstTrieListNode(key
+1, keylen
-1, weight
, payload
);
950 NextTrie_SetKind(next
, ListKind
);
953 root
->slots
[*key
] = next
;
957 if (!root
->weight
) code
= NewTerm
;
958 else code
= ExistingTerm
;
959 root
->weight
+= weight
;
960 root
->payload
= payload
;
967 #pragma mark Searching
969 static void findCFBurstTrieList(CFBurstTrieRef trie
, TrieCursor
*cursor
, void *ctx
, bool (*callback
)(void*, const uint8_t*, uint32_t, bool))
971 ListNodeRef list
= (ListNodeRef
)NextTrie_GetPtr(cursor
->next
);
972 int len
= cursor
->prefixlen
-cursor
->keylen
;
973 len
= len
<= 0 ? 0 : len
;
975 int lencompare
= list
->length
-len
;
976 if (list
->length
>= len
&&
977 (len
== 0 || memcmp(list
->string
, cursor
->prefix
+cursor
->keylen
, len
) == 0)) {
978 memcpy(cursor
->key
+cursor
->keylen
, list
->string
, list
->length
);
979 cursor
->key
[cursor
->keylen
+list
->length
] = 0;
980 cursor
->next
= (NextTrie
)list
;
981 if (list
->payload
&& callback(ctx
, cursor
->key
, list
->payload
, lencompare
==0)) return;
987 static void findCFBurstTrieMappedPage(CFBurstTrieRef trie
, MapCursor
*cursor
, void *ctx
, bool (*callback
)(void*, const uint8_t*, uint32_t, bool))
989 Page
*page
= (Page
*)DiskNextTrie_GetPtr(trie
->mapBase
, cursor
->next
);
990 uint32_t end
= page
->length
;
992 int len
= cursor
->prefixlen
-cursor
->keylen
;
993 len
= len
<= 0 ? 0 : len
;
994 if (trie
->cflags
& kCFBurstTriePrefixCompression
) {
995 uint8_t pfx
[CHARACTER_SET_SIZE
];
996 PageEntryPacked
*lastEntry
= 0;
998 PageEntryPacked
*entry
= (PageEntryPacked
*)&page
->data
[cur
];
999 int lencompare
= (entry
->strlen
+entry
->pfxLen
)-len
;
1000 if (lastEntry
&& entry
->pfxLen
>lastEntry
->pfxLen
) memcpy(pfx
+lastEntry
->pfxLen
, lastEntry
->string
, entry
->pfxLen
-lastEntry
->pfxLen
);
1001 if (lencompare
>= 0 &&
1002 (len
== 0 || (__builtin_memcmp(pfx
, cursor
->prefix
+cursor
->keylen
, entry
->pfxLen
) == 0 &&
1003 __builtin_memcmp(entry
->string
, cursor
->prefix
+cursor
->keylen
+entry
->pfxLen
, cursor
->prefixlen
-cursor
->keylen
-entry
->pfxLen
) == 0))) {
1004 memcpy(cursor
->key
+cursor
->keylen
, pfx
, entry
->pfxLen
);
1005 memcpy(cursor
->key
+cursor
->keylen
+entry
->pfxLen
, entry
->string
, entry
->strlen
);
1006 cursor
->key
[cursor
->keylen
+entry
->pfxLen
+entry
->strlen
] = 0;
1007 if (entry
->payload
&& callback(ctx
, (const uint8_t *)cursor
->key
, entry
->payload
, lencompare
==0)) return;
1010 cur
+= sizeof(*entry
) + entry
->strlen
;
1014 PageEntry
*entry
= (PageEntry
*)&page
->data
[cur
];
1015 int lencompare
= entry
->strlen
-len
;
1016 if (lencompare
>= 0 && __builtin_memcmp(entry
->string
, cursor
->prefix
+cursor
->keylen
, len
) == 0) {
1017 memcpy(cursor
->key
+cursor
->keylen
, entry
->string
, entry
->strlen
);
1018 cursor
->key
[cursor
->keylen
+entry
->strlen
] = 0;
1019 if (entry
->payload
&& callback(ctx
, (const uint8_t *)cursor
->key
, entry
->payload
, lencompare
==0)) return;
1021 cur
+= sizeof(*entry
) + entry
->strlen
;
1027 static void findCFBurstTrieLevel(CFBurstTrieRef trie
, TrieCursor
*cursor
, bool exactmatch
, void *ctx
, bool (*callback
)(void*, const uint8_t*, uint32_t, bool))
1029 if (cursor
->keylen
< cursor
->prefixlen
) {
1030 cursor
->next
= ((TrieLevelRef
)NextTrie_GetPtr(cursor
->next
))->slots
[cursor
->prefix
[cursor
->keylen
]];
1031 cursor
->key
[cursor
->keylen
++] = cursor
->prefix
[cursor
->keylen
];
1033 if (NextTrie_GetKind(cursor
->next
) == TrieKind
) {
1034 findCFBurstTrieLevel(trie
, cursor
, exactmatch
, ctx
, callback
);
1035 } else if (NextTrie_GetKind(cursor
->next
) == ListKind
) {
1036 findCFBurstTrieList(trie
, cursor
, ctx
, callback
);
1039 TrieLevelRef root
= (TrieLevelRef
)NextTrie_GetPtr(cursor
->next
);
1040 if (root
->payload
&& callback(ctx
, cursor
->key
, root
->payload
, cursor
->prefixlen
==cursor
->keylen
)) return;
1041 if (cursor
->keylen
== cursor
->prefixlen
&& exactmatch
) return;
1042 traverseCFBurstTrieLevel(trie
, root
, cursor
, exactmatch
, ctx
, callback
);
1046 static void findCFBurstTrieCompactMappedLevel(CFBurstTrieRef trie
, MapCursor
*cursor
, bool exactmatch
, void *ctx
, bool (*callback
)(void*, const uint8_t*, uint32_t, bool))
1048 CompactMapTrieLevelRef root
= (CompactMapTrieLevelRef
)DiskNextTrie_GetPtr(trie
->mapBase
, cursor
->next
);
1049 if (cursor
->keylen
< cursor
->prefixlen
) {
1050 uint8_t mykey
= *(cursor
->prefix
+cursor
->keylen
);
1051 cursor
->key
[cursor
->keylen
++] = *(cursor
->prefix
+cursor
->keylen
);
1053 uint8_t slot
= mykey
/ 64;
1054 uint8_t bit
= mykey
% 64;
1056 uint64_t bword
= root
->bitmap
[slot
];
1058 if (bword
& (1ull << bit
)) {
1059 // ** Count all the set bits up to this bit
1060 for (int i
=0; i
< slot
; i
++) item
+= __builtin_popcountll(root
->bitmap
[i
]);
1061 item
+= __builtin_popcountll(bword
& ((1ull << bit
)-1));
1062 uint32_t offset
= root
->slots
[item
];
1063 cursor
->next
= offset
;
1065 if (DiskNextTrie_GetKind(offset
) == TrieKind
) {
1066 findCFBurstTrieMappedLevel(trie
, cursor
, exactmatch
, ctx
, callback
);
1067 } else if (DiskNextTrie_GetKind(offset
) == CompactTrieKind
) {
1068 findCFBurstTrieCompactMappedLevel(trie
, cursor
, exactmatch
, ctx
, callback
);
1069 } else if (DiskNextTrie_GetKind(offset
) == ListKind
) {
1070 findCFBurstTrieMappedPage(trie
, cursor
, ctx
, callback
);
1074 if(root
->payload
&& callback(ctx
, cursor
->key
, root
->payload
, cursor
->prefixlen
==cursor
->keylen
)) return;
1075 if (cursor
->keylen
== cursor
->prefixlen
&& exactmatch
) return;
1076 traverseCFBurstTrieCompactMappedLevel(trie
, root
, cursor
, exactmatch
, ctx
, callback
);
1080 static void findCFBurstTrieMappedLevel(CFBurstTrieRef trie
, MapCursor
*cursor
, bool exactmatch
, void *ctx
, bool (*callback
)(void*, const uint8_t*, uint32_t, bool))
1082 MapTrieLevelRef root
= (MapTrieLevelRef
)DiskNextTrie_GetPtr(trie
->mapBase
, cursor
->next
);
1083 if (cursor
->keylen
< cursor
->prefixlen
) {
1084 uint8_t slot
= *(cursor
->prefix
+cursor
->keylen
);
1085 cursor
->next
= root
->slots
[slot
];
1086 cursor
->key
[cursor
->keylen
++] = slot
;
1088 if (DiskNextTrie_GetKind(cursor
->next
) == TrieKind
) {
1089 findCFBurstTrieMappedLevel(trie
, cursor
, exactmatch
, ctx
, callback
);
1090 } else if (DiskNextTrie_GetKind(cursor
->next
) == CompactTrieKind
) {
1091 findCFBurstTrieCompactMappedLevel(trie
, cursor
, exactmatch
, ctx
, callback
);
1092 } else if (DiskNextTrie_GetKind(cursor
->next
) == ListKind
) {
1093 findCFBurstTrieMappedPage(trie
, cursor
, ctx
, callback
);
1096 if (root
->payload
&& callback(ctx
, cursor
->key
, root
->payload
, cursor
->prefixlen
==cursor
->keylen
)) return;
1097 if (cursor
->keylen
== cursor
->prefixlen
&& exactmatch
) return;
1098 traverseCFBurstTrieMappedLevel(trie
, root
, cursor
, exactmatch
, ctx
, callback
);
1102 static void traverseCFBurstTrieLevel(CFBurstTrieRef trie
, TrieLevelRef root
, TrieCursor
*cursor
, bool exactmatch
, void *ctx
, bool (*callback
)(void *, const uint8_t *, uint32_t, bool))
1104 cursor
->key
[cursor
->keylen
] = 0;
1105 uint32_t len
= cursor
->keylen
;
1106 for (int i
=0; i
< CHARACTER_SET_SIZE
; i
++) {
1107 NextTrie next
= root
->slots
[i
];
1108 cursor
->keylen
= len
;
1109 cursor
->key
[cursor
->keylen
++] = i
;
1111 if (NextTrie_GetKind(next
) == TrieKind
) {
1112 TrieLevelRef level
= (TrieLevelRef
)NextTrie_GetPtr(next
);
1113 if (level
->payload
&& callback(ctx
, cursor
->key
, level
->payload
, cursor
->prefixlen
==cursor
->keylen
)) return;
1114 if (cursor
->keylen
== cursor
->prefixlen
&& exactmatch
) return;
1115 traverseCFBurstTrieLevel(trie
, level
, cursor
, exactmatch
, ctx
, callback
);
1116 } else if (NextTrie_GetKind(next
) == ListKind
) {
1117 cursor
->next
= next
;
1118 cursor
->key
[cursor
->keylen
] = 0;
1119 findCFBurstTrieList(trie
, cursor
, ctx
, callback
);
1124 static void traverseCFBurstTrieMappedLevel(CFBurstTrieRef trie
, MapTrieLevelRef root
, MapCursor
*cursor
, bool exactmatch
, void *ctx
, bool (*callback
)(void *, const uint8_t *, uint32_t, bool))
1126 cursor
->key
[cursor
->keylen
] = 0;
1127 uint32_t len
= cursor
->keylen
;
1129 for (int i
=0; i
< CHARACTER_SET_SIZE
; i
++) {
1130 uint32_t offset
= (uint32_t)root
->slots
[i
];
1131 cursor
->keylen
= len
;
1132 cursor
->key
[cursor
->keylen
++] = i
;
1134 if (DiskNextTrie_GetKind(offset
) == TrieKind
) {
1135 MapTrieLevelRef level
= (MapTrieLevelRef
)DiskNextTrie_GetPtr(trie
->mapBase
, offset
);
1136 if (level
->payload
&& callback(ctx
, cursor
->key
, level
->payload
, cursor
->prefixlen
==cursor
->keylen
)) return;
1137 if (cursor
->keylen
== cursor
->prefixlen
&& exactmatch
) return;
1138 traverseCFBurstTrieMappedLevel(trie
, level
, cursor
, exactmatch
, ctx
, callback
);
1139 } else if (DiskNextTrie_GetKind(offset
) == CompactTrieKind
) {
1140 CompactMapTrieLevelRef level
= (CompactMapTrieLevelRef
)DiskNextTrie_GetPtr(trie
->mapBase
, offset
);
1141 if (level
->payload
&& callback(ctx
, cursor
->key
, level
->payload
, cursor
->prefixlen
==cursor
->keylen
)) return;
1142 if (cursor
->keylen
== cursor
->prefixlen
&& exactmatch
) return;
1143 traverseCFBurstTrieCompactMappedLevel(trie
, level
, cursor
, exactmatch
, ctx
, callback
);
1144 } else if (DiskNextTrie_GetKind(offset
) == ListKind
) {
1145 cursor
->next
= offset
;
1146 cursor
->key
[cursor
->keylen
] = 0;
1147 findCFBurstTrieMappedPage(trie
, cursor
, ctx
, callback
);
1152 static void traverseCFBurstTrieCompactMappedLevel(CFBurstTrieRef trie
, CompactMapTrieLevelRef root
, MapCursor
*cursor
, bool exactmatch
, void *ctx
, bool (*callback
)(void *, const uint8_t *, uint32_t, bool))
1154 cursor
->key
[cursor
->keylen
] = 0;
1155 uint32_t len
= cursor
->keylen
;
1156 for (uint32_t c
=0; c
< CHARACTER_SET_SIZE
; c
++) {
1157 //** This could be optimized to remember what the last slot/item was and not count bits over and over.
1159 uint32_t slot
= mykey
/ 64;
1160 uint32_t bit
= mykey
% 64;
1162 uint64_t bword
= root
->bitmap
[slot
];
1163 cursor
->keylen
= len
;
1165 if (bword
& (1ull << bit
)) {
1166 // ** Count all the set bits up to this bit
1167 for (int i
=0; i
< slot
; i
++) item
+= __builtin_popcountll(root
->bitmap
[i
]);
1168 item
+= __builtin_popcountll(bword
& ((1ull << bit
)-1));
1169 uint32_t offset
= root
->slots
[item
];
1170 cursor
->key
[cursor
->keylen
++] = mykey
;
1172 if(DiskNextTrie_GetKind(offset
) == CompactTrieKind
) {
1173 CompactMapTrieLevelRef level
= (CompactMapTrieLevelRef
)DiskNextTrie_GetPtr(trie
->mapBase
, offset
);
1174 if (level
->payload
&& callback(ctx
, cursor
->key
, level
->payload
, cursor
->prefixlen
==cursor
->keylen
)) return;
1175 if (cursor
->keylen
== cursor
->prefixlen
&& exactmatch
) return;
1176 traverseCFBurstTrieCompactMappedLevel(trie
, level
, cursor
, exactmatch
, ctx
, callback
);
1177 } else if(DiskNextTrie_GetKind(offset
) == TrieKind
) {
1178 traverseCFBurstTrieMappedLevel(trie
, (MapTrieLevelRef
)DiskNextTrie_GetPtr(trie
->mapBase
, offset
), cursor
, exactmatch
, ctx
, callback
);
1179 } else if (DiskNextTrie_GetKind(offset
) == ListKind
) {
1180 cursor
->next
= offset
;
1181 cursor
->key
[cursor
->keylen
] = 0;
1182 findCFBurstTrieMappedPage(trie
, cursor
, ctx
, callback
);
1188 static void traverseCFBurstTrieWithCursor(CFBurstTrieRef trie
, const uint8_t *prefix
, uint32_t prefixLen
, void **cursor
, bool exactmatch
, void *ctx
, bool (*callback
)(void *, const uint8_t *, uint32_t, bool)) {
1189 if (trie
->mapBase
) {
1190 if (trie
->cflags
& kCFBurstTriePrefixCompression
) {
1191 fprintf(stderr
, "Please use CFBurstTrieCursorRef API for file based trie.\n");
1194 TrieHeader
*header
= (TrieHeader
*)trie
->mapBase
;
1196 csr
.next
= header
->rootOffset
;
1197 csr
.prefix
= prefix
;
1198 csr
.prefixlen
= prefixLen
;
1201 findCFBurstTrieMappedLevel(trie
, &csr
, exactmatch
, ctx
, callback
);
1205 csr
.next
= ((unsigned long)&trie
->root
)|TrieKind
;
1206 csr
.prefix
= prefix
;
1207 csr
.prefixlen
= prefixLen
;
1210 findCFBurstTrieLevel(trie
, &csr
, exactmatch
, ctx
, callback
);
1214 CF_INLINE
uint32_t getPackedPageEntrySize(PageEntryPacked
*entry
)
1216 return sizeof(PageEntryPacked
) + entry
->strlen
;
1219 CF_INLINE
uint32_t getPageEntrySize(PageEntry
*entry
)
1221 return sizeof(PageEntry
) + entry
->strlen
;
1225 static void _printPageEntry(PageEntryPacked *entry) {
1226 printf("entry 0x%p:\n", entry);
1227 printf("pfxLen = %d, strLen = %d\n", entry->pfxLen, entry->strlen);
1228 if (entry->strlen > 0) {
1229 printf("string = ");
1230 for (int i = 0; i < entry->strlen; ++i)
1231 printf("%d ", entry->string[i]);
1237 static Boolean
advanceCursorOnMappedPageForByte(Page
*page
, CompactMapCursor
*cursor
, UInt8 byte
) {
1238 PageEntryPacked
*entry
;
1239 Boolean found
= FALSE
;
1240 uint32_t minPrefixLength
= 0;
1242 if (cursor
->isOnPage
) {
1243 entry
= (PageEntryPacked
*)&page
->data
[cursor
->entryOffsetInPage
];
1244 //_printPageEntry(entry);
1245 BOOL shouldContinue
= TRUE
;
1247 if (!(cursor
->entryOffsetInPage
== 0 && entry
->strlen
== 0)) {
1248 if (cursor
->entryOffsetInPage
== entry
->strlen
- 1) {
1249 minPrefixLength
= entry
->pfxLen
+ entry
->strlen
;
1250 cursor
->entryOffsetInPage
+= getPackedPageEntrySize(entry
);
1252 cursor
->offsetInEntry
++;
1253 if (entry
->string
[cursor
->offsetInEntry
] == byte
)
1255 else if (entry
->string
[cursor
->offsetInEntry
] > byte
)
1256 shouldContinue
= FALSE
;
1258 minPrefixLength
= entry
->pfxLen
+ cursor
->offsetInEntry
;
1259 cursor
->entryOffsetInPage
+= getPackedPageEntrySize(entry
);
1265 cursor
->isOnPage
= TRUE
;
1267 } else if (!shouldContinue
)
1270 cursor
->entryOffsetInPage
= 0;
1273 uint32_t pageSize
= page
->length
- sizeof(Page
);
1274 while (cursor
->entryOffsetInPage
< pageSize
) {
1275 entry
= (PageEntryPacked
*)&page
->data
[cursor
->entryOffsetInPage
];
1276 //_printPageEntry(entry);
1277 if (minPrefixLength
> entry
->pfxLen
)
1279 else if (minPrefixLength
< entry
->pfxLen
)
1280 cursor
->entryOffsetInPage
+= getPackedPageEntrySize(entry
);
1282 if (entry
->strlen
== 0)
1283 cursor
->entryOffsetInPage
+= getPackedPageEntrySize(entry
);
1285 if (entry
->string
[0] > byte
)
1286 // Entries are sorted alphabetically
1288 else if (entry
->string
[0] < byte
)
1289 cursor
->entryOffsetInPage
+= getPackedPageEntrySize(entry
);
1291 cursor
->offsetInEntry
= 0;
1300 cursor
->isOnPage
= TRUE
;
1305 static Boolean
advanceCursorMappedPageWithPerfixCompression(Page
*page
, CompactMapCursor
*cursor
, const UInt8
* bytes
, CFIndex length
)
1308 PageEntryPacked
*entry
= (PageEntryPacked
*)&page
->data
[0];
1309 if (!cursor
->isOnPage
) {
1310 cursor
->entryOffsetInPage
= 0;
1311 cursor
->offsetInEntry
= 0;
1312 cursor
->isOnPage
= entry
->pfxLen
== 0 && entry
->strlen
== 0;
1314 getMapCursorPayloadFromPackedPageEntry(entry
, cursor
, &cursor
->payload
);
1318 for (CFIndex i
= 0; i
< length
; ++i
) {
1319 if (!advanceCursorOnMappedPageForByte(page
, cursor
, bytes
[i
]))
1322 PageEntryPacked
*entry
= (PageEntryPacked
*)&page
->data
[cursor
->entryOffsetInPage
];
1323 getMapCursorPayloadFromPackedPageEntry(entry
, cursor
, &cursor
->payload
);
1327 static Boolean
advanceCursorMappedPageSortedByKey(Page
*page
, CompactMapCursor
*cursor
, const UInt8
* bytes
, CFIndex length
)
1330 PageEntry
*entry
= (PageEntry
*)&page
->data
[0];
1331 if (!cursor
->isOnPage
) {
1332 cursor
->entryOffsetInPage
= 0;
1333 cursor
->offsetInEntry
= 0;
1334 cursor
->isOnPage
= entry
->strlen
== 0;
1336 getMapCursorPayloadFromPageEntry(entry
, cursor
, &cursor
->payload
);
1341 uint32_t pageSize
= page
->length
- sizeof(Page
);
1342 const UInt8
* prefix
= NULL
;
1343 uint32_t prefixLength
= 0;
1345 if (cursor
->isOnPage
) {
1346 entry
= (PageEntry
*)&page
->data
[cursor
->entryOffsetInPage
];
1347 prefix
= entry
->string
;
1348 prefixLength
= cursor
->offsetInEntry
+ 1;
1351 while (cursor
->entryOffsetInPage
< pageSize
) {
1352 PageEntry
*entry
= (PageEntry
*)&page
->data
[cursor
->entryOffsetInPage
];
1353 if (entry
->strlen
== 0) {
1354 cursor
->entryOffsetInPage
+= getPageEntrySize(entry
);
1358 if (entry
->strlen
<= prefixLength
)
1362 if (prefixLength
> 0)
1363 cmpResult
= __builtin_memcmp(entry
->string
, prefix
, prefixLength
);
1366 else if (cmpResult
== 0) {
1367 if (entry
->strlen
< prefixLength
+ length
) {
1368 int cmpResult2
= __builtin_memcmp(entry
->string
+ prefixLength
, bytes
, entry
->strlen
- prefixLength
);
1372 cursor
->entryOffsetInPage
+= getPageEntrySize(entry
);
1374 int cmpResult2
= __builtin_memcmp(entry
->string
+ prefixLength
, bytes
, length
);
1377 else if (cmpResult2
== 0) {
1378 cursor
->isOnPage
= TRUE
;
1379 cursor
->offsetInEntry
= prefixLength
+ length
- 1;
1380 getMapCursorPayloadFromPageEntry(entry
, cursor
, &cursor
->payload
);
1383 cursor
->entryOffsetInPage
+= getPageEntrySize(entry
);
1386 cursor
->entryOffsetInPage
+= getPageEntrySize(entry
);
1392 static Boolean
advanceCursorMappedPage(CFBurstTrieRef trie
, CompactMapCursor
*cursor
, const UInt8
* bytes
, CFIndex length
)
1394 if (!bytes
|| length
< 0)
1397 Page
*page
= (Page
*)DiskNextTrie_GetPtr(trie
->mapBase
, cursor
->next
);
1398 uint32_t pageSize
= page
->length
- sizeof(Page
);
1402 if (trie
->cflags
& kCFBurstTrieSortByKey
)
1403 return advanceCursorMappedPageSortedByKey(page
, cursor
, bytes
, length
);
1404 else if (trie
->cflags
& kCFBurstTriePrefixCompression
)
1405 return advanceCursorMappedPageWithPerfixCompression(page
, cursor
, bytes
, length
);
1410 static Boolean
advanceCursorCompactMappedLevel(CFBurstTrieRef trie
, CompactMapCursor
*cursor
, const UInt8
* bytes
, CFIndex length
)
1412 if (!bytes
|| length
< 0)
1415 CompactMapTrieLevelRef root
= (CompactMapTrieLevelRef
)DiskNextTrie_GetPtr(trie
->mapBase
, cursor
->next
);
1417 cursor
->payload
= root
->payload
;
1421 uint8_t slot
= bytes
[0] / 64;
1422 uint8_t bit
= bytes
[0] % 64;
1424 uint64_t bword
= root
->bitmap
[slot
];
1425 if (bword
& (1ull << bit
)) {
1426 // Count all the set bits up to this bit
1427 for (int i
= 0; i
< slot
; ++i
)
1428 item
+= __builtin_popcountll(root
->bitmap
[i
]);
1429 item
+= __builtin_popcountll(bword
& ((1ull << bit
)-1));
1430 cursor
->next
= root
->slots
[item
];
1431 return advanceMapCursor(trie
, cursor
, bytes
+ 1, length
- 1);
1437 static Boolean
advanceCursorMappedLevel(CFBurstTrieRef trie
, CompactMapCursor
*cursor
, const UInt8
* bytes
, CFIndex length
)
1439 if (!bytes
|| length
< 0)
1442 MapTrieLevelRef root
= (MapTrieLevelRef
)DiskNextTrie_GetPtr(trie
->mapBase
, cursor
->next
);
1444 cursor
->payload
= root
->payload
;
1448 cursor
->next
= root
->slots
[bytes
[0]];
1449 return advanceMapCursor(trie
, cursor
, bytes
+ 1, length
- 1);
1452 static Boolean
advanceMapCursor(CFBurstTrieRef trie
, CompactMapCursor
*cursor
, const UInt8
* bytes
, CFIndex length
)
1454 bool result
= FALSE
;
1455 switch (DiskNextTrie_GetKind(cursor
->next
)) {
1457 result
= advanceCursorMappedLevel(trie
, cursor
, bytes
, length
);
1459 case CompactTrieKind
:
1460 result
= advanceCursorCompactMappedLevel(trie
, cursor
, bytes
, length
);
1463 result
= advanceCursorMappedPage(trie
, cursor
, bytes
, length
);
1466 TrieHeader
*header
= (TrieHeader
*)trie
->mapBase
;
1467 if (cursor
->next
== header
->rootOffset
)
1468 result
= advanceCursorMappedLevel(trie
, cursor
, bytes
, length
);
1475 static void traverseFromMapCursorMappedLevel(CFBurstTrieRef trie
, CompactMapCursor
*cursor
, UInt8
* bytes
, uint32_t capacity
, uint32_t length
, Boolean
*stop
, void *ctx
, CFBurstTrieTraversalCallback callback
)
1477 MapTrieLevelRef root
= (MapTrieLevelRef
)DiskNextTrie_GetPtr(trie
->mapBase
, cursor
->next
);
1478 if (root
->payload
) {
1479 callback(ctx
, bytes
, length
, root
->payload
, stop
);
1484 if (length
>= capacity
)
1487 for (int i
= 0; i
< CHARACTER_SET_SIZE
; ++i
) {i
=
1489 cursor
->next
= (uint32_t)root
->slots
[i
];;
1490 cursor
->isOnPage
= FALSE
;
1491 cursor
->entryOffsetInPage
= 0;
1492 cursor
->offsetInEntry
= 0;
1493 cursor
->payload
= 0;
1494 traverseFromMapCursor(trie
, cursor
, bytes
, capacity
- 1, length
+ 1, stop
, ctx
, callback
);
1500 static void traverseFromMapCursorCompactMappedLevel(CFBurstTrieRef trie
, CompactMapCursor
*cursor
, UInt8
* bytes
, uint32_t capacity
, uint32_t length
, Boolean
*stop
, void *ctx
, CFBurstTrieTraversalCallback callback
)
1502 CompactMapTrieLevelRef root
= (CompactMapTrieLevelRef
)DiskNextTrie_GetPtr(trie
->mapBase
, cursor
->next
);
1503 if (root
->payload
) {
1504 callback(ctx
, bytes
, length
, root
->payload
, stop
);
1509 if (length
>= capacity
)
1511 for (int c
= 0; c
< CHARACTER_SET_SIZE
; ++c
) {
1513 //This could be optimized to remember what the last slot/item was and not count bits over and over.
1514 uint8_t slot
= c
/ 64;
1515 uint8_t bit
= c
% 64;
1517 uint64_t bword
= root
->bitmap
[slot
];
1518 if (bword
& (1ull << bit
)) {
1519 // Count all the set bits up to this bit
1520 for (int i
= 0; i
< slot
; ++i
)
1521 item
+= __builtin_popcountll(root
->bitmap
[i
]);
1522 item
+= __builtin_popcountll(bword
& ((1ull << bit
)-1));
1523 cursor
->next
= root
->slots
[item
];
1524 cursor
->isOnPage
= FALSE
;
1525 cursor
->entryOffsetInPage
= 0;
1526 cursor
->offsetInEntry
= 0;
1527 cursor
->payload
= 0;
1528 traverseFromMapCursor(trie
, cursor
, bytes
, capacity
- 1, length
+ 1, stop
, ctx
, callback
);
1535 static void traverseFromMapCursorMappedPageWithPrefixCompression(Page
*page
, CompactMapCursor
*cursor
, UInt8
* bytes
, uint32_t capacity
, uint32_t length
, Boolean
*stop
, void *ctx
, CFBurstTrieTraversalCallback callback
)
1537 uint32_t pageSize
= page
->length
- sizeof(Page
);
1538 uint32_t offset
= cursor
->entryOffsetInPage
;
1539 uint32_t minPrefixLength
= 0;
1540 if (cursor
->isOnPage
) {
1541 PageEntryPacked
*entry
= (PageEntryPacked
*)&page
->data
[offset
];
1542 int32_t remainingLength
= entry
->strlen
- cursor
->offsetInEntry
- 1;
1543 if (remainingLength
>= 0 && remainingLength
<= capacity
) {
1544 memcpy(bytes
+ length
, entry
->string
+ cursor
->offsetInEntry
+ 1, remainingLength
);
1545 callback(ctx
, bytes
, length
+ remainingLength
, entry
->payload
, stop
);
1549 minPrefixLength
= entry
->pfxLen
+ cursor
->offsetInEntry
;
1550 offset
+= getPackedPageEntrySize(entry
);
1552 PageEntryPacked
*previousEntry
= NULL
;
1553 while (offset
< pageSize
) {
1554 PageEntryPacked
*entry
= (PageEntryPacked
*)&page
->data
[offset
];
1555 if (minPrefixLength
> entry
->pfxLen
)
1557 else if (entry
->payload
&& entry
->strlen
<= capacity
) {
1559 length
-= previousEntry
->strlen
+ previousEntry
->pfxLen
- entry
->pfxLen
;
1560 memcpy(bytes
+ length
, entry
->string
, entry
->strlen
);
1561 callback(ctx
, bytes
, length
+ entry
->strlen
, entry
->payload
, stop
);
1562 length
+= entry
->strlen
;
1566 previousEntry
= entry
;
1567 offset
+= getPackedPageEntrySize(entry
);
1571 static void traverseFromMapCursorMappedPageSortedByKey(Page
*page
, CompactMapCursor
*cursor
, UInt8
* bytes
, uint32_t capacity
, uint32_t length
, Boolean
*stop
, void *ctx
, CFBurstTrieTraversalCallback callback
)
1573 uint32_t pageSize
= page
->length
- sizeof(Page
);
1574 uint32_t offset
= cursor
->entryOffsetInPage
;
1575 uint32_t prefixLength
= 0;
1576 const UInt8
*prefix
= NULL
;
1577 if (cursor
->isOnPage
) {
1578 PageEntry
*entry
= (PageEntry
*)&page
->data
[offset
];
1579 int32_t remainingLength
= entry
->strlen
- cursor
->offsetInEntry
- 1;
1580 if (remainingLength
>= 0 && remainingLength
<= capacity
) {
1581 memcpy(bytes
+ length
, entry
->string
+ cursor
->offsetInEntry
, remainingLength
);
1582 callback(ctx
, bytes
, length
+ remainingLength
, entry
->payload
, stop
);
1586 prefixLength
= cursor
->offsetInEntry
+ 1;
1587 prefix
= entry
->string
;
1588 offset
+= getPageEntrySize(entry
);
1591 while (offset
< pageSize
) {
1592 PageEntry
*entry
= (PageEntry
*)&page
->data
[offset
];
1593 if (entry
->strlen
< prefixLength
)
1596 int cmpResult
= __builtin_memcmp(entry
->string
, prefix
, prefixLength
);
1599 else if (entry
->payload
&& entry
->strlen
<= capacity
) {
1600 if (entry
->strlen
> 0)
1601 memcpy(bytes
+ length
, entry
->string
+ prefixLength
, entry
->strlen
- prefixLength
);
1602 callback(ctx
, bytes
, length
+ entry
->strlen
- prefixLength
, entry
->payload
, stop
);
1606 offset
+= getPageEntrySize(entry
);
1611 static void traverseFromMapCursorMappedPage(CFBurstTrieRef trie
, CompactMapCursor
*cursor
, UInt8
* bytes
, uint32_t capacity
, uint32_t length
, Boolean
*stop
, void *ctx
, CFBurstTrieTraversalCallback callback
)
1613 Page
*page
= (Page
*)DiskNextTrie_GetPtr(trie
->mapBase
, cursor
->next
);
1614 if (trie
->cflags
& kCFBurstTrieSortByKey
)
1615 traverseFromMapCursorMappedPageSortedByKey(page
, cursor
, bytes
, capacity
, length
, stop
, ctx
, callback
);
1616 else if (trie
->cflags
& kCFBurstTriePrefixCompression
)
1617 traverseFromMapCursorMappedPageWithPrefixCompression(page
, cursor
, bytes
, capacity
, length
, stop
, ctx
, callback
);
1620 void traverseFromMapCursor(CFBurstTrieRef trie
, CompactMapCursor
*cursor
, UInt8
* bytes
, uint32_t capacity
, uint32_t length
, Boolean
*stop
, void *ctx
, CFBurstTrieTraversalCallback callback
)
1622 switch (DiskNextTrie_GetKind(cursor
->next
)) {
1624 traverseFromMapCursorMappedLevel(trie
, cursor
, bytes
, capacity
, length
, stop
, ctx
, callback
);
1626 case CompactTrieKind
:
1627 traverseFromMapCursorCompactMappedLevel(trie
, cursor
, bytes
, capacity
, length
, stop
, ctx
, callback
);
1630 traverseFromMapCursorMappedPage(trie
, cursor
, bytes
, capacity
, length
, stop
, ctx
, callback
);
1633 TrieHeader
*header
= (TrieHeader
*)trie
->mapBase
;
1634 if (cursor
->next
== header
->rootOffset
) {
1635 traverseFromMapCursorMappedLevel(trie
, cursor
, bytes
, capacity
, length
, stop
, ctx
, callback
);
1642 void copyMapCursor(const CompactMapCursor
*source
, CompactMapCursor
* destination
)
1644 destination
->next
= source
->next
;
1645 destination
->entryOffsetInPage
= source
->entryOffsetInPage
;
1646 destination
->offsetInEntry
= source
->offsetInEntry
;
1647 destination
->isOnPage
= source
->isOnPage
;
1648 destination
->payload
= source
->payload
;
1651 Boolean
areMapCursorsEqual(const CompactMapCursor
*lhs
, const CompactMapCursor
*rhs
)
1653 return lhs
->entryOffsetInPage
== rhs
->entryOffsetInPage
&& lhs
->isOnPage
== rhs
->isOnPage
&& lhs
->next
== rhs
->next
&& lhs
->offsetInEntry
== rhs
->offsetInEntry
;
1656 static Boolean
getMapCursorPayloadFromPackedPageEntry(PageEntryPacked
*entry
, const CompactMapCursor
*cursor
, uint32_t *payload
)
1660 if (cursor
->entryOffsetInPage
== 0 && cursor
->offsetInEntry
== 0 && entry
->strlen
== 0) {
1662 *payload
= entry
->payload
;
1664 } else if (cursor
->offsetInEntry
!= entry
->strlen
- 1)
1668 *payload
= entry
->payload
;
1673 static Boolean
getMapCursorPayloadFromPageEntry(PageEntry
*entry
, const CompactMapCursor
*cursor
, uint32_t *payload
)
1677 if (cursor
->entryOffsetInPage
== 0 && cursor
->offsetInEntry
== 0 && entry
->strlen
== 0) {
1679 *payload
= entry
->payload
;
1681 } else if (cursor
->offsetInEntry
!= entry
->strlen
- 1)
1685 *payload
= entry
->payload
;
1690 Boolean
getMapCursorPayload(CFBurstTrieRef trie
, const CompactMapCursor
*cursor
, uint32_t *payload
)
1694 if (cursor
->payload
) {
1696 *payload
= cursor
->payload
;
1704 static Boolean
burstTrieMappedFind(DiskTrieLevelRef trie
, char *map
, const UInt8
*key
, uint32_t length
, uint32_t *payload
, bool prefix
) {
1705 Boolean success
= false;
1707 uint32_t offset
= CFSwapInt32LittleToHost((uint32_t)trie
->slots
[*key
]);
1708 if(DiskNextTrie_GetKind(offset
) == TrieKind
) {
1709 return burstTrieMappedFind((DiskTrieLevelRef
)DiskNextTrie_GetPtr(map
,offset
), map
, key
+1, length
-1, payload
, prefix
);
1710 } else if(DiskNextTrie_GetKind(offset
) == CompactTrieKind
) {
1711 return burstTrieCompactTrieMappedFind((CompactDiskTrieLevelRef
)DiskNextTrie_GetPtr(map
, offset
), map
, key
+1, length
-1, payload
, prefix
);
1713 if(DiskNextTrie_GetKind(offset
) == ListKind
) {
1714 return burstTrieMappedPageFind((StringPage
*)DiskNextTrie_GetPtr(map
, offset
), key
+1, length
-1, payload
, prefix
);
1721 SetPayload(payload
, CFSwapInt32LittleToHost(trie
->payload
));
1728 static Boolean
burstTrieCompactTrieMappedFind(CompactDiskTrieLevelRef trie
, char *map
, const UInt8
*key
, uint32_t length
, uint32_t *payload
, bool prefix
) {
1729 Boolean success
= false;
1731 uint32_t mykey
= *key
;
1732 uint32_t slot
= mykey
/ 64;
1733 uint32_t bit
= mykey
% 64;
1735 uint64_t bword
= CFSwapInt64LittleToHost(trie
->bitmap
[slot
]);
1736 if (bword
& (1ull << bit
)) {
1737 /* Count all the set bits up to this bit */
1738 for (int i
=0; i
< slot
; i
++) {
1739 item
+= __builtin_popcountll(trie
->bitmap
[i
]);
1741 item
+= __builtin_popcountll(bword
& ((1ull << bit
)-1));
1742 uint32_t offset
= CFSwapInt32LittleToHost((uint32_t)trie
->slots
[item
]);
1743 if(DiskNextTrie_GetKind(offset
) == TrieKind
) {
1744 return burstTrieMappedFind((DiskTrieLevelRef
)DiskNextTrie_GetPtr(map
, offset
), map
, key
+1, length
-1, payload
, prefix
);
1745 } else if(DiskNextTrie_GetKind(offset
) == CompactTrieKind
) {
1746 return burstTrieCompactTrieMappedFind((CompactDiskTrieLevelRef
)DiskNextTrie_GetPtr(map
, offset
), map
, key
+1, length
-1, payload
, prefix
);
1749 if(DiskNextTrie_GetKind(offset
) == ListKind
) {
1750 return burstTrieMappedPageFind((StringPage
*)DiskNextTrie_GetPtr(map
, offset
), key
+1, length
-1, payload
, prefix
);
1758 SetPayload(payload
, CFSwapInt32LittleToHost(trie
->payload
));
1765 static Boolean
burstTrieMappedPageFind(StringPage
*page
, const UInt8
*key
, uint32_t length
, uint32_t *payload
, bool prefix
) {
1766 Boolean success
= false;
1767 uint32_t end
= CFSwapInt32LittleToHost(page
->length
);
1772 StringPageEntryPacked
*entry
= (StringPageEntryPacked
*)&page
->data
[cur
];
1773 uint16_t strlen
= entry
->pfxLen
+CFSwapInt16LittleToHost(entry
->strlen
);
1774 if (strlen
== length
&& __builtin_memcmp(pfx
, key
, entry
->pfxLen
) == 0 && __builtin_memcmp(entry
->string
, key
+entry
->pfxLen
, length
-entry
->pfxLen
) == 0) {
1775 SetPayload(payload
, CFSwapInt32LittleToHost(entry
->payload
));
1779 memcpy(pfx
+entry
->pfxLen
, entry
->string
, MIN(255-entry
->pfxLen
, length
-entry
->pfxLen
));
1780 cur
+= sizeof(*entry
) + strlen
- entry
->pfxLen
;
1785 StringPageEntry
*entry
= (StringPageEntry
*)&page
->data
[cur
];
1786 uint16_t strlen
= CFSwapInt16LittleToHost(entry
->strlen
);
1787 if (strlen
== length
&& __builtin_memcmp(entry
->string
, key
, length
) == 0) {
1788 SetPayload(payload
, CFSwapInt32LittleToHost(entry
->payload
));
1792 cur
+= sizeof(*entry
) + strlen
;
1803 #pragma mark Serialization
1806 static bool serializeCFBurstTrieLevels(CFBurstTrieRef trie
, TrieLevelRef root
, uint32_t *offset
, off_t start_offset
, bool dispose
, bool isroot
, int fd
)
1811 for (int i
=0; i
< CHARACTER_SET_SIZE
; i
++) if (root
->slots
[i
]) count
++;
1813 uint32_t this_offset
= *offset
;
1815 if ((trie
->cflags
& kCFBurstTrieBitmapCompression
) && count
< MAX_BITMAP_SIZE
&& !isroot
) {
1816 size_t size
= sizeof(CompactMapTrieLevel
) + sizeof(uint32_t) * count
;
1819 CompactMapTrieLevel
*maptrie
= (CompactMapTrieLevel
*)alloca(size
);
1820 bzero(maptrie
, size
);
1823 for (int i
=0; i
< CHARACTER_SET_SIZE
; i
++) {
1824 NextTrie next
= root
->slots
[i
];
1826 uint32_t slot
= i
/ 64;
1827 uint32_t bit
= i
% 64;
1828 maptrie
->bitmap
[slot
] |= 1ull<<bit
;
1829 if (NextTrie_GetKind(next
) == TrieKind
) {
1830 TrieLevelRef nextLevel
= (TrieLevelRef
)NextTrie_GetPtr(next
);
1831 uint32_t childOffset
= *offset
;
1832 if (serializeCFBurstTrieLevels(trie
, nextLevel
, offset
, start_offset
, true, false, fd
)) {
1833 maptrie
->slots
[offsetSlot
] = (TrieKind
|childOffset
);
1835 maptrie
->slots
[offsetSlot
] = (CompactTrieKind
|childOffset
);
1838 maptrie
->slots
[offsetSlot
] = next
;
1843 maptrie
->payload
= root
->payload
;
1846 for (int i
=0; i
< 4; i
++) bitcount
+= __builtin_popcountll(maptrie
->bitmap
[i
]);
1847 assert(bitcount
== count
);
1849 pwrite(fd
, maptrie
, size
, this_offset
+start_offset
);
1852 MapTrieLevel maptrie
;
1853 *offset
+= sizeof(maptrie
);
1855 for (int i
=0; i
< CHARACTER_SET_SIZE
; i
++) {
1856 NextTrie next
= root
->slots
[i
];
1857 if (NextTrie_GetKind(next
) == TrieKind
) {
1858 TrieLevelRef nextLevel
= (TrieLevelRef
)NextTrie_GetPtr(next
);
1859 uint32_t childOffset
= *offset
;
1860 if (serializeCFBurstTrieLevels(trie
, nextLevel
, offset
, start_offset
, true, false, fd
)) {
1861 maptrie
.slots
[i
] = (TrieKind
|childOffset
);
1863 maptrie
.slots
[i
] = (CompactTrieKind
|childOffset
);
1866 maptrie
.slots
[i
] = next
;
1869 maptrie
.payload
= root
->payload
;
1870 pwrite(fd
, &maptrie
, sizeof(maptrie
), this_offset
+start_offset
);
1873 if (dispose
) free(root
);
1877 static void serializeCFBurstTrieList(CFBurstTrieRef trie
, ListNodeRef listNode
, int fd
)
1880 size_t size
= trie
->containerSize
;
1882 // ** Temp list of nodes to sort
1883 ListNodeRef
*nodes
= (ListNodeRef
*)malloc(sizeof(ListNodeRef
) * size
);
1884 for (listCount
= 0; listNode
; listCount
++) {
1885 if (listCount
>= size
) {
1887 nodes
= (ListNodeRef
*)realloc(nodes
, sizeof(ListNodeRef
) * size
);
1889 nodes
[listCount
] = listNode
;
1890 listNode
= listNode
->next
;
1893 char _buffer
[MAX_BUFFER_SIZE
];
1894 char bufferSize
= (sizeof(Page
) + size
* (sizeof(PageEntryPacked
) + MAX_STRING_SIZE
));
1895 char *buffer
= bufferSize
< MAX_BUFFER_SIZE
? _buffer
: (char *) malloc(bufferSize
);
1897 Page
*page
= (Page
*)buffer
;
1898 uint32_t current
= 0;
1900 if (trie
->cflags
& kCFBurstTriePrefixCompression
) {
1901 qsort(nodes
, listCount
, sizeof(ListNodeRef
), nodeStringCompare
);
1903 ListNodeRef last
= 0;
1904 for (int i
=0; i
< listCount
; i
++) {
1905 listNode
= nodes
[i
];
1909 pfxLen
< CHARACTER_SET_SIZE
-1 &&
1910 pfxLen
< listNode
->length
&&
1911 pfxLen
< last
->length
&&
1912 listNode
->string
[pfxLen
] == last
->string
[pfxLen
];
1916 PageEntryPacked
*entry
= (PageEntryPacked
*)(&page
->data
[current
]);
1917 entry
->strlen
= listNode
->length
- pfxLen
;
1918 entry
->payload
= listNode
->payload
;
1919 entry
->pfxLen
= pfxLen
;
1920 memcpy(entry
->string
, listNode
->string
+pfxLen
, listNode
->length
-pfxLen
);
1921 current
+= listNode
->length
- pfxLen
+ sizeof(PageEntryPacked
);
1925 if (trie
->cflags
& kCFBurstTrieSortByKey
)
1926 qsort(nodes
, listCount
, sizeof(ListNodeRef
), nodeStringCompare
);
1928 qsort(nodes
, listCount
, sizeof(ListNodeRef
), nodeWeightCompare
);
1930 for (int i
=0; i
< listCount
; i
++) {
1931 listNode
= nodes
[i
];
1932 PageEntry
*entry
= (PageEntry
*)(&page
->data
[current
]);
1933 entry
->strlen
= listNode
->length
;
1934 entry
->payload
= listNode
->payload
;
1935 memcpy(entry
->string
, listNode
->string
, listNode
->length
);
1936 current
+= listNode
->length
+ sizeof(PageEntry
);
1940 size_t len
= (sizeof(Page
) + current
+ 3) & ~3;
1941 page
->length
= current
;
1942 write(fd
, page
, len
);
1945 if (buffer
!= _buffer
) free(buffer
);
1948 static void serializeCFBurstTrieLists(CFBurstTrieRef trie
, TrieLevelRef root
, off_t start_offset
, int fd
)
1950 for (int i
=0; i
< CHARACTER_SET_SIZE
; i
++) {
1951 NextTrie next
= root
->slots
[i
];
1953 if (NextTrie_GetKind(next
) == TrieKind
) {
1954 TrieLevelRef nextLevel
= (TrieLevelRef
)NextTrie_GetPtr(next
);
1955 serializeCFBurstTrieLists(trie
, nextLevel
, start_offset
, fd
);
1957 if (NextTrie_GetKind(next
) == ListKind
) {
1958 ListNodeRef listNode
= (ListNodeRef
)NextTrie_GetPtr(next
);
1959 offset
= lseek(fd
, 0, SEEK_CUR
) - start_offset
;
1960 serializeCFBurstTrieList(trie
, listNode
, fd
);
1961 finalizeCFBurstTrieList(listNode
);
1962 //assert((offset & 3)==0);
1963 root
->slots
[i
] = (offset
|ListKind
);
1969 static size_t serializeCFBurstTrie(CFBurstTrieRef trie
, size_t start_offset
, int fd
)
1972 header
.signature
= 0x0ddba11;
1973 header
.rootOffset
= 0;
1974 header
.count
= trie
->count
;
1976 header
.flags
= trie
->cflags
;
1977 header
.reserved
[0] = 0;
1980 lseek(fd
, start_offset
, SEEK_SET
);
1982 size_t header_size
= sizeof(header
);
1983 write(fd
, &header
, header_size
);
1985 serializeCFBurstTrieLists(trie
, &trie
->root
, start_offset
, fd
);
1987 offset
= lseek(fd
, 0, SEEK_CUR
) - start_offset
;
1988 size_t off
= offsetof(TrieHeader
, rootOffset
);
1989 size_t offsize
= sizeof(offset
);
1990 pwrite(fd
, &offset
, offsize
, off
+start_offset
);
1992 serializeCFBurstTrieLevels(trie
, &trie
->root
, &offset
, start_offset
, false, true, fd
);
1994 size_t off2
= offsetof(TrieHeader
, size
);
1995 offsize
= sizeof(offset
);
1996 pwrite(fd
, &offset
, offsize
, off2
+start_offset
);
1998 offset
= lseek(fd
, 0, SEEK_END
);
1999 return (size_t)(offset
-start_offset
);
2004 #pragma mark Release
2007 static void destroyCFBurstTrie(CFBurstTrieRef trie
) {
2008 if (trie
->mapBase
) {
2009 #if DEPLOYMENT_TARGET_WINDOWS
2010 UnmapViewOfFile(trie
->mapBase
);
2011 CloseHandle(trie
->mapHandle
);
2012 CloseHandle(trie
->mappedFileHandle
);
2014 munmap(trie
->mapBase
, trie
->mapSize
);
2017 finalizeCFBurstTrie(&trie
->root
);
2023 static void finalizeCFBurstTrie(TrieLevelRef trie
) {
2024 for (int i
=0; i
< CHARACTER_SET_SIZE
; i
++) {
2025 if (NextTrie_GetKind(trie
->slots
[i
]) == TrieKind
) {
2026 finalizeCFBurstTrie((TrieLevelRef
)NextTrie_GetPtr(trie
->slots
[i
]));
2027 free((void *)NextTrie_GetPtr(trie
->slots
[i
]));
2028 } else if (NextTrie_GetKind(trie
->slots
[i
]) == ListKind
) {
2029 finalizeCFBurstTrieList((ListNodeRef
)NextTrie_GetPtr(trie
->slots
[i
]));
2034 static void finalizeCFBurstTrieList(ListNodeRef node
) {
2036 ListNodeRef next
= node
->next
;
2044 #pragma mark Helpers
2047 static int nodeWeightCompare(const void *a
, const void *b
) {
2048 ListNodeRef nodeA
= *(ListNodeRef
*)a
;
2049 ListNodeRef nodeB
= *(ListNodeRef
*)b
;
2050 return (nodeB
->weight
- nodeA
->weight
);
2053 static int nodeStringCompare(const void *a
, const void *b
) {
2054 ListNodeRef nodeA
= *(ListNodeRef
*)a
;
2055 ListNodeRef nodeB
= *(ListNodeRef
*)b
;
2056 int result
= memcmp((char *)nodeA
->string
, (char *)nodeB
->string
, MIN(nodeA
->length
, nodeB
->length
));
2057 if (result
== 0) result
= nodeA
->length
-nodeB
->length
;
2061 bool containsKey(void *context
, const uint8_t *key
, uint32_t payload
, bool exact
)
2063 uint32_t *ctx
= (uint32_t *)context
;
2064 if (exact
) *ctx
= payload
;
2068 static CFIndex
burstTrieConvertCharactersToUTF8(UniChar
*chars
, CFIndex numChars
, UInt8
*buffer
) {
2070 for (i
= j
= 0; i
< numChars
; i
++) {
2071 UniChar c
= chars
[i
];
2072 if (CFStringIsSurrogateHighCharacter(c
) && i
+ 1 < numChars
&& CFStringIsSurrogateLowCharacter(chars
[i
+ 1])) {
2073 UTF32Char lc
= CFStringGetLongCharacterForSurrogatePair(c
, chars
[i
+ 1]);
2074 buffer
[j
++] = 0xf0 + (lc
>> 18);
2075 buffer
[j
++] = 0x80 + ((lc
& 0x3ffff) >> 12);
2076 buffer
[j
++] = 0x80 + ((lc
& 0xfff) >> 6);
2077 buffer
[j
++] = 0x80 + (lc
& 0x3f);
2082 } else if (c
< 0x800) {
2083 buffer
[j
++] = 0xc0 + (c
>> 6);
2084 buffer
[j
++] = 0x80 + (c
& 0x3f);
2086 buffer
[j
++] = 0xe0 + (c
>> 12);
2087 buffer
[j
++] = 0x80 + ((c
& 0xfff) >> 6);
2088 buffer
[j
++] = 0x80 + (c
& 0x3f);