]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
729e4ab9 A |
3 | /* |
4 | ****************************************************************************** | |
5 | * | |
6 | * Copyright (C) 2001-2008, International Business Machines | |
7 | * Corporation and others. All Rights Reserved. | |
8 | * | |
9 | ****************************************************************************** | |
10 | * file name: utrie2_impl.h | |
f3c0d7a5 | 11 | * encoding: UTF-8 |
729e4ab9 A |
12 | * tab size: 8 (not used) |
13 | * indentation:4 | |
14 | * | |
15 | * created on: 2008sep26 (split off from utrie2.c) | |
16 | * created by: Markus W. Scherer | |
17 | * | |
18 | * Definitions needed for both runtime and builder code for UTrie2, | |
19 | * used by utrie2.c and utrie2_builder.c. | |
20 | */ | |
21 | ||
22 | #ifndef __UTRIE2_IMPL_H__ | |
23 | #define __UTRIE2_IMPL_H__ | |
24 | ||
25 | #include "utrie2.h" | |
26 | ||
27 | /* Public UTrie2 API implementation ----------------------------------------- */ | |
28 | ||
29 | /* | |
30 | * These definitions are mostly needed by utrie2.c, | |
31 | * but also by utrie2_serialize() and utrie2_swap(). | |
32 | */ | |
33 | ||
34 | /* | |
35 | * UTrie and UTrie2 signature values, | |
36 | * in platform endianness and opposite endianness. | |
37 | */ | |
38 | #define UTRIE_SIG 0x54726965 /* "Trie" in US-ASCII, read as a big-endian 32-bit value */ | |
39 | #define UTRIE_OE_SIG 0x65697254 /* UTRIE_SIG with its four bytes reversed */ | |
40 | ||
41 | #define UTRIE2_SIG 0x54726932 /* "Tri2" in US-ASCII, read as a big-endian 32-bit value */ | |
42 | #define UTRIE2_OE_SIG 0x32697254 /* UTRIE2_SIG with its four bytes reversed */ | |
43 | ||
44 | /** | |
45 | * Trie data structure in serialized form: | |
46 | * | |
47 | * UTrie2Header header; | |
48 | uint16_t index[header.indexLength]; | |
49 | * uint16_t data[header.shiftedDataLength<<2]; -- or uint32_t data[...] | |
50 | * @internal | |
51 | */ | |
52 | typedef struct UTrie2Header { | |
53 | /** Signature: "Tri2" in big-endian US-ASCII (0x54726932), i.e. UTRIE2_SIG */ | |
54 | uint32_t signature; | |
55 | ||
56 | /** | |
57 | * options bit field: | |
58 | * 15.. 4 reserved (0) | |
59 | * 3.. 0 UTrie2ValueBits valueBits | |
60 | */ | |
61 | uint16_t options; | |
62 | ||
63 | /** Length of the serialized index array; range UTRIE2_INDEX_1_OFFSET..UTRIE2_MAX_INDEX_LENGTH */ | |
64 | uint16_t indexLength; | |
65 | ||
66 | /** Length of the serialized data array, stored shifted right: (UTRIE2_DATA_START_OFFSET..UTRIE2_MAX_DATA_LENGTH)>>UTRIE2_INDEX_SHIFT */ | |
67 | uint16_t shiftedDataLength; | |
68 | ||
69 | /** Offsets of the null index-2 block and of the null data block, not shifted. */ | |
70 | uint16_t index2NullOffset, dataNullOffset; | |
71 | ||
72 | /** | |
73 | * First code point of the single-value range ending with U+10ffff, | |
74 | * rounded up and then shifted right by UTRIE2_SHIFT_1. | |
75 | */ | |
76 | uint16_t shiftedHighStart; | |
77 | } UTrie2Header; | |
78 | ||
79 | /** | |
80 | * Constants for use with UTrie2Header.options. | |
81 | * @internal | |
82 | */ | |
83 | enum { | |
84 | /** Mask for bits 3..0 of UTrie2Header.options, which hold the UTrie2ValueBits valueBits. */ | |
85 | UTRIE2_OPTIONS_VALUE_BITS_MASK=0xf | |
86 | }; | |
87 | ||
88 | /* Building a trie ---------------------------------------------------------- */ | |
89 | ||
90 | /* | |
91 | * These definitions are mostly needed by utrie2_builder.c, but also by | |
92 | * utrie2_get32() and utrie2_enum(). | |
93 | */ | |
94 | ||
95 | enum { | |
96 | /** | |
97 | * At build time, leave a gap in the index-2 table (starting at UNEWTRIE2_INDEX_GAP_OFFSET), | |
98 | * at least as long as the maximum lengths of the 2-byte UTF-8 index-2 table | |
99 | * and the supplementary index-1 table. | |
100 | * Round up to UTRIE2_INDEX_2_BLOCK_LENGTH for proper compacting. | |
101 | */ | |
102 | UNEWTRIE2_INDEX_GAP_OFFSET=UTRIE2_INDEX_2_BMP_LENGTH, | |
103 | UNEWTRIE2_INDEX_GAP_LENGTH= | |
104 | ((UTRIE2_UTF8_2B_INDEX_2_LENGTH+UTRIE2_MAX_INDEX_1_LENGTH)+UTRIE2_INDEX_2_MASK)& | |
105 | ~UTRIE2_INDEX_2_MASK, /* (x+mask)&~mask: the round-up described in the comment above */ | |
106 | ||
107 | /** | |
108 | * Maximum length of the build-time index-2 array. | |
109 | * Maximum number of Unicode code points (0x110000) shifted right by UTRIE2_SHIFT_2, | |
110 | * plus the part of the index-2 table for lead surrogate code points, | |
111 | * plus the build-time index gap, | |
112 | * plus the null index-2 block. | |
113 | */ | |
114 | UNEWTRIE2_MAX_INDEX_2_LENGTH= | |
115 | (0x110000>>UTRIE2_SHIFT_2)+ | |
116 | UTRIE2_LSCP_INDEX_2_LENGTH+ | |
117 | UNEWTRIE2_INDEX_GAP_LENGTH+ | |
118 | UTRIE2_INDEX_2_BLOCK_LENGTH, | |
119 | ||
120 | UNEWTRIE2_INDEX_1_LENGTH=0x110000>>UTRIE2_SHIFT_1 /* Length of the build-time index-1 array (sizes UNewTrie2.index1): 0x110000 code points shifted right by UTRIE2_SHIFT_1. */ | |
121 | }; | |
122 | ||
123 | /** | |
124 | * Maximum length of the build-time data array. | |
125 | * One entry for each of the 0x110000 code points, plus the illegal-UTF-8 block and the null block (0x40 entries each), | |
126 | * plus values for the 0x400 surrogate code units. | |
127 | */ | |
128 | #define UNEWTRIE2_MAX_DATA_LENGTH (0x110000+0x40+0x40+0x400) | |
129 | ||
130 | /* | |
131 | * Build-time trie structure. | |
132 | * | |
133 | * Just using a boolean flag for "repeat use" could lead to data array overflow | |
134 | * because we would not be able to detect when a data block becomes unused. | |
135 | * It also leads to orphan data blocks that are kept through serialization. | |
136 | * | |
137 | * Need to use reference counting for data blocks, | |
138 | * and allocDataBlock() needs to look for a free block before increasing dataLength. | |
139 | * | |
140 | * This scheme seems like overkill for index-2 blocks since the whole index array is | |
141 | * preallocated anyway (unlike the growable data array). | |
142 | * Index-2 blocks are therefore simply allocated as needed, without reference counting. | |
143 | */ | |
144 | struct UNewTrie2 { | |
145 | int32_t index1[UNEWTRIE2_INDEX_1_LENGTH]; /* build-time index-1 table, fixed size */ | |
146 | int32_t index2[UNEWTRIE2_MAX_INDEX_2_LENGTH]; /* build-time index-2 table, preallocated at its maximum length */ | |
147 | uint32_t *data; /* growable data array (its allocation is not shown in this header) */ | |
148 | ||
149 | uint32_t initialValue, errorValue; | |
150 | int32_t index2Length, dataCapacity, dataLength; /* NOTE(review): presumably the used lengths of index2[] and data[], and the allocated size of data[] -- confirm in utrie2_builder.c */ | |
151 | int32_t firstFreeBlock; /* NOTE(review): presumably the head of the free-block list described for map[] below -- confirm */ | |
152 | int32_t index2NullOffset, dataNullOffset; | |
153 | UChar32 highStart; | |
154 | UBool isCompacted; | |
155 | ||
156 | /** | |
157 | * Multi-purpose per-data-block table. | |
158 | * | |
159 | * Before compacting: | |
160 | * | |
161 | * Per-data-block reference counters/free-block list. | |
162 | * 0: unused | |
163 | * >0: reference counter (number of index-2 entries pointing here) | |
164 | * <0: next free data block in free-block list | |
165 | * | |
166 | * While compacting: | |
167 | * | |
168 | * Map of adjusted indexes, used in compactData() and compactIndex2(). | |
169 | * Maps from original indexes to new ones. | |
170 | */ | |
171 | int32_t map[UNEWTRIE2_MAX_DATA_LENGTH>>UTRIE2_SHIFT_2]; /* one entry per data block: maximum data length shifted right by UTRIE2_SHIFT_2 */ | |
172 | }; | |
173 | ||
174 | #endif |