]>
Commit | Line | Data |
---|---|---|
729e4ab9 A |
1 | /* |
2 | ****************************************************************************** | |
3 | * | |
4 | * Copyright (C) 2001-2008, International Business Machines | |
5 | * Corporation and others. All Rights Reserved. | |
6 | * | |
7 | ****************************************************************************** | |
8 | * file name: utrie2_impl.h | |
9 | * encoding: US-ASCII | |
10 | * tab size: 8 (not used) | |
11 | * indentation:4 | |
12 | * | |
13 | * created on: 2008sep26 (split off from utrie2.c) | |
14 | * created by: Markus W. Scherer | |
15 | * | |
16 | * Definitions needed for both runtime and builder code for UTrie2, | |
17 | * used by utrie2.c and utrie2_builder.c. | |
18 | */ | |
19 | ||
20 | #ifndef __UTRIE2_IMPL_H__ | |
21 | #define __UTRIE2_IMPL_H__ | |
22 | ||
23 | #include "utrie2.h" | |
24 | ||
25 | /* Public UTrie2 API implementation ----------------------------------------- */ | |
26 | ||
27 | /* | |
28 | * These definitions are mostly needed by utrie2.c, | |
29 | * but also by utrie2_serialize() and utrie2_swap(). | |
30 | */ | |
31 | ||
32 | /* | |
33 | * UTrie ("Trie") and UTrie2 ("Tri2") signature values as US-ASCII bytes, | |
34 | * in platform endianness and, for the _OE_ constants, in opposite endianness (bytes reversed). | |
35 | */ | |
36 | #define UTRIE_SIG 0x54726965 | |
37 | #define UTRIE_OE_SIG 0x65697254 | |
38 | ||
39 | #define UTRIE2_SIG 0x54726932 | |
40 | #define UTRIE2_OE_SIG 0x32697254 | |
41 | ||
42 | /** | |
43 | * Layout of a UTrie2 in serialized form: | |
44 | * | |
45 | * UTrie2Header header; | |
46 | * uint16_t index[header.indexLength]; | |
47 | * uint16_t data[header.shiftedDataLength<<2]; -- or uint32_t data[...] (presumably selected by the valueBits in header.options) | |
48 | * @internal | |
49 | */ | |
50 | typedef struct UTrie2Header { | |
51 | /** Signature: "Tri2" in big-endian US-ASCII (0x54726932), same value as UTRIE2_SIG. */ | |
52 | uint32_t signature; | |
53 | ||
54 | /** | |
55 | * options bit field: | |
56 | * 15.. 4 reserved (0) | |
57 | * 3.. 0 UTrie2ValueBits valueBits (extract with UTRIE2_OPTIONS_VALUE_BITS_MASK) | |
58 | */ | |
59 | uint16_t options; | |
60 | ||
61 | /** Length of the index array, in the range UTRIE2_INDEX_1_OFFSET..UTRIE2_MAX_INDEX_LENGTH. */ | |
62 | uint16_t indexLength; | |
63 | ||
64 | /** Length of the data array shifted right by UTRIE2_INDEX_SHIFT; the unshifted length is in the range UTRIE2_DATA_START_OFFSET..UTRIE2_MAX_DATA_LENGTH. */ | |
65 | uint16_t shiftedDataLength; | |
66 | ||
67 | /** Offsets of the null index-2 block and of the null data block, not shifted. */ | |
68 | uint16_t index2NullOffset, dataNullOffset; | |
69 | ||
70 | /** | |
71 | * First code point of the single-value range ending with U+10ffff, | |
72 | * rounded up and then shifted right by UTRIE2_SHIFT_1. | |
73 | */ | |
74 | uint16_t shiftedHighStart; | |
75 | } UTrie2Header; | |
76 | ||
77 | /** | |
78 | * Constants for use with UTrie2Header.options. | |
79 | * @internal | |
80 | */ | |
81 | enum { | |
82 | /** Mask for bits 3..0 of options, which hold the UTrie2ValueBits valueBits. */ | |
83 | UTRIE2_OPTIONS_VALUE_BITS_MASK=0xf | |
84 | }; | |
85 | ||
86 | /* Building a trie ---------------------------------------------------------- */ | |
87 | ||
88 | /* | |
89 | * These definitions are mostly needed by utrie2_builder.c, but also by | |
90 | * utrie2_get32() and utrie2_enum(). | |
91 | */ | |
92 | ||
93 | enum { | |
94 | /** | |
95 | * At build time, leave a gap in the index-2 table, starting at UNEWTRIE2_INDEX_GAP_OFFSET, | |
96 | * at least as long as the sum of the maximum lengths of the 2-byte UTF-8 index-2 table | |
97 | * and the supplementary index-1 table. | |
98 | * The gap length is rounded up to UTRIE2_INDEX_2_BLOCK_LENGTH for proper compacting. | |
99 | */ | |
100 | UNEWTRIE2_INDEX_GAP_OFFSET=UTRIE2_INDEX_2_BMP_LENGTH, | |
101 | UNEWTRIE2_INDEX_GAP_LENGTH= | |
102 | ((UTRIE2_UTF8_2B_INDEX_2_LENGTH+UTRIE2_MAX_INDEX_1_LENGTH)+UTRIE2_INDEX_2_MASK)& | |
103 | ~UTRIE2_INDEX_2_MASK, | |
104 | ||
105 | /** | |
106 | * Maximum length of the build-time index-2 array: | |
107 | * the maximum number of Unicode code points (0x110000) shifted right by UTRIE2_SHIFT_2, | |
108 | * plus the part of the index-2 table for lead surrogate code points (UTRIE2_LSCP_INDEX_2_LENGTH), | |
109 | * plus the build-time index gap (UNEWTRIE2_INDEX_GAP_LENGTH), | |
110 | * plus the null index-2 block (UTRIE2_INDEX_2_BLOCK_LENGTH). | |
111 | */ | |
112 | UNEWTRIE2_MAX_INDEX_2_LENGTH= | |
113 | (0x110000>>UTRIE2_SHIFT_2)+ | |
114 | UTRIE2_LSCP_INDEX_2_LENGTH+ | |
115 | UNEWTRIE2_INDEX_GAP_LENGTH+ | |
116 | UTRIE2_INDEX_2_BLOCK_LENGTH, | |
117 | ||
118 | UNEWTRIE2_INDEX_1_LENGTH=0x110000>>UTRIE2_SHIFT_1 | |
119 | }; | |
120 | ||
121 | /** | |
122 | * Maximum length of the build-time data array. | |
123 | * One entry for each of the 0x110000 code points, plus the illegal-UTF-8 block and the null block (0x40 entries each), | |
124 | * plus values for the 0x400 surrogate code units. | |
125 | */ | |
126 | #define UNEWTRIE2_MAX_DATA_LENGTH (0x110000+0x40+0x40+0x400) | |
127 | ||
128 | /* | |
129 | * Build-time trie structure. | |
130 | * | |
131 | * Design note: just using a boolean flag for "repeat use" could lead to data array overflow | |
132 | * because we would not be able to detect when a data block becomes unused. | |
133 | * It would also lead to orphan data blocks that are kept through serialization. | |
134 | * | |
135 | * Therefore data blocks need reference counting (see map[] for the counters and the free-block list), | |
136 | * and allocDataBlock() needs to look for a free block before increasing dataLength. | |
137 | * | |
138 | * Reference counting seems like overkill for index-2 blocks since the whole index array is | |
139 | * preallocated anyway (unlike the growable data array). | |
140 | * Index-2 blocks are just allocated as needed. | |
141 | */ | |
142 | struct UNewTrie2 { | |
143 | int32_t index1[UNEWTRIE2_INDEX_1_LENGTH]; | |
144 | int32_t index2[UNEWTRIE2_MAX_INDEX_2_LENGTH]; | |
145 | uint32_t *data; | |
146 | ||
147 | uint32_t initialValue, errorValue; | |
148 | int32_t index2Length, dataCapacity, dataLength; | |
149 | int32_t firstFreeBlock; | |
150 | int32_t index2NullOffset, dataNullOffset; | |
151 | UChar32 highStart; | |
152 | UBool isCompacted; | |
153 | ||
154 | /** | |
155 | * Multi-purpose per-data-block table, with UNEWTRIE2_MAX_DATA_LENGTH>>UTRIE2_SHIFT_2 entries. | |
156 | * | |
157 | * Before compacting: | |
158 | * | |
159 | * Per-data-block reference counters/free-block list. | |
160 | * 0: unused | |
161 | * >0: reference counter (number of index-2 entries pointing here) | |
162 | * <0: next free data block in free-block list (NOTE(review): list head is presumably firstFreeBlock -- confirm in utrie2_builder.c) | |
163 | * | |
164 | * While compacting: | |
165 | * | |
166 | * Map of adjusted indexes, used in compactData() and compactIndex2(). | |
167 | * Maps from original indexes to new ones. | |
168 | */ | |
169 | int32_t map[UNEWTRIE2_MAX_DATA_LENGTH>>UTRIE2_SHIFT_2]; | |
170 | }; | |
171 | ||
172 | #endif |