]>
Commit | Line | Data |
---|---|---|
3d1f044b A |
1 | // © 2018 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
3 | ||
4 | // utrie_swap.cpp | |
5 | // created: 2018aug08 Markus W. Scherer | |
6 | ||
7 | #include "unicode/utypes.h" | |
8 | #include "cmemory.h" | |
9 | #include "ucptrie_impl.h" | |
10 | #include "udataswp.h" | |
11 | #include "utrie.h" | |
12 | #include "utrie2_impl.h" | |
13 | ||
14 | // These functions for swapping different generations of ICU code point tries are here | |
15 | // so that their implementation files need not depend on swapper code, | |
16 | // need not depend on each other, and so that other swapper code | |
17 | // need not depend on other trie code. | |
18 | ||
namespace {

// Smallest data array length that ucptrie_swap() accepts for a serialized UCPTrie.
// NOTE(review): presumably one data entry per ASCII code point (0..0x7f) -- confirm.
constexpr int32_t ASCII_LIMIT = 0x80;

}  // namespace
24 | ||
25 | U_CAPI int32_t U_EXPORT2 | |
26 | utrie_swap(const UDataSwapper *ds, | |
27 | const void *inData, int32_t length, void *outData, | |
28 | UErrorCode *pErrorCode) { | |
29 | const UTrieHeader *inTrie; | |
30 | UTrieHeader trie; | |
31 | int32_t size; | |
32 | UBool dataIs32; | |
33 | ||
34 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { | |
35 | return 0; | |
36 | } | |
37 | if(ds==NULL || inData==NULL || (length>=0 && outData==NULL)) { | |
38 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; | |
39 | return 0; | |
40 | } | |
41 | ||
42 | /* setup and swapping */ | |
43 | if(length>=0 && (uint32_t)length<sizeof(UTrieHeader)) { | |
44 | *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; | |
45 | return 0; | |
46 | } | |
47 | ||
48 | inTrie=(const UTrieHeader *)inData; | |
49 | trie.signature=ds->readUInt32(inTrie->signature); | |
50 | trie.options=ds->readUInt32(inTrie->options); | |
51 | trie.indexLength=udata_readInt32(ds, inTrie->indexLength); | |
52 | trie.dataLength=udata_readInt32(ds, inTrie->dataLength); | |
53 | ||
54 | if( trie.signature!=0x54726965 || | |
55 | (trie.options&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_SHIFT || | |
56 | ((trie.options>>UTRIE_OPTIONS_INDEX_SHIFT)&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_INDEX_SHIFT || | |
57 | trie.indexLength<UTRIE_BMP_INDEX_LENGTH || | |
58 | (trie.indexLength&(UTRIE_SURROGATE_BLOCK_COUNT-1))!=0 || | |
59 | trie.dataLength<UTRIE_DATA_BLOCK_LENGTH || | |
60 | (trie.dataLength&(UTRIE_DATA_GRANULARITY-1))!=0 || | |
61 | ((trie.options&UTRIE_OPTIONS_LATIN1_IS_LINEAR)!=0 && trie.dataLength<(UTRIE_DATA_BLOCK_LENGTH+0x100)) | |
62 | ) { | |
63 | *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */ | |
64 | return 0; | |
65 | } | |
66 | ||
67 | dataIs32=(UBool)((trie.options&UTRIE_OPTIONS_DATA_IS_32_BIT)!=0); | |
68 | size=sizeof(UTrieHeader)+trie.indexLength*2+trie.dataLength*(dataIs32?4:2); | |
69 | ||
70 | if(length>=0) { | |
71 | UTrieHeader *outTrie; | |
72 | ||
73 | if(length<size) { | |
74 | *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; | |
75 | return 0; | |
76 | } | |
77 | ||
78 | outTrie=(UTrieHeader *)outData; | |
79 | ||
80 | /* swap the header */ | |
81 | ds->swapArray32(ds, inTrie, sizeof(UTrieHeader), outTrie, pErrorCode); | |
82 | ||
83 | /* swap the index and the data */ | |
84 | if(dataIs32) { | |
85 | ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode); | |
86 | ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, trie.dataLength*4, | |
87 | (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode); | |
88 | } else { | |
89 | ds->swapArray16(ds, inTrie+1, (trie.indexLength+trie.dataLength)*2, outTrie+1, pErrorCode); | |
90 | } | |
91 | } | |
92 | ||
93 | return size; | |
94 | } | |
95 | ||
96 | U_CAPI int32_t U_EXPORT2 | |
97 | utrie2_swap(const UDataSwapper *ds, | |
98 | const void *inData, int32_t length, void *outData, | |
99 | UErrorCode *pErrorCode) { | |
100 | const UTrie2Header *inTrie; | |
101 | UTrie2Header trie; | |
102 | int32_t dataLength, size; | |
103 | UTrie2ValueBits valueBits; | |
104 | ||
105 | if(U_FAILURE(*pErrorCode)) { | |
106 | return 0; | |
107 | } | |
108 | if(ds==NULL || inData==NULL || (length>=0 && outData==NULL)) { | |
109 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; | |
110 | return 0; | |
111 | } | |
112 | ||
113 | /* setup and swapping */ | |
114 | if(length>=0 && length<(int32_t)sizeof(UTrie2Header)) { | |
115 | *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; | |
116 | return 0; | |
117 | } | |
118 | ||
119 | inTrie=(const UTrie2Header *)inData; | |
120 | trie.signature=ds->readUInt32(inTrie->signature); | |
121 | trie.options=ds->readUInt16(inTrie->options); | |
122 | trie.indexLength=ds->readUInt16(inTrie->indexLength); | |
123 | trie.shiftedDataLength=ds->readUInt16(inTrie->shiftedDataLength); | |
124 | ||
125 | valueBits=(UTrie2ValueBits)(trie.options&UTRIE2_OPTIONS_VALUE_BITS_MASK); | |
126 | dataLength=(int32_t)trie.shiftedDataLength<<UTRIE2_INDEX_SHIFT; | |
127 | ||
128 | if( trie.signature!=UTRIE2_SIG || | |
129 | valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits || | |
130 | trie.indexLength<UTRIE2_INDEX_1_OFFSET || | |
131 | dataLength<UTRIE2_DATA_START_OFFSET | |
132 | ) { | |
133 | *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */ | |
134 | return 0; | |
135 | } | |
136 | ||
137 | size=sizeof(UTrie2Header)+trie.indexLength*2; | |
138 | switch(valueBits) { | |
139 | case UTRIE2_16_VALUE_BITS: | |
140 | size+=dataLength*2; | |
141 | break; | |
142 | case UTRIE2_32_VALUE_BITS: | |
143 | size+=dataLength*4; | |
144 | break; | |
145 | default: | |
146 | *pErrorCode=U_INVALID_FORMAT_ERROR; | |
147 | return 0; | |
148 | } | |
149 | ||
150 | if(length>=0) { | |
151 | UTrie2Header *outTrie; | |
152 | ||
153 | if(length<size) { | |
154 | *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; | |
155 | return 0; | |
156 | } | |
157 | ||
158 | outTrie=(UTrie2Header *)outData; | |
159 | ||
160 | /* swap the header */ | |
161 | ds->swapArray32(ds, &inTrie->signature, 4, &outTrie->signature, pErrorCode); | |
162 | ds->swapArray16(ds, &inTrie->options, 12, &outTrie->options, pErrorCode); | |
163 | ||
164 | /* swap the index and the data */ | |
165 | switch(valueBits) { | |
166 | case UTRIE2_16_VALUE_BITS: | |
167 | ds->swapArray16(ds, inTrie+1, (trie.indexLength+dataLength)*2, outTrie+1, pErrorCode); | |
168 | break; | |
169 | case UTRIE2_32_VALUE_BITS: | |
170 | ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode); | |
171 | ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, dataLength*4, | |
172 | (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode); | |
173 | break; | |
174 | default: | |
175 | *pErrorCode=U_INVALID_FORMAT_ERROR; | |
176 | return 0; | |
177 | } | |
178 | } | |
179 | ||
180 | return size; | |
181 | } | |
182 | ||
183 | U_CAPI int32_t U_EXPORT2 | |
184 | ucptrie_swap(const UDataSwapper *ds, | |
185 | const void *inData, int32_t length, void *outData, | |
186 | UErrorCode *pErrorCode) { | |
187 | const UCPTrieHeader *inTrie; | |
188 | UCPTrieHeader trie; | |
189 | int32_t dataLength, size; | |
190 | UCPTrieValueWidth valueWidth; | |
191 | ||
192 | if(U_FAILURE(*pErrorCode)) { | |
193 | return 0; | |
194 | } | |
195 | if(ds==nullptr || inData==nullptr || (length>=0 && outData==nullptr)) { | |
196 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; | |
197 | return 0; | |
198 | } | |
199 | ||
200 | /* setup and swapping */ | |
201 | if(length>=0 && length<(int32_t)sizeof(UCPTrieHeader)) { | |
202 | *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; | |
203 | return 0; | |
204 | } | |
205 | ||
206 | inTrie=(const UCPTrieHeader *)inData; | |
207 | trie.signature=ds->readUInt32(inTrie->signature); | |
208 | trie.options=ds->readUInt16(inTrie->options); | |
209 | trie.indexLength=ds->readUInt16(inTrie->indexLength); | |
210 | trie.dataLength = ds->readUInt16(inTrie->dataLength); | |
211 | ||
212 | UCPTrieType type = (UCPTrieType)((trie.options >> 6) & 3); | |
213 | valueWidth = (UCPTrieValueWidth)(trie.options & UCPTRIE_OPTIONS_VALUE_BITS_MASK); | |
214 | dataLength = ((int32_t)(trie.options & UCPTRIE_OPTIONS_DATA_LENGTH_MASK) << 4) | trie.dataLength; | |
215 | ||
216 | int32_t minIndexLength = type == UCPTRIE_TYPE_FAST ? | |
217 | UCPTRIE_BMP_INDEX_LENGTH : UCPTRIE_SMALL_INDEX_LENGTH; | |
218 | if( trie.signature!=UCPTRIE_SIG || | |
219 | type > UCPTRIE_TYPE_SMALL || | |
220 | (trie.options & UCPTRIE_OPTIONS_RESERVED_MASK) != 0 || | |
221 | valueWidth > UCPTRIE_VALUE_BITS_8 || | |
222 | trie.indexLength < minIndexLength || | |
223 | dataLength < ASCII_LIMIT | |
224 | ) { | |
225 | *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UCPTrie */ | |
226 | return 0; | |
227 | } | |
228 | ||
229 | size=sizeof(UCPTrieHeader)+trie.indexLength*2; | |
230 | switch(valueWidth) { | |
231 | case UCPTRIE_VALUE_BITS_16: | |
232 | size+=dataLength*2; | |
233 | break; | |
234 | case UCPTRIE_VALUE_BITS_32: | |
235 | size+=dataLength*4; | |
236 | break; | |
237 | case UCPTRIE_VALUE_BITS_8: | |
238 | size+=dataLength; | |
239 | break; | |
240 | default: | |
241 | *pErrorCode=U_INVALID_FORMAT_ERROR; | |
242 | return 0; | |
243 | } | |
244 | ||
245 | if(length>=0) { | |
246 | UCPTrieHeader *outTrie; | |
247 | ||
248 | if(length<size) { | |
249 | *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; | |
250 | return 0; | |
251 | } | |
252 | ||
253 | outTrie=(UCPTrieHeader *)outData; | |
254 | ||
255 | /* swap the header */ | |
256 | ds->swapArray32(ds, &inTrie->signature, 4, &outTrie->signature, pErrorCode); | |
257 | ds->swapArray16(ds, &inTrie->options, 12, &outTrie->options, pErrorCode); | |
258 | ||
259 | /* swap the index and the data */ | |
260 | switch(valueWidth) { | |
261 | case UCPTRIE_VALUE_BITS_16: | |
262 | ds->swapArray16(ds, inTrie+1, (trie.indexLength+dataLength)*2, outTrie+1, pErrorCode); | |
263 | break; | |
264 | case UCPTRIE_VALUE_BITS_32: | |
265 | ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode); | |
266 | ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, dataLength*4, | |
267 | (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode); | |
268 | break; | |
269 | case UCPTRIE_VALUE_BITS_8: | |
270 | ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode); | |
271 | if(inTrie!=outTrie) { | |
272 | uprv_memmove((outTrie+1)+trie.indexLength, (inTrie+1)+trie.indexLength, dataLength); | |
273 | } | |
274 | break; | |
275 | default: | |
276 | *pErrorCode=U_INVALID_FORMAT_ERROR; | |
277 | return 0; | |
278 | } | |
279 | } | |
280 | ||
281 | return size; | |
282 | } | |
283 | ||
284 | namespace { | |
285 | ||
286 | /** | |
287 | * Gets the trie version from 32-bit-aligned memory containing the serialized form | |
288 | * of a UTrie (version 1), a UTrie2 (version 2), or a UCPTrie (version 3). | |
289 | * | |
290 | * @param data a pointer to 32-bit-aligned memory containing the serialized form of a trie | |
291 | * @param length the number of bytes available at data; | |
292 | * can be more than necessary (see return value) | |
293 | * @param anyEndianOk If FALSE, only platform-endian serialized forms are recognized. | |
294 | * If TRUE, opposite-endian serialized forms are recognized as well. | |
295 | * @return the trie version of the serialized form, or 0 if it is not | |
296 | * recognized as a serialized trie | |
297 | */ | |
298 | int32_t | |
299 | getVersion(const void *data, int32_t length, UBool anyEndianOk) { | |
300 | uint32_t signature; | |
301 | if(length<16 || data==nullptr || (U_POINTER_MASK_LSB(data, 3)!=0)) { | |
302 | return 0; | |
303 | } | |
304 | signature=*(const uint32_t *)data; | |
305 | if(signature==UCPTRIE_SIG) { | |
306 | return 3; | |
307 | } | |
308 | if(anyEndianOk && signature==UCPTRIE_OE_SIG) { | |
309 | return 3; | |
310 | } | |
311 | if(signature==UTRIE2_SIG) { | |
312 | return 2; | |
313 | } | |
314 | if(anyEndianOk && signature==UTRIE2_OE_SIG) { | |
315 | return 2; | |
316 | } | |
317 | if(signature==UTRIE_SIG) { | |
318 | return 1; | |
319 | } | |
320 | if(anyEndianOk && signature==UTRIE_OE_SIG) { | |
321 | return 1; | |
322 | } | |
323 | return 0; | |
324 | } | |
325 | ||
326 | } // namespace | |
327 | ||
328 | U_CAPI int32_t U_EXPORT2 | |
329 | utrie_swapAnyVersion(const UDataSwapper *ds, | |
330 | const void *inData, int32_t length, void *outData, | |
331 | UErrorCode *pErrorCode) { | |
332 | if(U_FAILURE(*pErrorCode)) { return 0; } | |
333 | switch(getVersion(inData, length, TRUE)) { | |
334 | case 1: | |
335 | return utrie_swap(ds, inData, length, outData, pErrorCode); | |
336 | case 2: | |
337 | return utrie2_swap(ds, inData, length, outData, pErrorCode); | |
338 | case 3: | |
339 | return ucptrie_swap(ds, inData, length, outData, pErrorCode); | |
340 | default: | |
341 | *pErrorCode=U_INVALID_FORMAT_ERROR; | |
342 | return 0; | |
343 | } | |
344 | } |