]>
Commit | Line | Data |
---|---|---|
3d1f044b A |
1 | // © 2017 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
3 | ||
4 | // ucptrie.cpp (modified from utrie2.cpp) | |
5 | // created: 2017dec29 Markus W. Scherer | |
6 | ||
7 | // #define UCPTRIE_DEBUG | |
8 | #ifdef UCPTRIE_DEBUG | |
9 | # include <stdio.h> | |
10 | #endif | |
11 | ||
12 | #include "unicode/utypes.h" | |
13 | #include "unicode/ucptrie.h" | |
14 | #include "unicode/utf.h" | |
15 | #include "unicode/utf8.h" | |
16 | #include "unicode/utf16.h" | |
17 | #include "cmemory.h" | |
18 | #include "uassert.h" | |
19 | #include "ucptrie_impl.h" | |
20 | ||
21 | U_CAPI UCPTrie * U_EXPORT2 | |
22 | ucptrie_openFromBinary(UCPTrieType type, UCPTrieValueWidth valueWidth, | |
23 | const void *data, int32_t length, int32_t *pActualLength, | |
24 | UErrorCode *pErrorCode) { | |
25 | if (U_FAILURE(*pErrorCode)) { | |
26 | return nullptr; | |
27 | } | |
28 | ||
29 | if (length <= 0 || (U_POINTER_MASK_LSB(data, 3) != 0) || | |
30 | type < UCPTRIE_TYPE_ANY || UCPTRIE_TYPE_SMALL < type || | |
31 | valueWidth < UCPTRIE_VALUE_BITS_ANY || UCPTRIE_VALUE_BITS_8 < valueWidth) { | |
32 | *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; | |
33 | return nullptr; | |
34 | } | |
35 | ||
36 | // Enough data for a trie header? | |
37 | if (length < (int32_t)sizeof(UCPTrieHeader)) { | |
38 | *pErrorCode = U_INVALID_FORMAT_ERROR; | |
39 | return nullptr; | |
40 | } | |
41 | ||
42 | // Check the signature. | |
43 | const UCPTrieHeader *header = (const UCPTrieHeader *)data; | |
44 | if (header->signature != UCPTRIE_SIG) { | |
45 | *pErrorCode = U_INVALID_FORMAT_ERROR; | |
46 | return nullptr; | |
47 | } | |
48 | ||
49 | int32_t options = header->options; | |
50 | int32_t typeInt = (options >> 6) & 3; | |
51 | int32_t valueWidthInt = options & UCPTRIE_OPTIONS_VALUE_BITS_MASK; | |
52 | if (typeInt > UCPTRIE_TYPE_SMALL || valueWidthInt > UCPTRIE_VALUE_BITS_8 || | |
53 | (options & UCPTRIE_OPTIONS_RESERVED_MASK) != 0) { | |
54 | *pErrorCode = U_INVALID_FORMAT_ERROR; | |
55 | return nullptr; | |
56 | } | |
57 | UCPTrieType actualType = (UCPTrieType)typeInt; | |
58 | UCPTrieValueWidth actualValueWidth = (UCPTrieValueWidth)valueWidthInt; | |
59 | if (type < 0) { | |
60 | type = actualType; | |
61 | } | |
62 | if (valueWidth < 0) { | |
63 | valueWidth = actualValueWidth; | |
64 | } | |
65 | if (type != actualType || valueWidth != actualValueWidth) { | |
66 | *pErrorCode = U_INVALID_FORMAT_ERROR; | |
67 | return nullptr; | |
68 | } | |
69 | ||
70 | // Get the length values and offsets. | |
71 | UCPTrie tempTrie; | |
72 | uprv_memset(&tempTrie, 0, sizeof(tempTrie)); | |
73 | tempTrie.indexLength = header->indexLength; | |
74 | tempTrie.dataLength = | |
75 | ((options & UCPTRIE_OPTIONS_DATA_LENGTH_MASK) << 4) | header->dataLength; | |
76 | tempTrie.index3NullOffset = header->index3NullOffset; | |
77 | tempTrie.dataNullOffset = | |
78 | ((options & UCPTRIE_OPTIONS_DATA_NULL_OFFSET_MASK) << 8) | header->dataNullOffset; | |
79 | ||
80 | tempTrie.highStart = header->shiftedHighStart << UCPTRIE_SHIFT_2; | |
81 | tempTrie.shifted12HighStart = (tempTrie.highStart + 0xfff) >> 12; | |
82 | tempTrie.type = type; | |
83 | tempTrie.valueWidth = valueWidth; | |
84 | ||
85 | // Calculate the actual length. | |
86 | int32_t actualLength = (int32_t)sizeof(UCPTrieHeader) + tempTrie.indexLength * 2; | |
87 | if (valueWidth == UCPTRIE_VALUE_BITS_16) { | |
88 | actualLength += tempTrie.dataLength * 2; | |
89 | } else if (valueWidth == UCPTRIE_VALUE_BITS_32) { | |
90 | actualLength += tempTrie.dataLength * 4; | |
91 | } else { | |
92 | actualLength += tempTrie.dataLength; | |
93 | } | |
94 | if (length < actualLength) { | |
95 | *pErrorCode = U_INVALID_FORMAT_ERROR; // Not enough bytes. | |
96 | return nullptr; | |
97 | } | |
98 | ||
99 | // Allocate the trie. | |
100 | UCPTrie *trie = (UCPTrie *)uprv_malloc(sizeof(UCPTrie)); | |
101 | if (trie == nullptr) { | |
102 | *pErrorCode = U_MEMORY_ALLOCATION_ERROR; | |
103 | return nullptr; | |
104 | } | |
105 | uprv_memcpy(trie, &tempTrie, sizeof(tempTrie)); | |
106 | #ifdef UCPTRIE_DEBUG | |
107 | trie->name = "fromSerialized"; | |
108 | #endif | |
109 | ||
110 | // Set the pointers to its index and data arrays. | |
111 | const uint16_t *p16 = (const uint16_t *)(header + 1); | |
112 | trie->index = p16; | |
113 | p16 += trie->indexLength; | |
114 | ||
115 | // Get the data. | |
116 | int32_t nullValueOffset = trie->dataNullOffset; | |
117 | if (nullValueOffset >= trie->dataLength) { | |
118 | nullValueOffset = trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET; | |
119 | } | |
120 | switch (valueWidth) { | |
121 | case UCPTRIE_VALUE_BITS_16: | |
122 | trie->data.ptr16 = p16; | |
123 | trie->nullValue = trie->data.ptr16[nullValueOffset]; | |
124 | break; | |
125 | case UCPTRIE_VALUE_BITS_32: | |
126 | trie->data.ptr32 = (const uint32_t *)p16; | |
127 | trie->nullValue = trie->data.ptr32[nullValueOffset]; | |
128 | break; | |
129 | case UCPTRIE_VALUE_BITS_8: | |
130 | trie->data.ptr8 = (const uint8_t *)p16; | |
131 | trie->nullValue = trie->data.ptr8[nullValueOffset]; | |
132 | break; | |
133 | default: | |
134 | // Unreachable because valueWidth was checked above. | |
135 | *pErrorCode = U_INVALID_FORMAT_ERROR; | |
136 | return nullptr; | |
137 | } | |
138 | ||
139 | if (pActualLength != nullptr) { | |
140 | *pActualLength = actualLength; | |
141 | } | |
142 | return trie; | |
143 | } | |
144 | ||
145 | U_CAPI void U_EXPORT2 | |
146 | ucptrie_close(UCPTrie *trie) { | |
147 | uprv_free(trie); | |
148 | } | |
149 | ||
150 | U_CAPI UCPTrieType U_EXPORT2 | |
151 | ucptrie_getType(const UCPTrie *trie) { | |
152 | return (UCPTrieType)trie->type; | |
153 | } | |
154 | ||
155 | U_CAPI UCPTrieValueWidth U_EXPORT2 | |
156 | ucptrie_getValueWidth(const UCPTrie *trie) { | |
157 | return (UCPTrieValueWidth)trie->valueWidth; | |
158 | } | |
159 | ||
160 | U_CAPI int32_t U_EXPORT2 | |
161 | ucptrie_internalSmallIndex(const UCPTrie *trie, UChar32 c) { | |
162 | int32_t i1 = c >> UCPTRIE_SHIFT_1; | |
163 | if (trie->type == UCPTRIE_TYPE_FAST) { | |
164 | U_ASSERT(0xffff < c && c < trie->highStart); | |
165 | i1 += UCPTRIE_BMP_INDEX_LENGTH - UCPTRIE_OMITTED_BMP_INDEX_1_LENGTH; | |
166 | } else { | |
167 | U_ASSERT((uint32_t)c < (uint32_t)trie->highStart && trie->highStart > UCPTRIE_SMALL_LIMIT); | |
168 | i1 += UCPTRIE_SMALL_INDEX_LENGTH; | |
169 | } | |
170 | int32_t i3Block = trie->index[ | |
171 | (int32_t)trie->index[i1] + ((c >> UCPTRIE_SHIFT_2) & UCPTRIE_INDEX_2_MASK)]; | |
172 | int32_t i3 = (c >> UCPTRIE_SHIFT_3) & UCPTRIE_INDEX_3_MASK; | |
173 | int32_t dataBlock; | |
174 | if ((i3Block & 0x8000) == 0) { | |
175 | // 16-bit indexes | |
176 | dataBlock = trie->index[i3Block + i3]; | |
177 | } else { | |
178 | // 18-bit indexes stored in groups of 9 entries per 8 indexes. | |
179 | i3Block = (i3Block & 0x7fff) + (i3 & ~7) + (i3 >> 3); | |
180 | i3 &= 7; | |
181 | dataBlock = ((int32_t)trie->index[i3Block++] << (2 + (2 * i3))) & 0x30000; | |
182 | dataBlock |= trie->index[i3Block + i3]; | |
183 | } | |
184 | return dataBlock + (c & UCPTRIE_SMALL_DATA_MASK); | |
185 | } | |
186 | ||
187 | U_CAPI int32_t U_EXPORT2 | |
188 | ucptrie_internalSmallU8Index(const UCPTrie *trie, int32_t lt1, uint8_t t2, uint8_t t3) { | |
189 | UChar32 c = (lt1 << 12) | (t2 << 6) | t3; | |
190 | if (c >= trie->highStart) { | |
191 | // Possible because the UTF-8 macro compares with shifted12HighStart which may be higher. | |
192 | return trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET; | |
193 | } | |
194 | return ucptrie_internalSmallIndex(trie, c); | |
195 | } | |
196 | ||
197 | U_CAPI int32_t U_EXPORT2 | |
198 | ucptrie_internalU8PrevIndex(const UCPTrie *trie, UChar32 c, | |
199 | const uint8_t *start, const uint8_t *src) { | |
200 | int32_t i, length; | |
201 | // Support 64-bit pointers by avoiding cast of arbitrary difference. | |
202 | if ((src - start) <= 7) { | |
203 | i = length = (int32_t)(src - start); | |
204 | } else { | |
205 | i = length = 7; | |
206 | start = src - 7; | |
207 | } | |
208 | c = utf8_prevCharSafeBody(start, 0, &i, c, -1); | |
209 | i = length - i; // Number of bytes read backward from src. | |
210 | int32_t idx = _UCPTRIE_CP_INDEX(trie, 0xffff, c); | |
211 | return (idx << 3) | i; | |
212 | } | |
213 | ||
214 | namespace { | |
215 | ||
216 | inline uint32_t getValue(UCPTrieData data, UCPTrieValueWidth valueWidth, int32_t dataIndex) { | |
217 | switch (valueWidth) { | |
218 | case UCPTRIE_VALUE_BITS_16: | |
219 | return data.ptr16[dataIndex]; | |
220 | case UCPTRIE_VALUE_BITS_32: | |
221 | return data.ptr32[dataIndex]; | |
222 | case UCPTRIE_VALUE_BITS_8: | |
223 | return data.ptr8[dataIndex]; | |
224 | default: | |
225 | // Unreachable if the trie is properly initialized. | |
226 | return 0xffffffff; | |
227 | } | |
228 | } | |
229 | ||
230 | } // namespace | |
231 | ||
232 | U_CAPI uint32_t U_EXPORT2 | |
233 | ucptrie_get(const UCPTrie *trie, UChar32 c) { | |
234 | int32_t dataIndex; | |
235 | if ((uint32_t)c <= 0x7f) { | |
236 | // linear ASCII | |
237 | dataIndex = c; | |
238 | } else { | |
239 | UChar32 fastMax = trie->type == UCPTRIE_TYPE_FAST ? 0xffff : UCPTRIE_SMALL_MAX; | |
240 | dataIndex = _UCPTRIE_CP_INDEX(trie, fastMax, c); | |
241 | } | |
242 | return getValue(trie->data, (UCPTrieValueWidth)trie->valueWidth, dataIndex); | |
243 | } | |
244 | ||
245 | namespace { | |
246 | ||
247 | constexpr int32_t MAX_UNICODE = 0x10ffff; | |
248 | ||
249 | inline uint32_t maybeFilterValue(uint32_t value, uint32_t trieNullValue, uint32_t nullValue, | |
250 | UCPMapValueFilter *filter, const void *context) { | |
251 | if (value == trieNullValue) { | |
252 | value = nullValue; | |
253 | } else if (filter != nullptr) { | |
254 | value = filter(context, value); | |
255 | } | |
256 | return value; | |
257 | } | |
258 | ||
259 | UChar32 getRange(const void *t, UChar32 start, | |
260 | UCPMapValueFilter *filter, const void *context, uint32_t *pValue) { | |
261 | if ((uint32_t)start > MAX_UNICODE) { | |
262 | return U_SENTINEL; | |
263 | } | |
264 | const UCPTrie *trie = reinterpret_cast<const UCPTrie *>(t); | |
265 | UCPTrieValueWidth valueWidth = (UCPTrieValueWidth)trie->valueWidth; | |
266 | if (start >= trie->highStart) { | |
267 | if (pValue != nullptr) { | |
268 | int32_t di = trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET; | |
269 | uint32_t value = getValue(trie->data, valueWidth, di); | |
270 | if (filter != nullptr) { value = filter(context, value); } | |
271 | *pValue = value; | |
272 | } | |
273 | return MAX_UNICODE; | |
274 | } | |
275 | ||
276 | uint32_t nullValue = trie->nullValue; | |
277 | if (filter != nullptr) { nullValue = filter(context, nullValue); } | |
278 | const uint16_t *index = trie->index; | |
279 | ||
280 | int32_t prevI3Block = -1; | |
281 | int32_t prevBlock = -1; | |
282 | UChar32 c = start; | |
340931cb | 283 | uint32_t trieValue, value = nullValue; |
3d1f044b A |
284 | bool haveValue = false; |
285 | do { | |
286 | int32_t i3Block; | |
287 | int32_t i3; | |
288 | int32_t i3BlockLength; | |
289 | int32_t dataBlockLength; | |
290 | if (c <= 0xffff && (trie->type == UCPTRIE_TYPE_FAST || c <= UCPTRIE_SMALL_MAX)) { | |
291 | i3Block = 0; | |
292 | i3 = c >> UCPTRIE_FAST_SHIFT; | |
293 | i3BlockLength = trie->type == UCPTRIE_TYPE_FAST ? | |
294 | UCPTRIE_BMP_INDEX_LENGTH : UCPTRIE_SMALL_INDEX_LENGTH; | |
295 | dataBlockLength = UCPTRIE_FAST_DATA_BLOCK_LENGTH; | |
296 | } else { | |
297 | // Use the multi-stage index. | |
298 | int32_t i1 = c >> UCPTRIE_SHIFT_1; | |
299 | if (trie->type == UCPTRIE_TYPE_FAST) { | |
300 | U_ASSERT(0xffff < c && c < trie->highStart); | |
301 | i1 += UCPTRIE_BMP_INDEX_LENGTH - UCPTRIE_OMITTED_BMP_INDEX_1_LENGTH; | |
302 | } else { | |
303 | U_ASSERT(c < trie->highStart && trie->highStart > UCPTRIE_SMALL_LIMIT); | |
304 | i1 += UCPTRIE_SMALL_INDEX_LENGTH; | |
305 | } | |
306 | i3Block = trie->index[ | |
307 | (int32_t)trie->index[i1] + ((c >> UCPTRIE_SHIFT_2) & UCPTRIE_INDEX_2_MASK)]; | |
308 | if (i3Block == prevI3Block && (c - start) >= UCPTRIE_CP_PER_INDEX_2_ENTRY) { | |
309 | // The index-3 block is the same as the previous one, and filled with value. | |
310 | U_ASSERT((c & (UCPTRIE_CP_PER_INDEX_2_ENTRY - 1)) == 0); | |
311 | c += UCPTRIE_CP_PER_INDEX_2_ENTRY; | |
312 | continue; | |
313 | } | |
314 | prevI3Block = i3Block; | |
315 | if (i3Block == trie->index3NullOffset) { | |
316 | // This is the index-3 null block. | |
317 | if (haveValue) { | |
318 | if (nullValue != value) { | |
319 | return c - 1; | |
320 | } | |
321 | } else { | |
322 | trieValue = trie->nullValue; | |
323 | value = nullValue; | |
324 | if (pValue != nullptr) { *pValue = nullValue; } | |
325 | haveValue = true; | |
326 | } | |
327 | prevBlock = trie->dataNullOffset; | |
328 | c = (c + UCPTRIE_CP_PER_INDEX_2_ENTRY) & ~(UCPTRIE_CP_PER_INDEX_2_ENTRY - 1); | |
329 | continue; | |
330 | } | |
331 | i3 = (c >> UCPTRIE_SHIFT_3) & UCPTRIE_INDEX_3_MASK; | |
332 | i3BlockLength = UCPTRIE_INDEX_3_BLOCK_LENGTH; | |
333 | dataBlockLength = UCPTRIE_SMALL_DATA_BLOCK_LENGTH; | |
334 | } | |
335 | // Enumerate data blocks for one index-3 block. | |
336 | do { | |
337 | int32_t block; | |
338 | if ((i3Block & 0x8000) == 0) { | |
339 | block = index[i3Block + i3]; | |
340 | } else { | |
341 | // 18-bit indexes stored in groups of 9 entries per 8 indexes. | |
342 | int32_t group = (i3Block & 0x7fff) + (i3 & ~7) + (i3 >> 3); | |
343 | int32_t gi = i3 & 7; | |
344 | block = ((int32_t)index[group++] << (2 + (2 * gi))) & 0x30000; | |
345 | block |= index[group + gi]; | |
346 | } | |
347 | if (block == prevBlock && (c - start) >= dataBlockLength) { | |
348 | // The block is the same as the previous one, and filled with value. | |
349 | U_ASSERT((c & (dataBlockLength - 1)) == 0); | |
350 | c += dataBlockLength; | |
351 | } else { | |
352 | int32_t dataMask = dataBlockLength - 1; | |
353 | prevBlock = block; | |
354 | if (block == trie->dataNullOffset) { | |
355 | // This is the data null block. | |
356 | if (haveValue) { | |
357 | if (nullValue != value) { | |
358 | return c - 1; | |
359 | } | |
360 | } else { | |
361 | trieValue = trie->nullValue; | |
362 | value = nullValue; | |
363 | if (pValue != nullptr) { *pValue = nullValue; } | |
364 | haveValue = true; | |
365 | } | |
366 | c = (c + dataBlockLength) & ~dataMask; | |
367 | } else { | |
368 | int32_t di = block + (c & dataMask); | |
369 | uint32_t trieValue2 = getValue(trie->data, valueWidth, di); | |
370 | if (haveValue) { | |
371 | if (trieValue2 != trieValue) { | |
372 | if (filter == nullptr || | |
373 | maybeFilterValue(trieValue2, trie->nullValue, nullValue, | |
374 | filter, context) != value) { | |
375 | return c - 1; | |
376 | } | |
377 | trieValue = trieValue2; // may or may not help | |
378 | } | |
379 | } else { | |
380 | trieValue = trieValue2; | |
381 | value = maybeFilterValue(trieValue2, trie->nullValue, nullValue, | |
382 | filter, context); | |
383 | if (pValue != nullptr) { *pValue = value; } | |
384 | haveValue = true; | |
385 | } | |
386 | while ((++c & dataMask) != 0) { | |
387 | trieValue2 = getValue(trie->data, valueWidth, ++di); | |
388 | if (trieValue2 != trieValue) { | |
389 | if (filter == nullptr || | |
390 | maybeFilterValue(trieValue2, trie->nullValue, nullValue, | |
391 | filter, context) != value) { | |
392 | return c - 1; | |
393 | } | |
394 | trieValue = trieValue2; // may or may not help | |
395 | } | |
396 | } | |
397 | } | |
398 | } | |
399 | } while (++i3 < i3BlockLength); | |
400 | } while (c < trie->highStart); | |
401 | U_ASSERT(haveValue); | |
402 | int32_t di = trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET; | |
403 | uint32_t highValue = getValue(trie->data, valueWidth, di); | |
404 | if (maybeFilterValue(highValue, trie->nullValue, nullValue, | |
405 | filter, context) != value) { | |
406 | return c - 1; | |
407 | } else { | |
408 | return MAX_UNICODE; | |
409 | } | |
410 | } | |
411 | ||
412 | } // namespace | |
413 | ||
414 | U_CFUNC UChar32 | |
415 | ucptrie_internalGetRange(UCPTrieGetRange *getRange, | |
416 | const void *trie, UChar32 start, | |
417 | UCPMapRangeOption option, uint32_t surrogateValue, | |
418 | UCPMapValueFilter *filter, const void *context, uint32_t *pValue) { | |
419 | if (option == UCPMAP_RANGE_NORMAL) { | |
420 | return getRange(trie, start, filter, context, pValue); | |
421 | } | |
422 | uint32_t value; | |
423 | if (pValue == nullptr) { | |
424 | // We need to examine the range value even if the caller does not want it. | |
425 | pValue = &value; | |
426 | } | |
427 | UChar32 surrEnd = option == UCPMAP_RANGE_FIXED_ALL_SURROGATES ? 0xdfff : 0xdbff; | |
428 | UChar32 end = getRange(trie, start, filter, context, pValue); | |
429 | if (end < 0xd7ff || start > surrEnd) { | |
430 | return end; | |
431 | } | |
432 | // The range overlaps with surrogates, or ends just before the first one. | |
433 | if (*pValue == surrogateValue) { | |
434 | if (end >= surrEnd) { | |
435 | // Surrogates followed by a non-surrogateValue range, | |
436 | // or surrogates are part of a larger surrogateValue range. | |
437 | return end; | |
438 | } | |
439 | } else { | |
440 | if (start <= 0xd7ff) { | |
441 | return 0xd7ff; // Non-surrogateValue range ends before surrogateValue surrogates. | |
442 | } | |
443 | // Start is a surrogate with a non-surrogateValue code *unit* value. | |
444 | // Return a surrogateValue code *point* range. | |
445 | *pValue = surrogateValue; | |
446 | if (end > surrEnd) { | |
447 | return surrEnd; // Surrogate range ends before non-surrogateValue rest of range. | |
448 | } | |
449 | } | |
450 | // See if the surrogateValue surrogate range can be merged with | |
451 | // an immediately following range. | |
452 | uint32_t value2; | |
453 | UChar32 end2 = getRange(trie, surrEnd + 1, filter, context, &value2); | |
454 | if (value2 == surrogateValue) { | |
455 | return end2; | |
456 | } | |
457 | return surrEnd; | |
458 | } | |
459 | ||
460 | U_CAPI UChar32 U_EXPORT2 | |
461 | ucptrie_getRange(const UCPTrie *trie, UChar32 start, | |
462 | UCPMapRangeOption option, uint32_t surrogateValue, | |
463 | UCPMapValueFilter *filter, const void *context, uint32_t *pValue) { | |
464 | return ucptrie_internalGetRange(getRange, trie, start, | |
465 | option, surrogateValue, | |
466 | filter, context, pValue); | |
467 | } | |
468 | ||
469 | U_CAPI int32_t U_EXPORT2 | |
470 | ucptrie_toBinary(const UCPTrie *trie, | |
471 | void *data, int32_t capacity, | |
472 | UErrorCode *pErrorCode) { | |
473 | if (U_FAILURE(*pErrorCode)) { | |
474 | return 0; | |
475 | } | |
476 | ||
477 | UCPTrieType type = (UCPTrieType)trie->type; | |
478 | UCPTrieValueWidth valueWidth = (UCPTrieValueWidth)trie->valueWidth; | |
479 | if (type < UCPTRIE_TYPE_FAST || UCPTRIE_TYPE_SMALL < type || | |
480 | valueWidth < UCPTRIE_VALUE_BITS_16 || UCPTRIE_VALUE_BITS_8 < valueWidth || | |
481 | capacity < 0 || | |
482 | (capacity > 0 && (data == nullptr || (U_POINTER_MASK_LSB(data, 3) != 0)))) { | |
483 | *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; | |
484 | return 0; | |
485 | } | |
486 | ||
487 | int32_t length = (int32_t)sizeof(UCPTrieHeader) + trie->indexLength * 2; | |
488 | switch (valueWidth) { | |
489 | case UCPTRIE_VALUE_BITS_16: | |
490 | length += trie->dataLength * 2; | |
491 | break; | |
492 | case UCPTRIE_VALUE_BITS_32: | |
493 | length += trie->dataLength * 4; | |
494 | break; | |
495 | case UCPTRIE_VALUE_BITS_8: | |
496 | length += trie->dataLength; | |
497 | break; | |
498 | default: | |
499 | // unreachable | |
500 | break; | |
501 | } | |
502 | if (capacity < length) { | |
503 | *pErrorCode = U_BUFFER_OVERFLOW_ERROR; | |
504 | return length; | |
505 | } | |
506 | ||
507 | char *bytes = (char *)data; | |
508 | UCPTrieHeader *header = (UCPTrieHeader *)bytes; | |
509 | header->signature = UCPTRIE_SIG; // "Tri3" | |
510 | header->options = (uint16_t)( | |
511 | ((trie->dataLength & 0xf0000) >> 4) | | |
512 | ((trie->dataNullOffset & 0xf0000) >> 8) | | |
513 | (trie->type << 6) | | |
514 | valueWidth); | |
515 | header->indexLength = (uint16_t)trie->indexLength; | |
516 | header->dataLength = (uint16_t)trie->dataLength; | |
517 | header->index3NullOffset = trie->index3NullOffset; | |
518 | header->dataNullOffset = (uint16_t)trie->dataNullOffset; | |
519 | header->shiftedHighStart = trie->highStart >> UCPTRIE_SHIFT_2; | |
520 | bytes += sizeof(UCPTrieHeader); | |
521 | ||
522 | uprv_memcpy(bytes, trie->index, trie->indexLength * 2); | |
523 | bytes += trie->indexLength * 2; | |
524 | ||
525 | switch (valueWidth) { | |
526 | case UCPTRIE_VALUE_BITS_16: | |
527 | uprv_memcpy(bytes, trie->data.ptr16, trie->dataLength * 2); | |
528 | break; | |
529 | case UCPTRIE_VALUE_BITS_32: | |
530 | uprv_memcpy(bytes, trie->data.ptr32, trie->dataLength * 4); | |
531 | break; | |
532 | case UCPTRIE_VALUE_BITS_8: | |
533 | uprv_memcpy(bytes, trie->data.ptr8, trie->dataLength); | |
534 | break; | |
535 | default: | |
536 | // unreachable | |
537 | break; | |
538 | } | |
539 | return length; | |
540 | } | |
541 | ||
542 | namespace { | |
543 | ||
544 | #ifdef UCPTRIE_DEBUG | |
545 | long countNull(const UCPTrie *trie) { | |
546 | uint32_t nullValue=trie->nullValue; | |
547 | int32_t length=trie->dataLength; | |
548 | long count=0; | |
549 | switch (trie->valueWidth) { | |
550 | case UCPTRIE_VALUE_BITS_16: | |
551 | for(int32_t i=0; i<length; ++i) { | |
552 | if(trie->data.ptr16[i]==nullValue) { ++count; } | |
553 | } | |
554 | break; | |
555 | case UCPTRIE_VALUE_BITS_32: | |
556 | for(int32_t i=0; i<length; ++i) { | |
557 | if(trie->data.ptr32[i]==nullValue) { ++count; } | |
558 | } | |
559 | break; | |
560 | case UCPTRIE_VALUE_BITS_8: | |
561 | for(int32_t i=0; i<length; ++i) { | |
562 | if(trie->data.ptr8[i]==nullValue) { ++count; } | |
563 | } | |
564 | break; | |
565 | default: | |
566 | // unreachable | |
567 | break; | |
568 | } | |
569 | return count; | |
570 | } | |
571 | ||
572 | U_CFUNC void | |
573 | ucptrie_printLengths(const UCPTrie *trie, const char *which) { | |
574 | long indexLength=trie->indexLength; | |
575 | long dataLength=(long)trie->dataLength; | |
576 | long totalLength=(long)sizeof(UCPTrieHeader)+indexLength*2+ | |
577 | dataLength*(trie->valueWidth==UCPTRIE_VALUE_BITS_16 ? 2 : | |
578 | trie->valueWidth==UCPTRIE_VALUE_BITS_32 ? 4 : 1); | |
579 | printf("**UCPTrieLengths(%s %s)** index:%6ld data:%6ld countNull:%6ld serialized:%6ld\n", | |
580 | which, trie->name, indexLength, dataLength, countNull(trie), totalLength); | |
581 | } | |
582 | #endif | |
583 | ||
584 | } // namespace | |
585 | ||
586 | // UCPMap ---- | |
587 | // Initially, this is the same as UCPTrie. This may well change. | |
588 | ||
589 | U_CAPI uint32_t U_EXPORT2 | |
590 | ucpmap_get(const UCPMap *map, UChar32 c) { | |
591 | return ucptrie_get(reinterpret_cast<const UCPTrie *>(map), c); | |
592 | } | |
593 | ||
594 | U_CAPI UChar32 U_EXPORT2 | |
595 | ucpmap_getRange(const UCPMap *map, UChar32 start, | |
596 | UCPMapRangeOption option, uint32_t surrogateValue, | |
597 | UCPMapValueFilter *filter, const void *context, uint32_t *pValue) { | |
598 | return ucptrie_getRange(reinterpret_cast<const UCPTrie *>(map), start, | |
599 | option, surrogateValue, | |
600 | filter, context, pValue); | |
601 | } |