]>
Commit | Line | Data |
---|---|---|
729e4ab9 A |
1 | /* |
2 | ****************************************************************************** | |
3 | * | |
4 | * Copyright (C) 2001-2010, International Business Machines | |
5 | * Corporation and others. All Rights Reserved. | |
6 | * | |
7 | ****************************************************************************** | |
8 | * file name: utrie2.cpp | |
9 | * encoding: US-ASCII | |
10 | * tab size: 8 (not used) | |
11 | * indentation:4 | |
12 | * | |
13 | * created on: 2008aug16 (starting from a copy of utrie.c) | |
14 | * created by: Markus W. Scherer | |
15 | * | |
16 | * This is a common implementation of a Unicode trie. | |
17 | * It is a kind of compressed, serializable table of 16- or 32-bit values associated with | |
18 | * Unicode code points (0..0x10ffff). | |
19 | * This is the second common version of a Unicode trie (hence the name UTrie2). | |
20 | * See utrie2.h for a comparison. | |
21 | * | |
22 | * This file contains only the runtime and enumeration code, for read-only access. | |
23 | * See utrie2_builder.c for the builder code. | |
24 | */ | |
25 | #ifdef UTRIE2_DEBUG | |
26 | # include <stdio.h> | |
27 | #endif | |
28 | ||
29 | #include "unicode/utypes.h" | |
30 | #include "cmemory.h" | |
31 | #include "utrie2.h" | |
32 | #include "utrie2_impl.h" | |
33 | ||
34 | /* Public UTrie2 API implementation ----------------------------------------- */ | |
35 | ||
36 | static uint32_t | |
37 | get32(const UNewTrie2 *trie, UChar32 c, UBool fromLSCP) { | |
38 | int32_t i2, block; | |
39 | ||
40 | if(c>=trie->highStart && (!U_IS_LEAD(c) || fromLSCP)) { | |
41 | return trie->data[trie->dataLength-UTRIE2_DATA_GRANULARITY]; | |
42 | } | |
43 | ||
44 | if(U_IS_LEAD(c) && fromLSCP) { | |
45 | i2=(UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2))+ | |
46 | (c>>UTRIE2_SHIFT_2); | |
47 | } else { | |
48 | i2=trie->index1[c>>UTRIE2_SHIFT_1]+ | |
49 | ((c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK); | |
50 | } | |
51 | block=trie->index2[i2]; | |
52 | return trie->data[block+(c&UTRIE2_DATA_MASK)]; | |
53 | } | |
54 | ||
55 | U_CAPI uint32_t U_EXPORT2 | |
56 | utrie2_get32(const UTrie2 *trie, UChar32 c) { | |
57 | if(trie->data16!=NULL) { | |
58 | return UTRIE2_GET16(trie, c); | |
59 | } else if(trie->data32!=NULL) { | |
60 | return UTRIE2_GET32(trie, c); | |
61 | } else if((uint32_t)c>0x10ffff) { | |
62 | return trie->errorValue; | |
63 | } else { | |
64 | return get32(trie->newTrie, c, TRUE); | |
65 | } | |
66 | } | |
67 | ||
68 | U_CAPI uint32_t U_EXPORT2 | |
69 | utrie2_get32FromLeadSurrogateCodeUnit(const UTrie2 *trie, UChar32 c) { | |
70 | if(!U_IS_LEAD(c)) { | |
71 | return trie->errorValue; | |
72 | } | |
73 | if(trie->data16!=NULL) { | |
74 | return UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, c); | |
75 | } else if(trie->data32!=NULL) { | |
76 | return UTRIE2_GET32_FROM_U16_SINGLE_LEAD(trie, c); | |
77 | } else { | |
78 | return get32(trie->newTrie, c, FALSE); | |
79 | } | |
80 | } | |
81 | ||
82 | static U_INLINE int32_t | |
83 | u8Index(const UTrie2 *trie, UChar32 c, int32_t i) { | |
84 | int32_t idx= | |
85 | _UTRIE2_INDEX_FROM_CP( | |
86 | trie, | |
87 | trie->data32==NULL ? trie->indexLength : 0, | |
88 | c); | |
89 | return (idx<<3)|i; | |
90 | } | |
91 | ||
92 | U_CAPI int32_t U_EXPORT2 | |
93 | utrie2_internalU8NextIndex(const UTrie2 *trie, UChar32 c, | |
94 | const uint8_t *src, const uint8_t *limit) { | |
95 | int32_t i, length; | |
96 | i=0; | |
97 | /* support 64-bit pointers by avoiding cast of arbitrary difference */ | |
98 | if((limit-src)<=7) { | |
99 | length=(int32_t)(limit-src); | |
100 | } else { | |
101 | length=7; | |
102 | } | |
103 | c=utf8_nextCharSafeBody(src, &i, length, c, -1); | |
104 | return u8Index(trie, c, i); | |
105 | } | |
106 | ||
107 | U_CAPI int32_t U_EXPORT2 | |
108 | utrie2_internalU8PrevIndex(const UTrie2 *trie, UChar32 c, | |
109 | const uint8_t *start, const uint8_t *src) { | |
110 | int32_t i, length; | |
111 | /* support 64-bit pointers by avoiding cast of arbitrary difference */ | |
112 | if((src-start)<=7) { | |
113 | i=length=(int32_t)(src-start); | |
114 | } else { | |
115 | i=length=7; | |
116 | start=src-7; | |
117 | } | |
118 | c=utf8_prevCharSafeBody(start, 0, &i, c, -1); | |
119 | i=length-i; /* number of bytes read backward from src */ | |
120 | return u8Index(trie, c, i); | |
121 | } | |
122 | ||
123 | U_CAPI UTrie2 * U_EXPORT2 | |
124 | utrie2_openFromSerialized(UTrie2ValueBits valueBits, | |
125 | const void *data, int32_t length, int32_t *pActualLength, | |
126 | UErrorCode *pErrorCode) { | |
127 | const UTrie2Header *header; | |
128 | const uint16_t *p16; | |
129 | int32_t actualLength; | |
130 | ||
131 | UTrie2 tempTrie; | |
132 | UTrie2 *trie; | |
133 | ||
134 | if(U_FAILURE(*pErrorCode)) { | |
135 | return 0; | |
136 | } | |
137 | ||
138 | if( length<=0 || (U_POINTER_MASK_LSB(data, 3)!=0) || | |
139 | valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits | |
140 | ) { | |
141 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; | |
142 | return 0; | |
143 | } | |
144 | ||
145 | /* enough data for a trie header? */ | |
146 | if(length<(int32_t)sizeof(UTrie2Header)) { | |
147 | *pErrorCode=U_INVALID_FORMAT_ERROR; | |
148 | return 0; | |
149 | } | |
150 | ||
151 | /* check the signature */ | |
152 | header=(const UTrie2Header *)data; | |
153 | if(header->signature!=UTRIE2_SIG) { | |
154 | *pErrorCode=U_INVALID_FORMAT_ERROR; | |
155 | return 0; | |
156 | } | |
157 | ||
158 | /* get the options */ | |
159 | if(valueBits!=(UTrie2ValueBits)(header->options&UTRIE2_OPTIONS_VALUE_BITS_MASK)) { | |
160 | *pErrorCode=U_INVALID_FORMAT_ERROR; | |
161 | return 0; | |
162 | } | |
163 | ||
164 | /* get the length values and offsets */ | |
165 | uprv_memset(&tempTrie, 0, sizeof(tempTrie)); | |
166 | tempTrie.indexLength=header->indexLength; | |
167 | tempTrie.dataLength=header->shiftedDataLength<<UTRIE2_INDEX_SHIFT; | |
168 | tempTrie.index2NullOffset=header->index2NullOffset; | |
169 | tempTrie.dataNullOffset=header->dataNullOffset; | |
170 | ||
171 | tempTrie.highStart=header->shiftedHighStart<<UTRIE2_SHIFT_1; | |
172 | tempTrie.highValueIndex=tempTrie.dataLength-UTRIE2_DATA_GRANULARITY; | |
173 | if(valueBits==UTRIE2_16_VALUE_BITS) { | |
174 | tempTrie.highValueIndex+=tempTrie.indexLength; | |
175 | } | |
176 | ||
177 | /* calculate the actual length */ | |
178 | actualLength=(int32_t)sizeof(UTrie2Header)+tempTrie.indexLength*2; | |
179 | if(valueBits==UTRIE2_16_VALUE_BITS) { | |
180 | actualLength+=tempTrie.dataLength*2; | |
181 | } else { | |
182 | actualLength+=tempTrie.dataLength*4; | |
183 | } | |
184 | if(length<actualLength) { | |
185 | *pErrorCode=U_INVALID_FORMAT_ERROR; /* not enough bytes */ | |
186 | return 0; | |
187 | } | |
188 | ||
189 | /* allocate the trie */ | |
190 | trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2)); | |
191 | if(trie==NULL) { | |
192 | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; | |
193 | return 0; | |
194 | } | |
195 | uprv_memcpy(trie, &tempTrie, sizeof(tempTrie)); | |
196 | trie->memory=(uint32_t *)data; | |
197 | trie->length=actualLength; | |
198 | trie->isMemoryOwned=FALSE; | |
199 | ||
200 | /* set the pointers to its index and data arrays */ | |
201 | p16=(const uint16_t *)(header+1); | |
202 | trie->index=p16; | |
203 | p16+=trie->indexLength; | |
204 | ||
205 | /* get the data */ | |
206 | switch(valueBits) { | |
207 | case UTRIE2_16_VALUE_BITS: | |
208 | trie->data16=p16; | |
209 | trie->data32=NULL; | |
210 | trie->initialValue=trie->index[trie->dataNullOffset]; | |
211 | trie->errorValue=trie->data16[UTRIE2_BAD_UTF8_DATA_OFFSET]; | |
212 | break; | |
213 | case UTRIE2_32_VALUE_BITS: | |
214 | trie->data16=NULL; | |
215 | trie->data32=(const uint32_t *)p16; | |
216 | trie->initialValue=trie->data32[trie->dataNullOffset]; | |
217 | trie->errorValue=trie->data32[UTRIE2_BAD_UTF8_DATA_OFFSET]; | |
218 | break; | |
219 | default: | |
220 | *pErrorCode=U_INVALID_FORMAT_ERROR; | |
221 | return 0; | |
222 | } | |
223 | ||
224 | if(pActualLength!=NULL) { | |
225 | *pActualLength=actualLength; | |
226 | } | |
227 | return trie; | |
228 | } | |
229 | ||
230 | U_CAPI UTrie2 * U_EXPORT2 | |
231 | utrie2_openDummy(UTrie2ValueBits valueBits, | |
232 | uint32_t initialValue, uint32_t errorValue, | |
233 | UErrorCode *pErrorCode) { | |
234 | UTrie2 *trie; | |
235 | UTrie2Header *header; | |
236 | uint32_t *p; | |
237 | uint16_t *dest16; | |
238 | int32_t indexLength, dataLength, length, i; | |
239 | int32_t dataMove; /* >0 if the data is moved to the end of the index array */ | |
240 | ||
241 | if(U_FAILURE(*pErrorCode)) { | |
242 | return 0; | |
243 | } | |
244 | ||
245 | if(valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits) { | |
246 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; | |
247 | return 0; | |
248 | } | |
249 | ||
250 | /* calculate the total length of the dummy trie data */ | |
251 | indexLength=UTRIE2_INDEX_1_OFFSET; | |
252 | dataLength=UTRIE2_DATA_START_OFFSET+UTRIE2_DATA_GRANULARITY; | |
253 | length=(int32_t)sizeof(UTrie2Header)+indexLength*2; | |
254 | if(valueBits==UTRIE2_16_VALUE_BITS) { | |
255 | length+=dataLength*2; | |
256 | } else { | |
257 | length+=dataLength*4; | |
258 | } | |
259 | ||
260 | /* allocate the trie */ | |
261 | trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2)); | |
262 | if(trie==NULL) { | |
263 | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; | |
264 | return 0; | |
265 | } | |
266 | uprv_memset(trie, 0, sizeof(UTrie2)); | |
267 | trie->memory=uprv_malloc(length); | |
268 | if(trie->memory==NULL) { | |
269 | uprv_free(trie); | |
270 | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; | |
271 | return 0; | |
272 | } | |
273 | trie->length=length; | |
274 | trie->isMemoryOwned=TRUE; | |
275 | ||
276 | /* set the UTrie2 fields */ | |
277 | if(valueBits==UTRIE2_16_VALUE_BITS) { | |
278 | dataMove=indexLength; | |
279 | } else { | |
280 | dataMove=0; | |
281 | } | |
282 | ||
283 | trie->indexLength=indexLength; | |
284 | trie->dataLength=dataLength; | |
285 | trie->index2NullOffset=UTRIE2_INDEX_2_OFFSET; | |
286 | trie->dataNullOffset=(uint16_t)dataMove; | |
287 | trie->initialValue=initialValue; | |
288 | trie->errorValue=errorValue; | |
289 | trie->highStart=0; | |
290 | trie->highValueIndex=dataMove+UTRIE2_DATA_START_OFFSET; | |
291 | ||
292 | /* set the header fields */ | |
293 | header=(UTrie2Header *)trie->memory; | |
294 | ||
295 | header->signature=UTRIE2_SIG; /* "Tri2" */ | |
296 | header->options=(uint16_t)valueBits; | |
297 | ||
298 | header->indexLength=(uint16_t)indexLength; | |
299 | header->shiftedDataLength=(uint16_t)(dataLength>>UTRIE2_INDEX_SHIFT); | |
300 | header->index2NullOffset=(uint16_t)UTRIE2_INDEX_2_OFFSET; | |
301 | header->dataNullOffset=(uint16_t)dataMove; | |
302 | header->shiftedHighStart=0; | |
303 | ||
304 | /* fill the index and data arrays */ | |
305 | dest16=(uint16_t *)(header+1); | |
306 | trie->index=dest16; | |
307 | ||
308 | /* write the index-2 array values shifted right by UTRIE2_INDEX_SHIFT */ | |
309 | for(i=0; i<UTRIE2_INDEX_2_BMP_LENGTH; ++i) { | |
310 | *dest16++=(uint16_t)(dataMove>>UTRIE2_INDEX_SHIFT); /* null data block */ | |
311 | } | |
312 | ||
313 | /* write UTF-8 2-byte index-2 values, not right-shifted */ | |
314 | for(i=0; i<(0xc2-0xc0); ++i) { /* C0..C1 */ | |
315 | *dest16++=(uint16_t)(dataMove+UTRIE2_BAD_UTF8_DATA_OFFSET); | |
316 | } | |
317 | for(; i<(0xe0-0xc0); ++i) { /* C2..DF */ | |
318 | *dest16++=(uint16_t)dataMove; | |
319 | } | |
320 | ||
321 | /* write the 16/32-bit data array */ | |
322 | switch(valueBits) { | |
323 | case UTRIE2_16_VALUE_BITS: | |
324 | /* write 16-bit data values */ | |
325 | trie->data16=dest16; | |
326 | trie->data32=NULL; | |
327 | for(i=0; i<0x80; ++i) { | |
328 | *dest16++=(uint16_t)initialValue; | |
329 | } | |
330 | for(; i<0xc0; ++i) { | |
331 | *dest16++=(uint16_t)errorValue; | |
332 | } | |
333 | /* highValue and reserved values */ | |
334 | for(i=0; i<UTRIE2_DATA_GRANULARITY; ++i) { | |
335 | *dest16++=(uint16_t)initialValue; | |
336 | } | |
337 | break; | |
338 | case UTRIE2_32_VALUE_BITS: | |
339 | /* write 32-bit data values */ | |
340 | p=(uint32_t *)dest16; | |
341 | trie->data16=NULL; | |
342 | trie->data32=p; | |
343 | for(i=0; i<0x80; ++i) { | |
344 | *p++=initialValue; | |
345 | } | |
346 | for(; i<0xc0; ++i) { | |
347 | *p++=errorValue; | |
348 | } | |
349 | /* highValue and reserved values */ | |
350 | for(i=0; i<UTRIE2_DATA_GRANULARITY; ++i) { | |
351 | *p++=initialValue; | |
352 | } | |
353 | break; | |
354 | default: | |
355 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; | |
356 | return 0; | |
357 | } | |
358 | ||
359 | return trie; | |
360 | } | |
361 | ||
362 | U_CAPI void U_EXPORT2 | |
363 | utrie2_close(UTrie2 *trie) { | |
364 | if(trie!=NULL) { | |
365 | if(trie->isMemoryOwned) { | |
366 | uprv_free(trie->memory); | |
367 | } | |
368 | if(trie->newTrie!=NULL) { | |
369 | uprv_free(trie->newTrie->data); | |
370 | uprv_free(trie->newTrie); | |
371 | } | |
372 | uprv_free(trie); | |
373 | } | |
374 | } | |
375 | ||
376 | U_CAPI int32_t U_EXPORT2 | |
377 | utrie2_getVersion(const void *data, int32_t length, UBool anyEndianOk) { | |
378 | uint32_t signature; | |
379 | if(length<16 || data==NULL || (U_POINTER_MASK_LSB(data, 3)!=0)) { | |
380 | return 0; | |
381 | } | |
382 | signature=*(const uint32_t *)data; | |
383 | if(signature==UTRIE2_SIG) { | |
384 | return 2; | |
385 | } | |
386 | if(anyEndianOk && signature==UTRIE2_OE_SIG) { | |
387 | return 2; | |
388 | } | |
389 | if(signature==UTRIE_SIG) { | |
390 | return 1; | |
391 | } | |
392 | if(anyEndianOk && signature==UTRIE_OE_SIG) { | |
393 | return 1; | |
394 | } | |
395 | return 0; | |
396 | } | |
397 | ||
398 | U_CAPI int32_t U_EXPORT2 | |
399 | utrie2_swap(const UDataSwapper *ds, | |
400 | const void *inData, int32_t length, void *outData, | |
401 | UErrorCode *pErrorCode) { | |
402 | const UTrie2Header *inTrie; | |
403 | UTrie2Header trie; | |
404 | int32_t dataLength, size; | |
405 | UTrie2ValueBits valueBits; | |
406 | ||
407 | if(U_FAILURE(*pErrorCode)) { | |
408 | return 0; | |
409 | } | |
410 | if(ds==NULL || inData==NULL || (length>=0 && outData==NULL)) { | |
411 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; | |
412 | return 0; | |
413 | } | |
414 | ||
415 | /* setup and swapping */ | |
416 | if(length>=0 && length<(int32_t)sizeof(UTrie2Header)) { | |
417 | *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; | |
418 | return 0; | |
419 | } | |
420 | ||
421 | inTrie=(const UTrie2Header *)inData; | |
422 | trie.signature=ds->readUInt32(inTrie->signature); | |
423 | trie.options=ds->readUInt16(inTrie->options); | |
424 | trie.indexLength=ds->readUInt16(inTrie->indexLength); | |
425 | trie.shiftedDataLength=ds->readUInt16(inTrie->shiftedDataLength); | |
426 | ||
427 | valueBits=(UTrie2ValueBits)(trie.options&UTRIE2_OPTIONS_VALUE_BITS_MASK); | |
428 | dataLength=(int32_t)trie.shiftedDataLength<<UTRIE2_INDEX_SHIFT; | |
429 | ||
430 | if( trie.signature!=UTRIE2_SIG || | |
431 | valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits || | |
432 | trie.indexLength<UTRIE2_INDEX_1_OFFSET || | |
433 | dataLength<UTRIE2_DATA_START_OFFSET | |
434 | ) { | |
435 | *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */ | |
436 | return 0; | |
437 | } | |
438 | ||
439 | size=sizeof(UTrie2Header)+trie.indexLength*2; | |
440 | switch(valueBits) { | |
441 | case UTRIE2_16_VALUE_BITS: | |
442 | size+=dataLength*2; | |
443 | break; | |
444 | case UTRIE2_32_VALUE_BITS: | |
445 | size+=dataLength*4; | |
446 | break; | |
447 | default: | |
448 | *pErrorCode=U_INVALID_FORMAT_ERROR; | |
449 | return 0; | |
450 | } | |
451 | ||
452 | if(length>=0) { | |
453 | UTrie2Header *outTrie; | |
454 | ||
455 | if(length<size) { | |
456 | *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; | |
457 | return 0; | |
458 | } | |
459 | ||
460 | outTrie=(UTrie2Header *)outData; | |
461 | ||
462 | /* swap the header */ | |
463 | ds->swapArray32(ds, &inTrie->signature, 4, &outTrie->signature, pErrorCode); | |
464 | ds->swapArray16(ds, &inTrie->options, 12, &outTrie->options, pErrorCode); | |
465 | ||
466 | /* swap the index and the data */ | |
467 | switch(valueBits) { | |
468 | case UTRIE2_16_VALUE_BITS: | |
469 | ds->swapArray16(ds, inTrie+1, (trie.indexLength+dataLength)*2, outTrie+1, pErrorCode); | |
470 | break; | |
471 | case UTRIE2_32_VALUE_BITS: | |
472 | ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode); | |
473 | ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, dataLength*4, | |
474 | (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode); | |
475 | break; | |
476 | default: | |
477 | *pErrorCode=U_INVALID_FORMAT_ERROR; | |
478 | return 0; | |
479 | } | |
480 | } | |
481 | ||
482 | return size; | |
483 | } | |
484 | ||
485 | // utrie2_swapAnyVersion() should be defined here but lives in utrie2_builder.c | |
486 | // to avoid a dependency from utrie2.cpp on utrie.c. | |
487 | ||
488 | /* enumeration -------------------------------------------------------------- */ | |
489 | ||
490 | #define MIN_VALUE(a, b) ((a)<(b) ? (a) : (b)) | |
491 | ||
492 | /* default UTrie2EnumValue() returns the input value itself */ | |
493 | static uint32_t U_CALLCONV | |
494 | enumSameValue(const void * /*context*/, uint32_t value) { | |
495 | return value; | |
496 | } | |
497 | ||
498 | /** | |
499 | * Enumerate all ranges of code points with the same relevant values. | |
500 | * The values are transformed from the raw trie entries by the enumValue function. | |
501 | * | |
502 | * Currently requires start<limit and both start and limit must be multiples | |
503 | * of UTRIE2_DATA_BLOCK_LENGTH. | |
504 | * | |
505 | * Optimizations: | |
506 | * - Skip a whole block if we know that it is filled with a single value, | |
507 | * and it is the same as we visited just before. | |
508 | * - Handle the null block specially because we know a priori that it is filled | |
509 | * with a single value. | |
510 | */ | |
511 | static void | |
512 | enumEitherTrie(const UTrie2 *trie, | |
513 | UChar32 start, UChar32 limit, | |
514 | UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange, const void *context) { | |
515 | const uint32_t *data32; | |
516 | const uint16_t *idx; | |
517 | ||
518 | uint32_t value, prevValue, initialValue; | |
519 | UChar32 c, prev, highStart; | |
520 | int32_t j, i2Block, prevI2Block, index2NullOffset, block, prevBlock, nullBlock; | |
521 | ||
522 | if(enumRange==NULL) { | |
523 | return; | |
524 | } | |
525 | if(enumValue==NULL) { | |
526 | enumValue=enumSameValue; | |
527 | } | |
528 | ||
529 | if(trie->newTrie==NULL) { | |
530 | /* frozen trie */ | |
531 | idx=trie->index; | |
532 | data32=trie->data32; | |
533 | ||
534 | index2NullOffset=trie->index2NullOffset; | |
535 | nullBlock=trie->dataNullOffset; | |
536 | } else { | |
537 | /* unfrozen, mutable trie */ | |
538 | idx=NULL; | |
539 | data32=trie->newTrie->data; | |
540 | ||
541 | index2NullOffset=trie->newTrie->index2NullOffset; | |
542 | nullBlock=trie->newTrie->dataNullOffset; | |
543 | } | |
544 | ||
545 | highStart=trie->highStart; | |
546 | ||
547 | /* get the enumeration value that corresponds to an initial-value trie data entry */ | |
548 | initialValue=enumValue(context, trie->initialValue); | |
549 | ||
550 | /* set variables for previous range */ | |
551 | prevI2Block=-1; | |
552 | prevBlock=-1; | |
553 | prev=start; | |
554 | prevValue=0; | |
555 | ||
556 | /* enumerate index-2 blocks */ | |
557 | for(c=start; c<limit && c<highStart;) { | |
558 | /* Code point limit for iterating inside this i2Block. */ | |
559 | UChar32 tempLimit=c+UTRIE2_CP_PER_INDEX_1_ENTRY; | |
560 | if(limit<tempLimit) { | |
561 | tempLimit=limit; | |
562 | } | |
563 | if(c<=0xffff) { | |
564 | if(!U_IS_SURROGATE(c)) { | |
565 | i2Block=c>>UTRIE2_SHIFT_2; | |
566 | } else if(U_IS_SURROGATE_LEAD(c)) { | |
567 | /* | |
568 | * Enumerate values for lead surrogate code points, not code units: | |
569 | * This special block has half the normal length. | |
570 | */ | |
571 | i2Block=UTRIE2_LSCP_INDEX_2_OFFSET; | |
572 | tempLimit=MIN_VALUE(0xdc00, limit); | |
573 | } else { | |
574 | /* | |
575 | * Switch back to the normal part of the index-2 table. | |
576 | * Enumerate the second half of the surrogates block. | |
577 | */ | |
578 | i2Block=0xd800>>UTRIE2_SHIFT_2; | |
579 | tempLimit=MIN_VALUE(0xe000, limit); | |
580 | } | |
581 | } else { | |
582 | /* supplementary code points */ | |
583 | if(idx!=NULL) { | |
584 | i2Block=idx[(UTRIE2_INDEX_1_OFFSET-UTRIE2_OMITTED_BMP_INDEX_1_LENGTH)+ | |
585 | (c>>UTRIE2_SHIFT_1)]; | |
586 | } else { | |
587 | i2Block=trie->newTrie->index1[c>>UTRIE2_SHIFT_1]; | |
588 | } | |
589 | if(i2Block==prevI2Block && (c-prev)>=UTRIE2_CP_PER_INDEX_1_ENTRY) { | |
590 | /* | |
591 | * The index-2 block is the same as the previous one, and filled with prevValue. | |
592 | * Only possible for supplementary code points because the linear-BMP index-2 | |
593 | * table creates unique i2Block values. | |
594 | */ | |
595 | c+=UTRIE2_CP_PER_INDEX_1_ENTRY; | |
596 | continue; | |
597 | } | |
598 | } | |
599 | prevI2Block=i2Block; | |
600 | if(i2Block==index2NullOffset) { | |
601 | /* this is the null index-2 block */ | |
602 | if(prevValue!=initialValue) { | |
603 | if(prev<c && !enumRange(context, prev, c-1, prevValue)) { | |
604 | return; | |
605 | } | |
606 | prevBlock=nullBlock; | |
607 | prev=c; | |
608 | prevValue=initialValue; | |
609 | } | |
610 | c+=UTRIE2_CP_PER_INDEX_1_ENTRY; | |
611 | } else { | |
612 | /* enumerate data blocks for one index-2 block */ | |
613 | int32_t i2, i2Limit; | |
614 | i2=(c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK; | |
615 | if((c>>UTRIE2_SHIFT_1)==(tempLimit>>UTRIE2_SHIFT_1)) { | |
616 | i2Limit=(tempLimit>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK; | |
617 | } else { | |
618 | i2Limit=UTRIE2_INDEX_2_BLOCK_LENGTH; | |
619 | } | |
620 | for(; i2<i2Limit; ++i2) { | |
621 | if(idx!=NULL) { | |
622 | block=(int32_t)idx[i2Block+i2]<<UTRIE2_INDEX_SHIFT; | |
623 | } else { | |
624 | block=trie->newTrie->index2[i2Block+i2]; | |
625 | } | |
626 | if(block==prevBlock && (c-prev)>=UTRIE2_DATA_BLOCK_LENGTH) { | |
627 | /* the block is the same as the previous one, and filled with prevValue */ | |
628 | c+=UTRIE2_DATA_BLOCK_LENGTH; | |
629 | continue; | |
630 | } | |
631 | prevBlock=block; | |
632 | if(block==nullBlock) { | |
633 | /* this is the null data block */ | |
634 | if(prevValue!=initialValue) { | |
635 | if(prev<c && !enumRange(context, prev, c-1, prevValue)) { | |
636 | return; | |
637 | } | |
638 | prev=c; | |
639 | prevValue=initialValue; | |
640 | } | |
641 | c+=UTRIE2_DATA_BLOCK_LENGTH; | |
642 | } else { | |
643 | for(j=0; j<UTRIE2_DATA_BLOCK_LENGTH; ++j) { | |
644 | value=enumValue(context, data32!=NULL ? data32[block+j] : idx[block+j]); | |
645 | if(value!=prevValue) { | |
646 | if(prev<c && !enumRange(context, prev, c-1, prevValue)) { | |
647 | return; | |
648 | } | |
649 | prev=c; | |
650 | prevValue=value; | |
651 | } | |
652 | ++c; | |
653 | } | |
654 | } | |
655 | } | |
656 | } | |
657 | } | |
658 | ||
659 | if(c>limit) { | |
660 | c=limit; /* could be higher if in the index2NullOffset */ | |
661 | } else if(c<limit) { | |
662 | /* c==highStart<limit */ | |
663 | uint32_t highValue; | |
664 | if(idx!=NULL) { | |
665 | highValue= | |
666 | data32!=NULL ? | |
667 | data32[trie->highValueIndex] : | |
668 | idx[trie->highValueIndex]; | |
669 | } else { | |
670 | highValue=trie->newTrie->data[trie->newTrie->dataLength-UTRIE2_DATA_GRANULARITY]; | |
671 | } | |
672 | value=enumValue(context, highValue); | |
673 | if(value!=prevValue) { | |
674 | if(prev<c && !enumRange(context, prev, c-1, prevValue)) { | |
675 | return; | |
676 | } | |
677 | prev=c; | |
678 | prevValue=value; | |
679 | } | |
680 | c=limit; | |
681 | } | |
682 | ||
683 | /* deliver last range */ | |
684 | enumRange(context, prev, c-1, prevValue); | |
685 | } | |
686 | ||
687 | U_CAPI void U_EXPORT2 | |
688 | utrie2_enum(const UTrie2 *trie, | |
689 | UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange, const void *context) { | |
690 | enumEitherTrie(trie, 0, 0x110000, enumValue, enumRange, context); | |
691 | } | |
692 | ||
693 | U_CAPI void U_EXPORT2 | |
694 | utrie2_enumForLeadSurrogate(const UTrie2 *trie, UChar32 lead, | |
695 | UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange, | |
696 | const void *context) { | |
697 | if(!U16_IS_LEAD(lead)) { | |
698 | return; | |
699 | } | |
700 | lead=(lead-0xd7c0)<<10; /* start code point */ | |
701 | enumEitherTrie(trie, lead, lead+0x400, enumValue, enumRange, context); | |
702 | } | |
703 | ||
704 | /* C++ convenience wrappers ------------------------------------------------- */ | |
705 | ||
706 | U_NAMESPACE_BEGIN | |
707 | ||
708 | uint16_t BackwardUTrie2StringIterator::previous16() { | |
709 | codePointLimit=codePointStart; | |
710 | if(start>=codePointStart) { | |
711 | codePoint=U_SENTINEL; | |
712 | return 0; | |
713 | } | |
714 | uint16_t result; | |
715 | UTRIE2_U16_PREV16(trie, start, codePointStart, codePoint, result); | |
716 | return result; | |
717 | } | |
718 | ||
719 | uint16_t ForwardUTrie2StringIterator::next16() { | |
720 | codePointStart=codePointLimit; | |
721 | if(codePointLimit==limit) { | |
722 | codePoint=U_SENTINEL; | |
723 | return 0; | |
724 | } | |
725 | uint16_t result; | |
726 | UTRIE2_U16_NEXT16(trie, codePointLimit, limit, codePoint, result); | |
727 | return result; | |
728 | } | |
729 | ||
730 | UTrie2 *UTrie2Singleton::getInstance(InstantiatorFn *instantiator, const void *context, | |
731 | UErrorCode &errorCode) { | |
732 | void *duplicate; | |
733 | UTrie2 *instance=(UTrie2 *)singleton.getInstance(instantiator, context, duplicate, errorCode); | |
734 | utrie2_close((UTrie2 *)duplicate); | |
735 | return instance; | |
736 | } | |
737 | ||
738 | U_NAMESPACE_END |