1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 ******************************************************************************
6 * Copyright (C) 2001-2014, International Business Machines
7 * Corporation and others. All Rights Reserved.
9 ******************************************************************************
10 * file name: utrie2_builder.cpp
12 * tab size: 8 (not used)
15 * created on: 2008sep26 (split off from utrie2.c)
16 * created by: Markus W. Scherer
18 * This is a common implementation of a Unicode trie.
19 * It is a kind of compressed, serializable table of 16- or 32-bit values associated with
20 * Unicode code points (0..0x10ffff).
21 * This is the second common version of a Unicode trie (hence the name UTrie2).
22 * See utrie2.h for a comparison.
24 * This file contains only the builder code.
25 * See utrie2.c for the runtime and enumeration code.
27 // #define UTRIE2_DEBUG
31 // #define UCPTRIE_DEBUG
33 #include "unicode/utypes.h"
35 #include "unicode/ucptrie.h"
36 #include "unicode/umutablecptrie.h"
37 #include "ucptrie_impl.h"
41 #include "utrie2_impl.h"
43 #include "utrie.h" // for utrie2_fromUTrie()
45 /* Implementation notes ----------------------------------------------------- */
48 * The UTRIE2_SHIFT_1, UTRIE2_SHIFT_2, UTRIE2_INDEX_SHIFT and other values
49 * have been chosen to minimize trie sizes overall.
50 * Most of the code is flexible enough to work with a range of values,
51 * within certain limits.
53 * Exception: Support for separate values for lead surrogate code _units_
54 * vs. code _points_ was added after the constants were fixed,
55 * and has not been tested nor particularly designed for different constant values.
56 * (Especially the utrie2_enum() code that jumps to the special LSCP index-2
59 * Requires UTRIE2_SHIFT_2<=6. Otherwise 0xc0 which is the top of the ASCII-linear data
60 * including the bad-UTF-8-data block is not a multiple of UTRIE2_DATA_BLOCK_LENGTH
61 * and map[block>>UTRIE2_SHIFT_2] (used in reference counting and compaction
62 * remapping) stops working.
64 * Requires UTRIE2_SHIFT_1>=10 because utrie2_enumForLeadSurrogate()
65 * assumes that a single index-2 block is used for 0x400 code points
66 * corresponding to one lead surrogate.
68 * Requires UTRIE2_SHIFT_1<=16. Otherwise one single index-2 block contains
69 * more than one Unicode plane, and the split of the index-2 table into a BMP
70 * part and a supplementary part, with a gap in between, would not work.
72 * Requires UTRIE2_INDEX_SHIFT>=1 not because of the code but because
73 * there is data with more than 64k distinct values,
74 * for example for Unihan collation with a separate collation weight per
78 /* Building a trie ----------------------------------------------------------*/
81 /** The null index-2 block, following the gap in the index-2 table. */
82 UNEWTRIE2_INDEX_2_NULL_OFFSET
=UNEWTRIE2_INDEX_GAP_OFFSET
+UNEWTRIE2_INDEX_GAP_LENGTH
,
84 /** The start of allocated index-2 blocks. */
85 UNEWTRIE2_INDEX_2_START_OFFSET
=UNEWTRIE2_INDEX_2_NULL_OFFSET
+UTRIE2_INDEX_2_BLOCK_LENGTH
,
88 * The null data block.
89 * Length 64=0x40 even if UTRIE2_DATA_BLOCK_LENGTH is smaller,
90 * to work with 6-bit trail bytes from 2-byte UTF-8.
92 UNEWTRIE2_DATA_NULL_OFFSET
=UTRIE2_DATA_START_OFFSET
,
94 /** The start of allocated data blocks. */
95 UNEWTRIE2_DATA_START_OFFSET
=UNEWTRIE2_DATA_NULL_OFFSET
+0x40,
98 * The start of data blocks for U+0800 and above.
99 * Below, compaction uses a block length of 64 for 2-byte UTF-8.
100 * From here on, compaction uses UTRIE2_DATA_BLOCK_LENGTH.
101 * Data values for 0x780 code points beyond ASCII.
103 UNEWTRIE2_DATA_0800_OFFSET
=UNEWTRIE2_DATA_START_OFFSET
+0x780
106 /* Start with allocation of 16k data entries. */
107 #define UNEWTRIE2_INITIAL_DATA_LENGTH ((int32_t)1<<14)
109 /* Grow about 8x each time. */
110 #define UNEWTRIE2_MEDIUM_DATA_LENGTH ((int32_t)1<<17)
113 allocIndex2Block(UNewTrie2
*trie
);
115 U_CAPI UTrie2
* U_EXPORT2
116 utrie2_open(uint32_t initialValue
, uint32_t errorValue
, UErrorCode
*pErrorCode
) {
122 if(U_FAILURE(*pErrorCode
)) {
126 trie
=(UTrie2
*)uprv_malloc(sizeof(UTrie2
));
127 newTrie
=(UNewTrie2
*)uprv_malloc(sizeof(UNewTrie2
));
128 data
=(uint32_t *)uprv_malloc(UNEWTRIE2_INITIAL_DATA_LENGTH
*4);
129 if(trie
==NULL
|| newTrie
==NULL
|| data
==NULL
) {
133 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
137 uprv_memset(trie
, 0, sizeof(UTrie2
));
138 trie
->initialValue
=initialValue
;
139 trie
->errorValue
=errorValue
;
140 trie
->highStart
=0x110000;
141 trie
->newTrie
=newTrie
;
148 newTrie
->t3
=umutablecptrie_open(initialValue
, errorValue
, pErrorCode
);
150 newTrie
->dataCapacity
=UNEWTRIE2_INITIAL_DATA_LENGTH
;
151 newTrie
->initialValue
=initialValue
;
152 newTrie
->errorValue
=errorValue
;
153 newTrie
->highStart
=0x110000;
154 newTrie
->firstFreeBlock
=0; /* no free block in the list */
155 newTrie
->isCompacted
=FALSE
;
158 * preallocate and reset
160 * - the bad-UTF-8-data block
161 * - the null data block
163 for(i
=0; i
<0x80; ++i
) {
164 newTrie
->data
[i
]=initialValue
;
167 newTrie
->data
[i
]=errorValue
;
169 for(i
=UNEWTRIE2_DATA_NULL_OFFSET
; i
<UNEWTRIE2_DATA_START_OFFSET
; ++i
) {
170 newTrie
->data
[i
]=initialValue
;
172 newTrie
->dataNullOffset
=UNEWTRIE2_DATA_NULL_OFFSET
;
173 newTrie
->dataLength
=UNEWTRIE2_DATA_START_OFFSET
;
175 /* set the index-2 indexes for the 2=0x80>>UTRIE2_SHIFT_2 ASCII data blocks */
176 for(i
=0, j
=0; j
<0x80; ++i
, j
+=UTRIE2_DATA_BLOCK_LENGTH
) {
177 newTrie
->index2
[i
]=j
;
180 /* reference counts for the bad-UTF-8-data block */
181 for(; j
<0xc0; ++i
, j
+=UTRIE2_DATA_BLOCK_LENGTH
) {
185 * Reference counts for the null data block: all blocks except for the ASCII blocks.
186 * Plus 1 so that we don't drop this block during compaction.
187 * Plus as many as needed for lead surrogate code points.
189 /* i==newTrie->dataNullOffset */
191 (0x110000>>UTRIE2_SHIFT_2
)-
192 (0x80>>UTRIE2_SHIFT_2
)+
194 UTRIE2_LSCP_INDEX_2_LENGTH
;
195 j
+=UTRIE2_DATA_BLOCK_LENGTH
;
196 for(; j
<UNEWTRIE2_DATA_START_OFFSET
; ++i
, j
+=UTRIE2_DATA_BLOCK_LENGTH
) {
201 * set the remaining indexes in the BMP index-2 block
202 * to the null data block
204 for(i
=0x80>>UTRIE2_SHIFT_2
; i
<UTRIE2_INDEX_2_BMP_LENGTH
; ++i
) {
205 newTrie
->index2
[i
]=UNEWTRIE2_DATA_NULL_OFFSET
;
209 * Fill the index gap with impossible values so that compaction
210 * does not overlap other index-2 blocks with the gap.
212 for(i
=0; i
<UNEWTRIE2_INDEX_GAP_LENGTH
; ++i
) {
213 newTrie
->index2
[UNEWTRIE2_INDEX_GAP_OFFSET
+i
]=-1;
216 /* set the indexes in the null index-2 block */
217 for(i
=0; i
<UTRIE2_INDEX_2_BLOCK_LENGTH
; ++i
) {
218 newTrie
->index2
[UNEWTRIE2_INDEX_2_NULL_OFFSET
+i
]=UNEWTRIE2_DATA_NULL_OFFSET
;
220 newTrie
->index2NullOffset
=UNEWTRIE2_INDEX_2_NULL_OFFSET
;
221 newTrie
->index2Length
=UNEWTRIE2_INDEX_2_START_OFFSET
;
223 /* set the index-1 indexes for the linear index-2 block */
225 i
<UTRIE2_OMITTED_BMP_INDEX_1_LENGTH
;
226 ++i
, j
+=UTRIE2_INDEX_2_BLOCK_LENGTH
228 newTrie
->index1
[i
]=j
;
231 /* set the remaining index-1 indexes to the null index-2 block */
232 for(; i
<UNEWTRIE2_INDEX_1_LENGTH
; ++i
) {
233 newTrie
->index1
[i
]=UNEWTRIE2_INDEX_2_NULL_OFFSET
;
237 * Preallocate and reset data for U+0080..U+07ff,
238 * for 2-byte UTF-8 which will be compacted in 64-blocks
239 * even if UTRIE2_DATA_BLOCK_LENGTH is smaller.
241 for(i
=0x80; i
<0x800; i
+=UTRIE2_DATA_BLOCK_LENGTH
) {
242 utrie2_set32(trie
, i
, initialValue
, pErrorCode
);
249 cloneBuilder(const UNewTrie2
*other
) {
252 trie
=(UNewTrie2
*)uprv_malloc(sizeof(UNewTrie2
));
257 trie
->data
=(uint32_t *)uprv_malloc(other
->dataCapacity
*4);
258 if(trie
->data
==NULL
) {
263 if(other
->t3
==nullptr) {
266 UErrorCode errorCode
=U_ZERO_ERROR
;
267 trie
->t3
=umutablecptrie_clone(other
->t3
, &errorCode
);
270 trie
->dataCapacity
=other
->dataCapacity
;
273 uprv_memcpy(trie
->index1
, other
->index1
, sizeof(trie
->index1
));
274 uprv_memcpy(trie
->index2
, other
->index2
, (size_t)other
->index2Length
*4);
275 trie
->index2NullOffset
=other
->index2NullOffset
;
276 trie
->index2Length
=other
->index2Length
;
278 uprv_memcpy(trie
->data
, other
->data
, (size_t)other
->dataLength
*4);
279 trie
->dataNullOffset
=other
->dataNullOffset
;
280 trie
->dataLength
=other
->dataLength
;
282 /* reference counters */
283 if(other
->isCompacted
) {
284 trie
->firstFreeBlock
=0;
286 uprv_memcpy(trie
->map
, other
->map
, ((size_t)other
->dataLength
>>UTRIE2_SHIFT_2
)*4);
287 trie
->firstFreeBlock
=other
->firstFreeBlock
;
290 trie
->initialValue
=other
->initialValue
;
291 trie
->errorValue
=other
->errorValue
;
292 trie
->highStart
=other
->highStart
;
293 trie
->isCompacted
=other
->isCompacted
;
298 U_CAPI UTrie2
* U_EXPORT2
299 utrie2_clone(const UTrie2
*other
, UErrorCode
*pErrorCode
) {
302 if(U_FAILURE(*pErrorCode
)) {
305 if(other
==NULL
|| (other
->memory
==NULL
&& other
->newTrie
==NULL
)) {
306 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
310 trie
=(UTrie2
*)uprv_malloc(sizeof(UTrie2
));
312 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
315 uprv_memcpy(trie
, other
, sizeof(UTrie2
));
317 if(other
->memory
!=NULL
) {
318 trie
->memory
=uprv_malloc(other
->length
);
319 if(trie
->memory
!=NULL
) {
320 trie
->isMemoryOwned
=TRUE
;
321 uprv_memcpy(trie
->memory
, other
->memory
, other
->length
);
323 /* make the clone's pointers point to its own memory */
324 trie
->index
=(uint16_t *)trie
->memory
+(other
->index
-(uint16_t *)other
->memory
);
325 if(other
->data16
!=NULL
) {
326 trie
->data16
=(uint16_t *)trie
->memory
+(other
->data16
-(uint16_t *)other
->memory
);
328 if(other
->data32
!=NULL
) {
329 trie
->data32
=(uint32_t *)trie
->memory
+(other
->data32
-(uint32_t *)other
->memory
);
332 } else /* other->newTrie!=NULL */ {
333 trie
->newTrie
=cloneBuilder(other
->newTrie
);
336 if(trie
->memory
==NULL
&& trie
->newTrie
==NULL
) {
337 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
344 typedef struct NewTrieAndStatus
{
346 UErrorCode errorCode
;
347 UBool exclusiveLimit
; /* rather than inclusive range end */
350 static UBool U_CALLCONV
351 copyEnumRange(const void *context
, UChar32 start
, UChar32 end
, uint32_t value
) {
352 NewTrieAndStatus
*nt
=(NewTrieAndStatus
*)context
;
353 if(value
!=nt
->trie
->initialValue
) {
354 if(nt
->exclusiveLimit
) {
358 utrie2_set32(nt
->trie
, start
, value
, &nt
->errorCode
);
360 utrie2_setRange32(nt
->trie
, start
, end
, value
, TRUE
, &nt
->errorCode
);
362 return U_SUCCESS(nt
->errorCode
);
369 static long countInitial(const UTrie2
*trie
) {
370 uint32_t initialValue
=trie
->initialValue
;
371 int32_t length
=trie
->dataLength
;
373 if(trie
->data16
!=nullptr) {
374 for(int32_t i
=0; i
<length
; ++i
) {
375 if(trie
->data16
[i
]==initialValue
) { ++count
; }
378 for(int32_t i
=0; i
<length
; ++i
) {
379 if(trie
->data32
[i
]==initialValue
) { ++count
; }
386 utrie_printLengths(const UTrie
*trie
) {
387 long indexLength
=trie
->indexLength
;
388 long dataLength
=(long)trie
->dataLength
;
389 long totalLength
=(long)sizeof(UTrieHeader
)+indexLength
*2+dataLength
*(trie
->data32
!=NULL
? 4 : 2);
390 printf("**UTrieLengths** index:%6ld data:%6ld serialized:%6ld\n",
391 indexLength
, dataLength
, totalLength
);
395 utrie2_printLengths(const UTrie2
*trie
, const char *which
) {
396 long indexLength
=trie
->indexLength
;
397 long dataLength
=(long)trie
->dataLength
;
398 long totalLength
=(long)sizeof(UTrie2Header
)+indexLength
*2+dataLength
*(trie
->data32
!=NULL
? 4 : 2);
399 printf("**UTrie2Lengths(%s %s)** index:%6ld data:%6ld countInitial:%6ld serialized:%6ld\n",
400 which
, trie
->name
, indexLength
, dataLength
, countInitial(trie
), totalLength
);
404 U_CAPI UTrie2
* U_EXPORT2
405 utrie2_cloneAsThawed(const UTrie2
*other
, UErrorCode
*pErrorCode
) {
406 NewTrieAndStatus context
;
409 if(U_FAILURE(*pErrorCode
)) {
412 if(other
==NULL
|| (other
->memory
==NULL
&& other
->newTrie
==NULL
)) {
413 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
416 if(other
->newTrie
!=NULL
&& !other
->newTrie
->isCompacted
) {
417 return utrie2_clone(other
, pErrorCode
); /* clone an unfrozen trie */
420 /* Clone the frozen trie by enumerating it and building a new one. */
421 context
.trie
=utrie2_open(other
->initialValue
, other
->errorValue
, pErrorCode
);
422 if(U_FAILURE(*pErrorCode
)) {
425 context
.exclusiveLimit
=FALSE
;
426 context
.errorCode
=*pErrorCode
;
427 utrie2_enum(other
, NULL
, copyEnumRange
, &context
);
428 *pErrorCode
=context
.errorCode
;
429 for(lead
=0xd800; lead
<0xdc00; ++lead
) {
431 if(other
->data32
==NULL
) {
432 value
=UTRIE2_GET16_FROM_U16_SINGLE_LEAD(other
, lead
);
434 value
=UTRIE2_GET32_FROM_U16_SINGLE_LEAD(other
, lead
);
436 if(value
!=other
->initialValue
) {
437 utrie2_set32ForLeadSurrogateCodeUnit(context
.trie
, lead
, value
, pErrorCode
);
440 if(U_FAILURE(*pErrorCode
)) {
441 utrie2_close(context
.trie
);
447 /* Almost the same as utrie2_cloneAsThawed() but copies a UTrie and freezes the clone. */
448 U_CAPI UTrie2
* U_EXPORT2
449 utrie2_fromUTrie(const UTrie
*trie1
, uint32_t errorValue
, UErrorCode
*pErrorCode
) {
450 NewTrieAndStatus context
;
453 if(U_FAILURE(*pErrorCode
)) {
457 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
460 context
.trie
=utrie2_open(trie1
->initialValue
, errorValue
, pErrorCode
);
461 if(U_FAILURE(*pErrorCode
)) {
464 context
.exclusiveLimit
=TRUE
;
465 context
.errorCode
=*pErrorCode
;
466 utrie_enum(trie1
, NULL
, copyEnumRange
, &context
);
467 *pErrorCode
=context
.errorCode
;
468 for(lead
=0xd800; lead
<0xdc00; ++lead
) {
470 if(trie1
->data32
==NULL
) {
471 value
=UTRIE_GET16_FROM_LEAD(trie1
, lead
);
473 value
=UTRIE_GET32_FROM_LEAD(trie1
, lead
);
475 if(value
!=trie1
->initialValue
) {
476 utrie2_set32ForLeadSurrogateCodeUnit(context
.trie
, lead
, value
, pErrorCode
);
479 if(U_SUCCESS(*pErrorCode
)) {
480 utrie2_freeze(context
.trie
,
481 trie1
->data32
!=NULL
? UTRIE2_32_VALUE_BITS
: UTRIE2_16_VALUE_BITS
,
485 if(U_SUCCESS(*pErrorCode
)) {
486 utrie_printLengths(trie1
);
487 utrie2_printLengths(context
.trie
, "fromUTrie");
490 if(U_FAILURE(*pErrorCode
)) {
491 utrie2_close(context
.trie
);
498 isInNullBlock(UNewTrie2
*trie
, UChar32 c
, UBool forLSCP
) {
501 if(U_IS_LEAD(c
) && forLSCP
) {
502 i2
=(UTRIE2_LSCP_INDEX_2_OFFSET
-(0xd800>>UTRIE2_SHIFT_2
))+
505 i2
=trie
->index1
[c
>>UTRIE2_SHIFT_1
]+
506 ((c
>>UTRIE2_SHIFT_2
)&UTRIE2_INDEX_2_MASK
);
508 block
=trie
->index2
[i2
];
509 return (UBool
)(block
==trie
->dataNullOffset
);
513 allocIndex2Block(UNewTrie2
*trie
) {
514 int32_t newBlock
, newTop
;
516 newBlock
=trie
->index2Length
;
517 newTop
=newBlock
+UTRIE2_INDEX_2_BLOCK_LENGTH
;
518 if(newTop
>UPRV_LENGTHOF(trie
->index2
)) {
520 * Should never occur.
521 * Either UTRIE2_MAX_BUILD_TIME_INDEX_LENGTH is incorrect,
522 * or the code writes more values than should be possible.
526 trie
->index2Length
=newTop
;
527 uprv_memcpy(trie
->index2
+newBlock
, trie
->index2
+trie
->index2NullOffset
, UTRIE2_INDEX_2_BLOCK_LENGTH
*4);
532 getIndex2Block(UNewTrie2
*trie
, UChar32 c
, UBool forLSCP
) {
535 if(U_IS_LEAD(c
) && forLSCP
) {
536 return UTRIE2_LSCP_INDEX_2_OFFSET
;
539 i1
=c
>>UTRIE2_SHIFT_1
;
541 if(i2
==trie
->index2NullOffset
) {
542 i2
=allocIndex2Block(trie
);
544 return -1; /* program error */
552 allocDataBlock(UNewTrie2
*trie
, int32_t copyBlock
) {
553 int32_t newBlock
, newTop
;
555 if(trie
->firstFreeBlock
!=0) {
556 /* get the first free block */
557 newBlock
=trie
->firstFreeBlock
;
558 trie
->firstFreeBlock
=-trie
->map
[newBlock
>>UTRIE2_SHIFT_2
];
560 /* get a new block from the high end */
561 newBlock
=trie
->dataLength
;
562 newTop
=newBlock
+UTRIE2_DATA_BLOCK_LENGTH
;
563 if(newTop
>trie
->dataCapacity
) {
564 /* out of memory in the data array */
568 if(trie
->dataCapacity
<UNEWTRIE2_MEDIUM_DATA_LENGTH
) {
569 capacity
=UNEWTRIE2_MEDIUM_DATA_LENGTH
;
570 } else if(trie
->dataCapacity
<UNEWTRIE2_MAX_DATA_LENGTH
) {
571 capacity
=UNEWTRIE2_MAX_DATA_LENGTH
;
574 * Should never occur.
575 * Either UNEWTRIE2_MAX_DATA_LENGTH is incorrect,
576 * or the code writes more values than should be possible.
580 data
=(uint32_t *)uprv_malloc(capacity
*4);
584 uprv_memcpy(data
, trie
->data
, (size_t)trie
->dataLength
*4);
585 uprv_free(trie
->data
);
587 trie
->dataCapacity
=capacity
;
589 trie
->dataLength
=newTop
;
591 uprv_memcpy(trie
->data
+newBlock
, trie
->data
+copyBlock
, UTRIE2_DATA_BLOCK_LENGTH
*4);
592 trie
->map
[newBlock
>>UTRIE2_SHIFT_2
]=0;
596 /* call when the block's reference counter reaches 0 */
598 releaseDataBlock(UNewTrie2
*trie
, int32_t block
) {
599 /* put this block at the front of the free-block chain */
600 trie
->map
[block
>>UTRIE2_SHIFT_2
]=-trie
->firstFreeBlock
;
601 trie
->firstFreeBlock
=block
;
605 isWritableBlock(UNewTrie2
*trie
, int32_t block
) {
606 return (UBool
)(block
!=trie
->dataNullOffset
&& 1==trie
->map
[block
>>UTRIE2_SHIFT_2
]);
610 setIndex2Entry(UNewTrie2
*trie
, int32_t i2
, int32_t block
) {
612 ++trie
->map
[block
>>UTRIE2_SHIFT_2
]; /* increment first, in case block==oldBlock! */
613 oldBlock
=trie
->index2
[i2
];
614 if(0 == --trie
->map
[oldBlock
>>UTRIE2_SHIFT_2
]) {
615 releaseDataBlock(trie
, oldBlock
);
617 trie
->index2
[i2
]=block
;
621 * No error checking for illegal arguments.
623 * @return -1 if no new data block available (out of memory in data array)
627 getDataBlock(UNewTrie2
*trie
, UChar32 c
, UBool forLSCP
) {
628 int32_t i2
, oldBlock
, newBlock
;
630 i2
=getIndex2Block(trie
, c
, forLSCP
);
632 return -1; /* program error */
635 i2
+=(c
>>UTRIE2_SHIFT_2
)&UTRIE2_INDEX_2_MASK
;
636 oldBlock
=trie
->index2
[i2
];
637 if(isWritableBlock(trie
, oldBlock
)) {
641 /* allocate a new data block */
642 newBlock
=allocDataBlock(trie
, oldBlock
);
644 /* out of memory in the data array */
647 setIndex2Entry(trie
, i2
, newBlock
);
652 * @return TRUE if the value was successfully set
655 set32(UNewTrie2
*trie
,
656 UChar32 c
, UBool forLSCP
, uint32_t value
,
657 UErrorCode
*pErrorCode
) {
660 if(trie
==NULL
|| trie
->isCompacted
) {
661 *pErrorCode
=U_NO_WRITE_PERMISSION
;
665 umutablecptrie_set(trie
->t3
, c
, value
, pErrorCode
);
668 block
=getDataBlock(trie
, c
, forLSCP
);
670 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
674 trie
->data
[block
+(c
&UTRIE2_DATA_MASK
)]=value
;
677 U_CAPI
void U_EXPORT2
678 utrie2_set32(UTrie2
*trie
, UChar32 c
, uint32_t value
, UErrorCode
*pErrorCode
) {
679 if(U_FAILURE(*pErrorCode
)) {
682 if((uint32_t)c
>0x10ffff) {
683 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
686 set32(trie
->newTrie
, c
, TRUE
, value
, pErrorCode
);
689 U_CAPI
void U_EXPORT2
690 utrie2_set32ForLeadSurrogateCodeUnit(UTrie2
*trie
,
691 UChar32 c
, uint32_t value
,
692 UErrorCode
*pErrorCode
) {
693 if(U_FAILURE(*pErrorCode
)) {
697 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
700 set32(trie
->newTrie
, c
, FALSE
, value
, pErrorCode
);
704 writeBlock(uint32_t *block
, uint32_t value
) {
705 uint32_t *limit
=block
+UTRIE2_DATA_BLOCK_LENGTH
;
712 * initialValue is ignored if overwrite=TRUE
716 fillBlock(uint32_t *block
, UChar32 start
, UChar32 limit
,
717 uint32_t value
, uint32_t initialValue
, UBool overwrite
) {
723 while(block
<pLimit
) {
727 while(block
<pLimit
) {
728 if(*block
==initialValue
) {
736 U_CAPI
void U_EXPORT2
737 utrie2_setRange32(UTrie2
*trie
,
738 UChar32 start
, UChar32 end
,
739 uint32_t value
, UBool overwrite
,
740 UErrorCode
*pErrorCode
) {
742 * repeat value in [start..end]
743 * mark index values for repeat-data blocks by setting bit 31 of the index values
744 * fill around existing values if any, if(overwrite)
747 int32_t block
, rest
, repeatBlock
;
750 if(U_FAILURE(*pErrorCode
)) {
753 if((uint32_t)start
>0x10ffff || (uint32_t)end
>0x10ffff || start
>end
) {
754 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
757 newTrie
=trie
->newTrie
;
758 if(newTrie
==NULL
|| newTrie
->isCompacted
) {
759 *pErrorCode
=U_NO_WRITE_PERMISSION
;
763 umutablecptrie_setRange(newTrie
->t3
, start
, end
, value
, pErrorCode
);
765 if(!overwrite
&& value
==newTrie
->initialValue
) {
766 return; /* nothing to do */
770 if(start
&UTRIE2_DATA_MASK
) {
773 /* set partial block at [start..following block boundary[ */
774 block
=getDataBlock(newTrie
, start
, TRUE
);
776 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
780 nextStart
=(start
+UTRIE2_DATA_MASK
)&~UTRIE2_DATA_MASK
;
781 if(nextStart
<=limit
) {
782 fillBlock(newTrie
->data
+block
, start
&UTRIE2_DATA_MASK
, UTRIE2_DATA_BLOCK_LENGTH
,
783 value
, newTrie
->initialValue
, overwrite
);
786 fillBlock(newTrie
->data
+block
, start
&UTRIE2_DATA_MASK
, limit
&UTRIE2_DATA_MASK
,
787 value
, newTrie
->initialValue
, overwrite
);
792 /* number of positions in the last, partial block */
793 rest
=limit
&UTRIE2_DATA_MASK
;
795 /* round down limit to a block boundary */
796 limit
&=~UTRIE2_DATA_MASK
;
798 /* iterate over all-value blocks */
799 if(value
==newTrie
->initialValue
) {
800 repeatBlock
=newTrie
->dataNullOffset
;
807 UBool setRepeatBlock
=FALSE
;
809 if(value
==newTrie
->initialValue
&& isInNullBlock(newTrie
, start
, TRUE
)) {
810 start
+=UTRIE2_DATA_BLOCK_LENGTH
; /* nothing to do */
814 /* get index value */
815 i2
=getIndex2Block(newTrie
, start
, TRUE
);
817 *pErrorCode
=U_INTERNAL_PROGRAM_ERROR
;
820 i2
+=(start
>>UTRIE2_SHIFT_2
)&UTRIE2_INDEX_2_MASK
;
821 block
=newTrie
->index2
[i2
];
822 if(isWritableBlock(newTrie
, block
)) {
823 /* already allocated */
824 if(overwrite
&& block
>=UNEWTRIE2_DATA_0800_OFFSET
) {
826 * We overwrite all values, and it's not a
827 * protected (ASCII-linear or 2-byte UTF-8) block:
828 * replace with the repeatBlock.
832 /* !overwrite, or protected block: just write the values into this block */
833 fillBlock(newTrie
->data
+block
,
834 0, UTRIE2_DATA_BLOCK_LENGTH
,
835 value
, newTrie
->initialValue
, overwrite
);
837 } else if(newTrie
->data
[block
]!=value
&& (overwrite
|| block
==newTrie
->dataNullOffset
)) {
839 * Set the repeatBlock instead of the null block or previous repeat block:
841 * If !isWritableBlock() then all entries in the block have the same value
842 * because it's the null block or a range block (the repeatBlock from a previous
843 * call to utrie2_setRange32()).
844 * No other blocks are used multiple times before compacting.
846 * The null block is the only non-writable block with the initialValue because
847 * of the repeatBlock initialization above. (If value==initialValue, then
848 * the repeatBlock will be the null data block.)
850 * We set our repeatBlock if the desired value differs from the block's value,
851 * and if we overwrite any data or if the data is all initial values
852 * (which is the same as the block being the null block, see above).
858 setIndex2Entry(newTrie
, i2
, repeatBlock
);
860 /* create and set and fill the repeatBlock */
861 repeatBlock
=getDataBlock(newTrie
, start
, TRUE
);
863 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
866 writeBlock(newTrie
->data
+repeatBlock
, value
);
870 start
+=UTRIE2_DATA_BLOCK_LENGTH
;
874 /* set partial block at [last block boundary..limit[ */
875 block
=getDataBlock(newTrie
, start
, TRUE
);
877 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
881 fillBlock(newTrie
->data
+block
, 0, rest
, value
, newTrie
->initialValue
, overwrite
);
887 /* compaction --------------------------------------------------------------- */
890 equal_int32(const int32_t *s
, const int32_t *t
, int32_t length
) {
891 while(length
>0 && *s
==*t
) {
896 return (UBool
)(length
==0);
900 equal_uint32(const uint32_t *s
, const uint32_t *t
, int32_t length
) {
901 while(length
>0 && *s
==*t
) {
906 return (UBool
)(length
==0);
910 findSameIndex2Block(const int32_t *idx
, int32_t index2Length
, int32_t otherBlock
) {
913 /* ensure that we do not even partially get past index2Length */
914 index2Length
-=UTRIE2_INDEX_2_BLOCK_LENGTH
;
916 for(block
=0; block
<=index2Length
; ++block
) {
917 if(equal_int32(idx
+block
, idx
+otherBlock
, UTRIE2_INDEX_2_BLOCK_LENGTH
)) {
925 findSameDataBlock(const uint32_t *data
, int32_t dataLength
, int32_t otherBlock
, int32_t blockLength
) {
928 /* ensure that we do not even partially get past dataLength */
929 dataLength
-=blockLength
;
931 for(block
=0; block
<=dataLength
; block
+=UTRIE2_DATA_GRANULARITY
) {
932 if(equal_uint32(data
+block
, data
+otherBlock
, blockLength
)) {
940 * Find the start of the last range in the trie by enumerating backward.
941 * Indexes for supplementary code points higher than this will be omitted.
944 findHighStart(UNewTrie2
*trie
, uint32_t highValue
) {
945 const uint32_t *data32
;
947 uint32_t value
, initialValue
;
949 int32_t i1
, i2
, j
, i2Block
, prevI2Block
, index2NullOffset
, block
, prevBlock
, nullBlock
;
952 initialValue
=trie
->initialValue
;
954 index2NullOffset
=trie
->index2NullOffset
;
955 nullBlock
=trie
->dataNullOffset
;
957 /* set variables for previous range */
958 if(highValue
==initialValue
) {
959 prevI2Block
=index2NullOffset
;
967 /* enumerate index-2 blocks */
968 i1
=UNEWTRIE2_INDEX_1_LENGTH
;
971 i2Block
=trie
->index1
[--i1
];
972 if(i2Block
==prevI2Block
) {
973 /* the index-2 block is the same as the previous one, and filled with highValue */
974 c
-=UTRIE2_CP_PER_INDEX_1_ENTRY
;
978 if(i2Block
==index2NullOffset
) {
979 /* this is the null index-2 block */
980 if(highValue
!=initialValue
) {
983 c
-=UTRIE2_CP_PER_INDEX_1_ENTRY
;
985 /* enumerate data blocks for one index-2 block */
986 for(i2
=UTRIE2_INDEX_2_BLOCK_LENGTH
; i2
>0;) {
987 block
=trie
->index2
[i2Block
+ --i2
];
988 if(block
==prevBlock
) {
989 /* the block is the same as the previous one, and filled with highValue */
990 c
-=UTRIE2_DATA_BLOCK_LENGTH
;
994 if(block
==nullBlock
) {
995 /* this is the null data block */
996 if(highValue
!=initialValue
) {
999 c
-=UTRIE2_DATA_BLOCK_LENGTH
;
1001 for(j
=UTRIE2_DATA_BLOCK_LENGTH
; j
>0;) {
1002 value
=data32
[block
+ --j
];
1003 if(value
!=highValue
) {
1013 /* deliver last range */
1018 * Compact a build-time trie.
1021 * - removes blocks that are identical with earlier ones
1022 * - overlaps adjacent blocks as much as possible (if overlap==TRUE)
1023 * - moves blocks in steps of the data granularity
1024 * - moves and overlaps blocks that overlap with multiple values in the overlap region
1027 * - try to move and overlap blocks that are not already adjacent
1030 compactData(UNewTrie2
*trie
) {
1032 int32_t countSame
=0, sumOverlaps
=0;
1035 int32_t start
, newStart
, movedStart
;
1036 int32_t blockLength
, overlap
;
1037 int32_t i
, mapIndex
, blockCount
;
1039 /* do not compact linear-ASCII data */
1040 newStart
=UTRIE2_DATA_START_OFFSET
;
1041 for(start
=0, i
=0; start
<newStart
; start
+=UTRIE2_DATA_BLOCK_LENGTH
, ++i
) {
1046 * Start with a block length of 64 for 2-byte UTF-8,
1047 * then switch to UTRIE2_DATA_BLOCK_LENGTH.
1050 blockCount
=blockLength
>>UTRIE2_SHIFT_2
;
1051 for(start
=newStart
; start
<trie
->dataLength
;) {
1053 * start: index of first entry of current block
1054 * newStart: index where the current block is to be moved
1055 * (right after current end of already-compacted data)
1057 if(start
==UNEWTRIE2_DATA_0800_OFFSET
) {
1058 blockLength
=UTRIE2_DATA_BLOCK_LENGTH
;
1062 /* skip blocks that are not used */
1063 if(trie
->map
[start
>>UTRIE2_SHIFT_2
]<=0) {
1064 /* advance start to the next block */
1067 /* leave newStart with the previous block! */
1071 /* search for an identical block */
1072 if( (movedStart
=findSameDataBlock(trie
->data
, newStart
, start
, blockLength
))
1078 /* found an identical block, set the other block's index value for the current block */
1079 for(i
=blockCount
, mapIndex
=start
>>UTRIE2_SHIFT_2
; i
>0; --i
) {
1080 trie
->map
[mapIndex
++]=movedStart
;
1081 movedStart
+=UTRIE2_DATA_BLOCK_LENGTH
;
1084 /* advance start to the next block */
1087 /* leave newStart with the previous block! */
1091 /* see if the beginning of this block can be overlapped with the end of the previous block */
1092 /* look for maximum overlap (modulo granularity) with the previous, adjacent block */
1093 for(overlap
=blockLength
-UTRIE2_DATA_GRANULARITY
;
1094 overlap
>0 && !equal_uint32(trie
->data
+(newStart
-overlap
), trie
->data
+start
, overlap
);
1095 overlap
-=UTRIE2_DATA_GRANULARITY
) {}
1098 sumOverlaps
+=overlap
;
1100 if(overlap
>0 || newStart
<start
) {
1101 /* some overlap, or just move the whole block */
1102 movedStart
=newStart
-overlap
;
1103 for(i
=blockCount
, mapIndex
=start
>>UTRIE2_SHIFT_2
; i
>0; --i
) {
1104 trie
->map
[mapIndex
++]=movedStart
;
1105 movedStart
+=UTRIE2_DATA_BLOCK_LENGTH
;
1108 /* move the non-overlapping indexes to their new positions */
1110 for(i
=blockLength
-overlap
; i
>0; --i
) {
1111 trie
->data
[newStart
++]=trie
->data
[start
++];
1113 } else /* no overlap && newStart==start */ {
1114 for(i
=blockCount
, mapIndex
=start
>>UTRIE2_SHIFT_2
; i
>0; --i
) {
1115 trie
->map
[mapIndex
++]=start
;
1116 start
+=UTRIE2_DATA_BLOCK_LENGTH
;
1122 /* now adjust the index-2 table */
1123 for(i
=0; i
<trie
->index2Length
; ++i
) {
1124 if(i
==UNEWTRIE2_INDEX_GAP_OFFSET
) {
1125 /* Gap indexes are invalid (-1). Skip over the gap. */
1126 i
+=UNEWTRIE2_INDEX_GAP_LENGTH
;
1128 trie
->index2
[i
]=trie
->map
[trie
->index2
[i
]>>UTRIE2_SHIFT_2
];
1130 trie
->dataNullOffset
=trie
->map
[trie
->dataNullOffset
>>UTRIE2_SHIFT_2
];
1132 /* ensure dataLength alignment */
1133 while((newStart
&(UTRIE2_DATA_GRANULARITY
-1))!=0) {
1134 trie
->data
[newStart
++]=trie
->initialValue
;
1138 /* we saved some space */
1139 printf("compacting UTrie2: count of 32-bit data words %lu->%lu countSame=%ld sumOverlaps=%ld\n",
1140 (long)trie
->dataLength
, (long)newStart
, (long)countSame
, (long)sumOverlaps
);
1143 trie
->dataLength
=newStart
;
1147 compactIndex2(UNewTrie2
*trie
) {
1148 int32_t i
, start
, newStart
, movedStart
, overlap
;
1150 /* do not compact linear-BMP index-2 blocks */
1151 newStart
=UTRIE2_INDEX_2_BMP_LENGTH
;
1152 for(start
=0, i
=0; start
<newStart
; start
+=UTRIE2_INDEX_2_BLOCK_LENGTH
, ++i
) {
1156 /* Reduce the index table gap to what will be needed at runtime. */
1157 newStart
+=UTRIE2_UTF8_2B_INDEX_2_LENGTH
+((trie
->highStart
-0x10000)>>UTRIE2_SHIFT_1
);
1159 for(start
=UNEWTRIE2_INDEX_2_NULL_OFFSET
; start
<trie
->index2Length
;) {
1161 * start: index of first entry of current block
1162 * newStart: index where the current block is to be moved
1163 * (right after current end of already-compacted data)
1166 /* search for an identical block */
1167 if( (movedStart
=findSameIndex2Block(trie
->index2
, newStart
, start
))
1170 /* found an identical block, set the other block's index value for the current block */
1171 trie
->map
[start
>>UTRIE2_SHIFT_1_2
]=movedStart
;
1173 /* advance start to the next block */
1174 start
+=UTRIE2_INDEX_2_BLOCK_LENGTH
;
1176 /* leave newStart with the previous block! */
1180 /* see if the beginning of this block can be overlapped with the end of the previous block */
1181 /* look for maximum overlap with the previous, adjacent block */
1182 for(overlap
=UTRIE2_INDEX_2_BLOCK_LENGTH
-1;
1183 overlap
>0 && !equal_int32(trie
->index2
+(newStart
-overlap
), trie
->index2
+start
, overlap
);
1186 if(overlap
>0 || newStart
<start
) {
1187 /* some overlap, or just move the whole block */
1188 trie
->map
[start
>>UTRIE2_SHIFT_1_2
]=newStart
-overlap
;
1190 /* move the non-overlapping indexes to their new positions */
1192 for(i
=UTRIE2_INDEX_2_BLOCK_LENGTH
-overlap
; i
>0; --i
) {
1193 trie
->index2
[newStart
++]=trie
->index2
[start
++];
1195 } else /* no overlap && newStart==start */ {
1196 trie
->map
[start
>>UTRIE2_SHIFT_1_2
]=start
;
1197 start
+=UTRIE2_INDEX_2_BLOCK_LENGTH
;
1202 /* now adjust the index-1 table */
1203 for(i
=0; i
<UNEWTRIE2_INDEX_1_LENGTH
; ++i
) {
1204 trie
->index1
[i
]=trie
->map
[trie
->index1
[i
]>>UTRIE2_SHIFT_1_2
];
1206 trie
->index2NullOffset
=trie
->map
[trie
->index2NullOffset
>>UTRIE2_SHIFT_1_2
];
1209 * Ensure data table alignment:
1210 * Needs to be granularity-aligned for 16-bit trie
1211 * (so that dataMove will be down-shiftable),
1212 * and 2-aligned for uint32_t data.
1214 while((newStart
&((UTRIE2_DATA_GRANULARITY
-1)|1))!=0) {
1215 /* Arbitrary value: 0x3fffc not possible for real data. */
1216 trie
->index2
[newStart
++]=(int32_t)0xffff<<UTRIE2_INDEX_SHIFT
;
1220 /* we saved some space */
1221 printf("compacting UTrie2: count of 16-bit index words %lu->%lu\n",
1222 (long)trie
->index2Length
, (long)newStart
);
1225 trie
->index2Length
=newStart
;
1229 compactTrie(UTrie2
*trie
, UErrorCode
*pErrorCode
) {
1231 UChar32 highStart
, suppHighStart
;
1234 newTrie
=trie
->newTrie
;
1236 /* find highStart and round it up */
1237 highValue
=utrie2_get32(trie
, 0x10ffff);
1238 highStart
=findHighStart(newTrie
, highValue
);
1239 highStart
=(highStart
+(UTRIE2_CP_PER_INDEX_1_ENTRY
-1))&~(UTRIE2_CP_PER_INDEX_1_ENTRY
-1);
1240 if(highStart
==0x110000) {
1241 highValue
=trie
->errorValue
;
1245 * Set trie->highStart only after utrie2_get32(trie, highStart).
1246 * Otherwise utrie2_get32(trie, highStart) would try to read the highValue.
1248 trie
->highStart
=newTrie
->highStart
=highStart
;
1251 printf("UTrie2: highStart U+%06lx highValue 0x%lx initialValue 0x%lx\n",
1252 (long)highStart
, (long)highValue
, (long)trie
->initialValue
);
1255 if(highStart
<0x110000) {
1256 /* Blank out [highStart..10ffff] to release associated data blocks. */
1257 suppHighStart
= highStart
<=0x10000 ? 0x10000 : highStart
;
1258 utrie2_setRange32(trie
, suppHighStart
, 0x10ffff, trie
->initialValue
, TRUE
, pErrorCode
);
1259 if(U_FAILURE(*pErrorCode
)) {
1264 compactData(newTrie
);
1265 if(highStart
>0x10000) {
1266 compactIndex2(newTrie
);
1269 printf("UTrie2: highStart U+%04lx count of 16-bit index words %lu->%lu\n",
1270 (long)highStart
, (long)trie
->newTrie
->index2Length
, (long)UTRIE2_INDEX_1_OFFSET
);
1275 * Store the highValue in the data array and round up the dataLength.
1276 * Must be done after compactData() because that assumes that dataLength
1277 * is a multiple of UTRIE2_DATA_BLOCK_LENGTH.
1279 newTrie
->data
[newTrie
->dataLength
++]=highValue
;
1280 while((newTrie
->dataLength
&(UTRIE2_DATA_GRANULARITY
-1))!=0) {
1281 newTrie
->data
[newTrie
->dataLength
++]=trie
->initialValue
;
1284 newTrie
->isCompacted
=TRUE
;
1287 /* serialization ------------------------------------------------------------ */
1290 * Maximum length of the runtime index array.
1291 * Limited by its own 16-bit index values, and by uint16_t UTrie2Header.indexLength.
1292 * (The actual maximum length is lower,
1293 * (0x110000>>UTRIE2_SHIFT_2)+UTRIE2_UTF8_2B_INDEX_2_LENGTH+UTRIE2_MAX_INDEX_1_LENGTH.)
1295 #define UTRIE2_MAX_INDEX_LENGTH 0xffff
1298 * Maximum length of the runtime data array.
1299 * Limited by 16-bit index values that are left-shifted by UTRIE2_INDEX_SHIFT,
1300 * and by uint16_t UTrie2Header.shiftedDataLength.
1302 #define UTRIE2_MAX_DATA_LENGTH (0xffff<<UTRIE2_INDEX_SHIFT)
1304 /* Compact and internally serialize the trie. */
1305 U_CAPI
void U_EXPORT2
1306 utrie2_freeze(UTrie2
*trie
, UTrie2ValueBits valueBits
, UErrorCode
*pErrorCode
) {
1308 UTrie2Header
*header
;
1312 int32_t allIndexesLength
;
1313 int32_t dataMove
; /* >0 if the data is moved to the end of the index array */
1316 /* argument check */
1317 if(U_FAILURE(*pErrorCode
)) {
1321 valueBits
<0 || UTRIE2_COUNT_VALUE_BITS
<=valueBits
1323 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
1326 newTrie
=trie
->newTrie
;
1328 /* already frozen */
1329 UTrie2ValueBits frozenValueBits
=
1330 trie
->data16
!=NULL
? UTRIE2_16_VALUE_BITS
: UTRIE2_32_VALUE_BITS
;
1331 if(valueBits
!=frozenValueBits
) {
1332 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
1337 /* compact if necessary */
1338 if(!newTrie
->isCompacted
) {
1339 compactTrie(trie
, pErrorCode
);
1340 if(U_FAILURE(*pErrorCode
)) {
1344 highStart
=trie
->highStart
;
1346 if(highStart
<=0x10000) {
1347 allIndexesLength
=UTRIE2_INDEX_1_OFFSET
;
1349 allIndexesLength
=newTrie
->index2Length
;
1351 if(valueBits
==UTRIE2_16_VALUE_BITS
) {
1352 dataMove
=allIndexesLength
;
1357 /* are indexLength and dataLength within limits? */
1358 if( /* for unshifted indexLength */
1359 allIndexesLength
>UTRIE2_MAX_INDEX_LENGTH
||
1360 /* for unshifted dataNullOffset */
1361 (dataMove
+newTrie
->dataNullOffset
)>0xffff ||
1362 /* for unshifted 2-byte UTF-8 index-2 values */
1363 (dataMove
+UNEWTRIE2_DATA_0800_OFFSET
)>0xffff ||
1364 /* for shiftedDataLength */
1365 (dataMove
+newTrie
->dataLength
)>UTRIE2_MAX_DATA_LENGTH
1367 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
1371 /* calculate the total serialized length */
1372 length
=sizeof(UTrie2Header
)+allIndexesLength
*2;
1373 if(valueBits
==UTRIE2_16_VALUE_BITS
) {
1374 length
+=newTrie
->dataLength
*2;
1376 length
+=newTrie
->dataLength
*4;
1379 trie
->memory
=uprv_malloc(length
);
1380 if(trie
->memory
==NULL
) {
1381 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
1384 trie
->length
=length
;
1385 trie
->isMemoryOwned
=TRUE
;
1387 trie
->indexLength
=allIndexesLength
;
1388 trie
->dataLength
=newTrie
->dataLength
;
1389 if(highStart
<=0x10000) {
1390 trie
->index2NullOffset
=0xffff;
1392 trie
->index2NullOffset
=static_cast<uint16_t>(UTRIE2_INDEX_2_OFFSET
+newTrie
->index2NullOffset
);
1394 trie
->dataNullOffset
=(uint16_t)(dataMove
+newTrie
->dataNullOffset
);
1395 trie
->highValueIndex
=dataMove
+trie
->dataLength
-UTRIE2_DATA_GRANULARITY
;
1397 /* set the header fields */
1398 header
=(UTrie2Header
*)trie
->memory
;
1400 header
->signature
=UTRIE2_SIG
; /* "Tri2" */
1401 header
->options
=(uint16_t)valueBits
;
1403 header
->indexLength
=(uint16_t)trie
->indexLength
;
1404 header
->shiftedDataLength
=(uint16_t)(trie
->dataLength
>>UTRIE2_INDEX_SHIFT
);
1405 header
->index2NullOffset
=trie
->index2NullOffset
;
1406 header
->dataNullOffset
=trie
->dataNullOffset
;
1407 header
->shiftedHighStart
=(uint16_t)(highStart
>>UTRIE2_SHIFT_1
);
1409 /* fill the index and data arrays */
1410 dest16
=(uint16_t *)(header
+1);
1413 /* write the index-2 array values shifted right by UTRIE2_INDEX_SHIFT, after adding dataMove */
1414 p
=(uint32_t *)newTrie
->index2
;
1415 for(i
=UTRIE2_INDEX_2_BMP_LENGTH
; i
>0; --i
) {
1416 *dest16
++=(uint16_t)((dataMove
+ *p
++)>>UTRIE2_INDEX_SHIFT
);
1419 /* write UTF-8 2-byte index-2 values, not right-shifted */
1420 for(i
=0; i
<(0xc2-0xc0); ++i
) { /* C0..C1 */
1421 *dest16
++=(uint16_t)(dataMove
+UTRIE2_BAD_UTF8_DATA_OFFSET
);
1423 for(; i
<(0xe0-0xc0); ++i
) { /* C2..DF */
1424 *dest16
++=(uint16_t)(dataMove
+newTrie
->index2
[i
<<(6-UTRIE2_SHIFT_2
)]);
1427 if(highStart
>0x10000) {
1428 int32_t index1Length
=(highStart
-0x10000)>>UTRIE2_SHIFT_1
;
1429 int32_t index2Offset
=UTRIE2_INDEX_2_BMP_LENGTH
+UTRIE2_UTF8_2B_INDEX_2_LENGTH
+index1Length
;
1431 /* write 16-bit index-1 values for supplementary code points */
1432 p
=(uint32_t *)newTrie
->index1
+UTRIE2_OMITTED_BMP_INDEX_1_LENGTH
;
1433 for(i
=index1Length
; i
>0; --i
) {
1434 *dest16
++=(uint16_t)(UTRIE2_INDEX_2_OFFSET
+ *p
++);
1438 * write the index-2 array values for supplementary code points,
1439 * shifted right by UTRIE2_INDEX_SHIFT, after adding dataMove
1441 p
=(uint32_t *)newTrie
->index2
+index2Offset
;
1442 for(i
=newTrie
->index2Length
-index2Offset
; i
>0; --i
) {
1443 *dest16
++=(uint16_t)((dataMove
+ *p
++)>>UTRIE2_INDEX_SHIFT
);
1447 /* write the 16/32-bit data array */
1449 case UTRIE2_16_VALUE_BITS
:
1450 /* write 16-bit data values */
1451 trie
->data16
=dest16
;
1454 for(i
=newTrie
->dataLength
; i
>0; --i
) {
1455 *dest16
++=(uint16_t)*p
++;
1458 case UTRIE2_32_VALUE_BITS
:
1459 /* write 32-bit data values */
1461 trie
->data32
=(uint32_t *)dest16
;
1462 uprv_memcpy(dest16
, newTrie
->data
, (size_t)newTrie
->dataLength
*4);
1465 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
1470 utrie2_printLengths(trie
, "");
1473 #ifdef UCPTRIE_DEBUG
1474 umutablecptrie_setName(newTrie
->t3
, trie
->name
);
1476 umutablecptrie_buildImmutable(
1477 newTrie
->t3
, UCPTRIE_TYPE_FAST
, (UCPTrieValueWidth
)valueBits
, pErrorCode
));
1479 /* Delete the UNewTrie2. */
1480 uprv_free(newTrie
->data
);