2 *******************************************************************************
4 * Copyright (C) 2001-2008, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
8 * file name: ucol_bld.cpp
10 * tab size: 8 (not used)
14 * created by: Vladimir Weinstein
16 * This module builds a collator based on the rule set.
20 #include "unicode/utypes.h"
22 #if !UCONFIG_NO_COLLATION
24 #include "unicode/ucoleitr.h"
25 #include "unicode/udata.h"
26 #include "unicode/uchar.h"
27 #include "unicode/uniset.h"
36 static const InverseUCATableHeader
* _staticInvUCA
= NULL
;
37 static UDataMemory
* invUCA_DATA_MEM
= NULL
;
40 static UBool U_CALLCONV
41 isAcceptableInvUCA(void * /*context*/,
42 const char * /*type*/, const char * /*name*/,
43 const UDataInfo
*pInfo
)
45 /* context, type & name are intentionally not used */
46 if( pInfo
->size
>=20 &&
47 pInfo
->isBigEndian
==U_IS_BIG_ENDIAN
&&
48 pInfo
->charsetFamily
==U_CHARSET_FAMILY
&&
49 pInfo
->dataFormat
[0]==INVUCA_DATA_FORMAT_0
&& /* dataFormat="InvC" */
50 pInfo
->dataFormat
[1]==INVUCA_DATA_FORMAT_1
&&
51 pInfo
->dataFormat
[2]==INVUCA_DATA_FORMAT_2
&&
52 pInfo
->dataFormat
[3]==INVUCA_DATA_FORMAT_3
&&
53 pInfo
->formatVersion
[0]==INVUCA_FORMAT_VERSION_0
&&
54 pInfo
->formatVersion
[1]>=INVUCA_FORMAT_VERSION_1
//&&
55 //pInfo->formatVersion[1]==INVUCA_FORMAT_VERSION_1 &&
56 //pInfo->formatVersion[2]==INVUCA_FORMAT_VERSION_2 &&
57 //pInfo->formatVersion[3]==INVUCA_FORMAT_VERSION_3 &&
60 UVersionInfo UCDVersion
;
61 u_getUnicodeVersion(UCDVersion
);
62 return (pInfo
->dataVersion
[0]==UCDVersion
[0] &&
63 pInfo
->dataVersion
[1]==UCDVersion
[1]);
64 //pInfo->dataVersion[1]==invUcaDataInfo.dataVersion[1] &&
65 //pInfo->dataVersion[2]==invUcaDataInfo.dataVersion[2] &&
66 //pInfo->dataVersion[3]==invUcaDataInfo.dataVersion[3]) {
74 * Takes two CEs (lead and continuation) and
75 * compares them as CEs should be compared:
76 * primary vs. primary, secondary vs. secondary
77 * tertiary vs. tertiary
79 static int32_t compareCEs(uint32_t source0
, uint32_t source1
, uint32_t target0
, uint32_t target1
) {
80 uint32_t s1
= source0
, s2
, t1
= target0
, t2
;
81 if(isContinuation(source1
)) {
86 if(isContinuation(target1
)) {
92 uint32_t s
= 0, t
= 0;
93 if(s1
== t1
&& s2
== t2
) {
96 s
= (s1
& 0xFFFF0000)|((s2
& 0xFFFF0000)>>16);
97 t
= (t1
& 0xFFFF0000)|((t2
& 0xFFFF0000)>>16);
103 s
= (s1
& 0x0000FF00) | (s2
& 0x0000FF00)>>8;
104 t
= (t1
& 0x0000FF00) | (t2
& 0x0000FF00)>>8;
110 s
= (s1
& 0x000000FF)<<8 | (s2
& 0x000000FF);
111 t
= (t1
& 0x000000FF)<<8 | (t2
& 0x000000FF);
122 int32_t ucol_inv_findCE(const UColTokenParser
*src
, uint32_t CE
, uint32_t SecondCE
) {
123 uint32_t bottom
= 0, top
= src
->invUCA
->tableSize
;
125 uint32_t first
= 0, second
= 0;
126 uint32_t *CETable
= (uint32_t *)((uint8_t *)src
->invUCA
+src
->invUCA
->table
);
129 while(bottom
< top
-1) {
131 first
= *(CETable
+3*i
);
132 second
= *(CETable
+3*i
+1);
133 res
= compareCEs(first
, second
, CE
, SecondCE
);
144 /* in searching for elements, I have removed the failure */
145 /* The reason for this is that the builder does not rely */
146 /* on search mechanism telling it that it didn't find an */
147 /* element. However, indirect positioning relies on being */
148 /* able to find the elements around any CE, even if it is */
149 /* not defined in the UCA. */
152 if((first == CE && second == SecondCE)) {
160 static const uint32_t strengthMask
[UCOL_CE_STRENGTH_LIMIT
] = {
166 U_CAPI
int32_t U_EXPORT2
ucol_inv_getNextCE(const UColTokenParser
*src
,
167 uint32_t CE
, uint32_t contCE
,
168 uint32_t *nextCE
, uint32_t *nextContCE
,
171 uint32_t *CETable
= (uint32_t *)((uint8_t *)src
->invUCA
+src
->invUCA
->table
);
174 iCE
= ucol_inv_findCE(src
, CE
, contCE
);
177 *nextCE
= UCOL_NOT_FOUND
;
181 CE
&= strengthMask
[strength
];
182 contCE
&= strengthMask
[strength
];
185 *nextContCE
= contCE
;
187 while((*nextCE
& strengthMask
[strength
]) == CE
188 && (*nextContCE
& strengthMask
[strength
]) == contCE
)
190 *nextCE
= (*(CETable
+3*(++iCE
)));
191 *nextContCE
= (*(CETable
+3*(iCE
)+1));
197 U_CFUNC
int32_t U_EXPORT2
ucol_inv_getPrevCE(const UColTokenParser
*src
,
198 uint32_t CE
, uint32_t contCE
,
199 uint32_t *prevCE
, uint32_t *prevContCE
,
202 uint32_t *CETable
= (uint32_t *)((uint8_t *)src
->invUCA
+src
->invUCA
->table
);
205 iCE
= ucol_inv_findCE(src
, CE
, contCE
);
208 *prevCE
= UCOL_NOT_FOUND
;
212 CE
&= strengthMask
[strength
];
213 contCE
&= strengthMask
[strength
];
216 *prevContCE
= contCE
;
218 while((*prevCE
& strengthMask
[strength
]) == CE
219 && (*prevContCE
& strengthMask
[strength
])== contCE
220 && iCE
> 0) /* this condition should prevent falling off the edge of the world */
222 /* here, we end up in a singularity - zero */
223 *prevCE
= (*(CETable
+3*(--iCE
)));
224 *prevContCE
= (*(CETable
+3*(iCE
)+1));
230 U_CFUNC
uint32_t U_EXPORT2
ucol_getCEStrengthDifference(uint32_t CE
, uint32_t contCE
,
231 uint32_t prevCE
, uint32_t prevContCE
)
233 if(prevCE
== CE
&& prevContCE
== contCE
) {
234 return UCOL_IDENTICAL
;
236 if((prevCE
& strengthMask
[UCOL_PRIMARY
]) != (CE
& strengthMask
[UCOL_PRIMARY
])
237 || (prevContCE
& strengthMask
[UCOL_PRIMARY
]) != (contCE
& strengthMask
[UCOL_PRIMARY
]))
241 if((prevCE
& strengthMask
[UCOL_SECONDARY
]) != (CE
& strengthMask
[UCOL_SECONDARY
])
242 || (prevContCE
& strengthMask
[UCOL_SECONDARY
]) != (contCE
& strengthMask
[UCOL_SECONDARY
]))
244 return UCOL_SECONDARY
;
246 return UCOL_TERTIARY
;
251 inline int32_t ucol_inv_getPrevious(UColTokenParser *src, UColTokListHeader *lh, uint32_t strength) {
253 uint32_t CE = lh->baseCE;
254 uint32_t SecondCE = lh->baseContCE;
256 uint32_t *CETable = (uint32_t *)((uint8_t *)src->invUCA+src->invUCA->table);
257 uint32_t previousCE, previousContCE;
260 iCE = ucol_inv_findCE(src, CE, SecondCE);
266 CE &= strengthMask[strength];
267 SecondCE &= strengthMask[strength];
270 previousContCE = SecondCE;
272 while((previousCE & strengthMask[strength]) == CE && (previousContCE & strengthMask[strength])== SecondCE) {
273 previousCE = (*(CETable+3*(--iCE)));
274 previousContCE = (*(CETable+3*(iCE)+1));
276 lh->previousCE = previousCE;
277 lh->previousContCE = previousContCE;
283 inline int32_t ucol_inv_getNext(UColTokenParser
*src
, UColTokListHeader
*lh
, uint32_t strength
) {
284 uint32_t CE
= lh
->baseCE
;
285 uint32_t SecondCE
= lh
->baseContCE
;
287 uint32_t *CETable
= (uint32_t *)((uint8_t *)src
->invUCA
+src
->invUCA
->table
);
288 uint32_t nextCE
, nextContCE
;
291 iCE
= ucol_inv_findCE(src
, CE
, SecondCE
);
297 CE
&= strengthMask
[strength
];
298 SecondCE
&= strengthMask
[strength
];
301 nextContCE
= SecondCE
;
303 while((nextCE
& strengthMask
[strength
]) == CE
304 && (nextContCE
& strengthMask
[strength
]) == SecondCE
)
306 nextCE
= (*(CETable
+3*(++iCE
)));
307 nextContCE
= (*(CETable
+3*(iCE
)+1));
311 lh
->nextContCE
= nextContCE
;
316 static void ucol_inv_getGapPositions(UColTokenParser
*src
, UColTokListHeader
*lh
, UErrorCode
*status
) {
317 /* reset all the gaps */
319 uint32_t *CETable
= (uint32_t *)((uint8_t *)src
->invUCA
+src
->invUCA
->table
);
324 UColToken
*tok
= lh
->first
;
325 uint32_t tokStrength
= tok
->strength
;
327 for(i
= 0; i
<3; i
++) {
329 lh
->gapsHi
[3*i
+1] = 0;
330 lh
->gapsHi
[3*i
+2] = 0;
332 lh
->gapsLo
[3*i
+1] = 0;
333 lh
->gapsLo
[3*i
+2] = 0;
335 lh
->fStrToken
[i
] = NULL
;
336 lh
->lStrToken
[i
] = NULL
;
340 UCAConstants
*consts
= (UCAConstants
*)((uint8_t *)src
->UCA
->image
+ src
->UCA
->image
->UCAConsts
);
342 if((lh
->baseCE
& 0xFF000000)>= (consts
->UCA_PRIMARY_IMPLICIT_MIN
<<24) && (lh
->baseCE
& 0xFF000000) <= (consts
->UCA_PRIMARY_IMPLICIT_MAX
<<24) ) { /* implicits - */
343 //if(lh->baseCE >= PRIMARY_IMPLICIT_MIN && lh->baseCE < PRIMARY_IMPLICIT_MAX ) { /* implicits - */
346 t2
= lh
->baseContCE
& UCOL_REMOVE_CONTINUATION
;
347 lh
->gapsLo
[0] = (t1
& UCOL_PRIMARYMASK
) | (t2
& UCOL_PRIMARYMASK
) >> 16;
348 lh
->gapsLo
[1] = (t1
& UCOL_SECONDARYMASK
) << 16 | (t2
& UCOL_SECONDARYMASK
) << 8;
349 lh
->gapsLo
[2] = (UCOL_TERTIARYORDER(t1
)) << 24 | (UCOL_TERTIARYORDER(t2
)) << 16;
350 uint32_t primaryCE
= t1
& UCOL_PRIMARYMASK
| (t2
& UCOL_PRIMARYMASK
) >> 16;
351 primaryCE
= uprv_uca_getImplicitFromRaw(uprv_uca_getRawFromImplicit(primaryCE
)+1);
353 t1
= primaryCE
& UCOL_PRIMARYMASK
| 0x0505;
354 t2
= (primaryCE
<< 16) & UCOL_PRIMARYMASK
; // | UCOL_CONTINUATION_MARKER;
356 lh
->gapsHi
[0] = (t1
& UCOL_PRIMARYMASK
) | (t2
& UCOL_PRIMARYMASK
) >> 16;
357 lh
->gapsHi
[1] = (t1
& UCOL_SECONDARYMASK
) << 16 | (t2
& UCOL_SECONDARYMASK
) << 8;
358 lh
->gapsHi
[2] = (UCOL_TERTIARYORDER(t1
)) << 24 | (UCOL_TERTIARYORDER(t2
)) << 16;
359 } else if(lh
->indirect
== TRUE
&& lh
->nextCE
!= 0) {
360 //} else if(lh->baseCE == UCOL_RESET_TOP_VALUE && lh->baseContCE == 0) {
363 t2
= lh
->baseContCE
&UCOL_REMOVE_CONTINUATION
;
364 lh
->gapsLo
[0] = (t1
& UCOL_PRIMARYMASK
) | (t2
& UCOL_PRIMARYMASK
) >> 16;
365 lh
->gapsLo
[1] = (t1
& UCOL_SECONDARYMASK
) << 16 | (t2
& UCOL_SECONDARYMASK
) << 8;
366 lh
->gapsLo
[2] = (UCOL_TERTIARYORDER(t1
)) << 24 | (UCOL_TERTIARYORDER(t2
)) << 16;
368 t2
= lh
->nextContCE
&UCOL_REMOVE_CONTINUATION
;
369 lh
->gapsHi
[0] = (t1
& UCOL_PRIMARYMASK
) | (t2
& UCOL_PRIMARYMASK
) >> 16;
370 lh
->gapsHi
[1] = (t1
& UCOL_SECONDARYMASK
) << 16 | (t2
& UCOL_SECONDARYMASK
) << 8;
371 lh
->gapsHi
[2] = (UCOL_TERTIARYORDER(t1
)) << 24 | (UCOL_TERTIARYORDER(t2
)) << 16;
374 if(tokStrength
< UCOL_CE_STRENGTH_LIMIT
) {
375 if((lh
->pos
[tokStrength
] = ucol_inv_getNext(src
, lh
, tokStrength
)) >= 0) {
376 lh
->fStrToken
[tokStrength
] = tok
;
377 } else { /* The CE must be implicit, since it's not in the table */
379 *status
= U_INTERNAL_PROGRAM_ERROR
;
383 while(tok
!= NULL
&& tok
->strength
>= tokStrength
) {
384 if(tokStrength
< UCOL_CE_STRENGTH_LIMIT
) {
385 lh
->lStrToken
[tokStrength
] = tok
;
389 if(tokStrength
< UCOL_CE_STRENGTH_LIMIT
-1) {
390 /* check if previous interval is the same and merge the intervals if it is so */
391 if(lh
->pos
[tokStrength
] == lh
->pos
[tokStrength
+1]) {
392 lh
->fStrToken
[tokStrength
] = lh
->fStrToken
[tokStrength
+1];
393 lh
->fStrToken
[tokStrength
+1] = NULL
;
394 lh
->lStrToken
[tokStrength
+1] = NULL
;
395 lh
->pos
[tokStrength
+1] = -1;
399 tokStrength
= tok
->strength
;
404 for(st
= 0; st
< 3; st
++) {
405 if((pos
= lh
->pos
[st
]) >= 0) {
406 t1
= *(CETable
+3*(pos
));
407 t2
= *(CETable
+3*(pos
)+1);
408 lh
->gapsHi
[3*st
] = (t1
& UCOL_PRIMARYMASK
) | (t2
& UCOL_PRIMARYMASK
) >> 16;
409 lh
->gapsHi
[3*st
+1] = (t1
& UCOL_SECONDARYMASK
) << 16 | (t2
& UCOL_SECONDARYMASK
) << 8;
410 //lh->gapsHi[3*st+2] = (UCOL_TERTIARYORDER(t1)) << 24 | (UCOL_TERTIARYORDER(t2)) << 16;
411 lh
->gapsHi
[3*st
+2] = (t1
&0x3f) << 24 | (t2
&0x3f) << 16;
413 //t1 = *(CETable+3*(pos));
414 //t2 = *(CETable+3*(pos)+1);
417 lh
->gapsLo
[3*st
] = (t1
& UCOL_PRIMARYMASK
) | (t2
& UCOL_PRIMARYMASK
) >> 16;
418 lh
->gapsLo
[3*st
+1] = (t1
& UCOL_SECONDARYMASK
) << 16 | (t2
& UCOL_SECONDARYMASK
) << 8;
419 lh
->gapsLo
[3*st
+2] = (t1
&0x3f) << 24 | (t2
&0x3f) << 16;
426 #define ucol_countBytes(value, noOfBytes) \
428 uint32_t mask = 0xFFFFFFFF; \
431 if(((value) & mask) != 0) { \
438 static uint32_t ucol_getNextGenerated(ucolCEGenerator
*g
, UErrorCode
*status
) {
439 if(U_SUCCESS(*status
)) {
440 g
->current
= ucol_nextWeight(g
->ranges
, &g
->noOfRanges
);
445 static uint32_t ucol_getSimpleCEGenerator(ucolCEGenerator
*g
, UColToken
*tok
, uint32_t strength
, UErrorCode
*status
) {
446 /* TODO: rename to enum names */
447 uint32_t high
, low
, count
=1;
448 uint32_t maxByte
= (strength
== UCOL_TERTIARY
)?0x3F:0xFF;
450 if(strength
== UCOL_SECONDARY
) {
451 low
= UCOL_COMMON_TOP2
<<24;
453 count
= 0xFF - UCOL_COMMON_TOP2
;
455 low
= UCOL_BYTE_COMMON
<< 24; //0x05000000;
457 count
= 0x40 - UCOL_BYTE_COMMON
;
460 if(tok
->next
!= NULL
&& tok
->next
->strength
== strength
) {
461 count
= tok
->next
->toInsert
;
464 g
->noOfRanges
= ucol_allocWeights(low
, high
, count
, maxByte
, g
->ranges
);
465 g
->current
= UCOL_BYTE_COMMON
<<24;
467 if(g
->noOfRanges
== 0) {
468 *status
= U_INTERNAL_PROGRAM_ERROR
;
473 static uint32_t ucol_getCEGenerator(ucolCEGenerator
*g
, uint32_t* lows
, uint32_t* highs
, UColToken
*tok
, uint32_t fStrength
, UErrorCode
*status
) {
474 uint32_t strength
= tok
->strength
;
475 uint32_t low
= lows
[fStrength
*3+strength
];
476 uint32_t high
= highs
[fStrength
*3+strength
];
477 uint32_t maxByte
= 0;
478 if(strength
== UCOL_TERTIARY
) {
480 } else if(strength
== UCOL_PRIMARY
) {
486 uint32_t count
= tok
->toInsert
;
488 if(low
>= high
&& strength
> UCOL_PRIMARY
) {
489 int32_t s
= strength
;
492 if(lows
[fStrength
*3+s
] != highs
[fStrength
*3+s
]) {
493 if(strength
== UCOL_SECONDARY
) {
494 if (low
< UCOL_COMMON_TOP2
<<24 ) {
495 // Override if low range is less than UCOL_COMMON_TOP2.
496 low
= UCOL_COMMON_TOP2
<<24;
500 // Override if low range is less than UCOL_COMMON_BOT3.
501 if ( low
< UCOL_COMMON_BOT3
<<24 ) {
502 low
= UCOL_COMMON_BOT3
<<24;
509 *status
= U_INTERNAL_PROGRAM_ERROR
;
519 if(strength
== UCOL_SECONDARY
) { /* similar as simple */
520 if(low
>= (UCOL_COMMON_BOT2
<<24) && low
< (uint32_t)(UCOL_COMMON_TOP2
<<24)) {
521 low
= UCOL_COMMON_TOP2
<<24;
523 if(high
> (UCOL_COMMON_BOT2
<<24) && high
< (uint32_t)(UCOL_COMMON_TOP2
<<24)) {
524 high
= UCOL_COMMON_TOP2
<<24;
526 if(low
< (UCOL_COMMON_BOT2
<<24)) {
527 g
->noOfRanges
= ucol_allocWeights(UCOL_BYTE_UNSHIFTED_MIN
<<24, high
, count
, maxByte
, g
->ranges
);
528 g
->current
= ucol_nextWeight(g
->ranges
, &g
->noOfRanges
);
529 //g->current = UCOL_COMMON_BOT2<<24;
534 g
->noOfRanges
= ucol_allocWeights(low
, high
, count
, maxByte
, g
->ranges
);
535 if(g
->noOfRanges
== 0) {
536 *status
= U_INTERNAL_PROGRAM_ERROR
;
538 g
->current
= ucol_nextWeight(g
->ranges
, &g
->noOfRanges
);
543 uint32_t u_toLargeKana(const UChar
*source
, const uint32_t sourceLen
, UChar
*resBuf
, const uint32_t resLen
, UErrorCode
*status
) {
547 if(U_FAILURE(*status
)) {
551 if(sourceLen
> resLen
) {
552 *status
= U_MEMORY_ALLOCATION_ERROR
;
556 for(i
= 0; i
< sourceLen
; i
++) {
558 if(0x3041 <= c
&& c
<= 0x30FA) { /* Kana range */
560 case 0x41: case 0x43: case 0x45: case 0x47: case 0x49: case 0x63: case 0x83: case 0x85: case 0x8E:
561 case 0xA1: case 0xA3: case 0xA5: case 0xA7: case 0xA9: case 0xC3: case 0xE3: case 0xE5: case 0xEE:
578 uint32_t u_toSmallKana(const UChar
*source
, const uint32_t sourceLen
, UChar
*resBuf
, const uint32_t resLen
, UErrorCode
*status
) {
582 if(U_FAILURE(*status
)) {
586 if(sourceLen
> resLen
) {
587 *status
= U_MEMORY_ALLOCATION_ERROR
;
591 for(i
= 0; i
< sourceLen
; i
++) {
593 if(0x3041 <= c
&& c
<= 0x30FA) { /* Kana range */
595 case 0x42: case 0x44: case 0x46: case 0x48: case 0x4A: case 0x64: case 0x84: case 0x86: case 0x8F:
596 case 0xA2: case 0xA4: case 0xA6: case 0xA8: case 0xAA: case 0xC4: case 0xE4: case 0xE6: case 0xEF:
613 uint8_t ucol_uprv_getCaseBits(const UCollator
*UCA
, const UChar
*src
, uint32_t len
, UErrorCode
*status
) {
617 uint32_t uCount
= 0, lCount
= 0;
622 if(U_FAILURE(*status
)) {
623 return UCOL_LOWER_CASE
;
626 nLen
= unorm_normalize(src
, len
, UNORM_NFKD
, 0, n
, 128, status
);
627 if(U_SUCCESS(*status
)) {
628 for(i
= 0; i
< nLen
; i
++) {
629 uprv_init_collIterate(UCA
, &n
[i
], 1, &s
);
630 order
= ucol_getNextCE(UCA
, &s
, status
);
631 if(isContinuation(order
)) {
632 *status
= U_INTERNAL_PROGRAM_ERROR
;
633 return UCOL_LOWER_CASE
;
635 if((order
&UCOL_CASE_BIT_MASK
)== UCOL_UPPER_CASE
) {
638 if(u_islower(n
[i
])) {
642 u_toSmallKana(&n
[i
], 1, sk
, 1, status
);
643 u_toLargeKana(&n
[i
], 1, lk
, 1, status
);
644 if(sk
[0] == n
[i
] && lk
[0] != n
[i
]) {
652 if(uCount
!= 0 && lCount
!= 0) {
653 return UCOL_MIXED_CASE
;
654 } else if(uCount
!= 0) {
655 return UCOL_UPPER_CASE
;
657 return UCOL_LOWER_CASE
;
662 U_CFUNC
void ucol_doCE(UColTokenParser
*src
, uint32_t *CEparts
, UColToken
*tok
, UErrorCode
*status
) {
663 /* this one makes the table and stuff */
664 uint32_t noOfBytes
[3];
667 for(i
= 0; i
<3; i
++) {
668 ucol_countBytes(CEparts
[i
], noOfBytes
[i
]);
671 /* Here we have to pack CEs from parts */
676 while(2*CEi
<noOfBytes
[0] || CEi
<noOfBytes
[1] || CEi
<noOfBytes
[2]) {
678 value
= UCOL_CONTINUATION_MARKER
; /* Continuation marker */
683 if(2*CEi
<noOfBytes
[0]) {
684 value
|= ((CEparts
[0]>>(32-16*(CEi
+1))) & 0xFFFF) << 16;
686 if(CEi
<noOfBytes
[1]) {
687 value
|= ((CEparts
[1]>>(32-8*(CEi
+1))) & 0xFF) << 8;
689 if(CEi
<noOfBytes
[2]) {
690 value
|= ((CEparts
[2]>>(32-8*(CEi
+1))) & 0x3F);
692 tok
->CEs
[CEi
] = value
;
695 if(CEi
== 0) { /* totally ignorable */
698 } else { /* there is at least something */
703 // we want to set case bits here and now, not later.
704 // Case bits handling
705 if(tok
->CEs
[0] != 0) { // case bits should be set only for non-ignorables
706 tok
->CEs
[0] &= 0xFFFFFF3F; // Clean the case bits field
707 int32_t cSize
= (tok
->source
& 0xFF000000) >> 24;
708 UChar
*cPoints
= (tok
->source
& 0x00FFFFFF) + src
->source
;
712 tok
->CEs
[0] |= ucol_uprv_getCaseBits(src
->UCA
, cPoints
, cSize
, status
);
714 // Copy it from the UCA
715 uint32_t caseCE
= ucol_getFirstCE(src
->UCA
, cPoints
[0], status
);
716 tok
->CEs
[0] |= (caseCE
& 0xC0);
721 fprintf(stderr
, "%04X str: %i, [%08X, %08X, %08X]: tok: ", tok
->debugSource
, tok
->strength
, CEparts
[0] >> (32-8*noOfBytes
[0]), CEparts
[1] >> (32-8*noOfBytes
[1]), CEparts
[2]>> (32-8*noOfBytes
[2]));
722 for(i
= 0; i
<tok
->noOfCEs
; i
++) {
723 fprintf(stderr
, "%08X ", tok
->CEs
[i
]);
725 fprintf(stderr
, "\n");
729 U_CFUNC
void ucol_initBuffers(UColTokenParser
*src
, UColTokListHeader
*lh
, UErrorCode
*status
) {
730 ucolCEGenerator Gens
[UCOL_CE_STRENGTH_LIMIT
];
731 uint32_t CEparts
[UCOL_CE_STRENGTH_LIMIT
];
733 UColToken
*tok
= lh
->last
;
734 uint32_t t
[UCOL_STRENGTH_LIMIT
];
736 uprv_memset(t
, 0, UCOL_STRENGTH_LIMIT
*sizeof(uint32_t));
739 t
[tok
->strength
] = 1;
741 while(tok
->previous
!= NULL
) {
742 if(tok
->previous
->strength
< tok
->strength
) { /* going up */
743 t
[tok
->strength
] = 0;
744 t
[tok
->previous
->strength
]++;
745 } else if(tok
->previous
->strength
> tok
->strength
) { /* going down */
746 t
[tok
->previous
->strength
] = 1;
751 tok
->toInsert
= t
[tok
->strength
];
754 tok
->toInsert
= t
[tok
->strength
];
755 ucol_inv_getGapPositions(src
, lh
, status
);
758 fprintf(stderr
, "BaseCE: %08X %08X\n", lh
->baseCE
, lh
->baseContCE
);
760 for(j
= 2; j
>= 0; j
--) {
761 fprintf(stderr
, "gapsLo[%i] [%08X %08X %08X]\n", j
, lh
->gapsLo
[j
*3], lh
->gapsLo
[j
*3+1], lh
->gapsLo
[j
*3+2]);
762 fprintf(stderr
, "gapsHi[%i] [%08X %08X %08X]\n", j
, lh
->gapsHi
[j
*3], lh
->gapsHi
[j
*3+1], lh
->gapsHi
[j
*3+2]);
764 tok
=lh
->first
[UCOL_TOK_POLARITY_POSITIVE
];
767 fprintf(stderr
,"%i", tok
->strength
);
769 } while(tok
!= NULL
);
770 fprintf(stderr
, "\n");
772 tok
=lh
->first
[UCOL_TOK_POLARITY_POSITIVE
];
775 fprintf(stderr
,"%i", tok
->toInsert
);
777 } while(tok
!= NULL
);
781 uint32_t fStrength
= UCOL_IDENTICAL
;
782 uint32_t initStrength
= UCOL_IDENTICAL
;
785 CEparts
[UCOL_PRIMARY
] = (lh
->baseCE
& UCOL_PRIMARYMASK
) | (lh
->baseContCE
& UCOL_PRIMARYMASK
) >> 16;
786 CEparts
[UCOL_SECONDARY
] = (lh
->baseCE
& UCOL_SECONDARYMASK
) << 16 | (lh
->baseContCE
& UCOL_SECONDARYMASK
) << 8;
787 CEparts
[UCOL_TERTIARY
] = (UCOL_TERTIARYORDER(lh
->baseCE
)) << 24 | (UCOL_TERTIARYORDER(lh
->baseContCE
)) << 16;
789 while (tok
!= NULL
&& U_SUCCESS(*status
)) {
790 fStrength
= tok
->strength
;
791 if(fStrength
< initStrength
) {
792 initStrength
= fStrength
;
793 if(lh
->pos
[fStrength
] == -1) {
794 while(lh
->pos
[fStrength
] == -1 && fStrength
> 0) {
797 if(lh
->pos
[fStrength
] == -1) {
798 *status
= U_INTERNAL_PROGRAM_ERROR
;
802 if(initStrength
== UCOL_TERTIARY
) { /* starting with tertiary */
803 CEparts
[UCOL_PRIMARY
] = lh
->gapsLo
[fStrength
*3];
804 CEparts
[UCOL_SECONDARY
] = lh
->gapsLo
[fStrength
*3+1];
805 /*CEparts[UCOL_TERTIARY] = ucol_getCEGenerator(&Gens[2], lh->gapsLo[fStrength*3+2], lh->gapsHi[fStrength*3+2], tok, UCOL_TERTIARY); */
806 CEparts
[UCOL_TERTIARY
] = ucol_getCEGenerator(&Gens
[UCOL_TERTIARY
], lh
->gapsLo
, lh
->gapsHi
, tok
, fStrength
, status
);
807 } else if(initStrength
== UCOL_SECONDARY
) { /* secondaries */
808 CEparts
[UCOL_PRIMARY
] = lh
->gapsLo
[fStrength
*3];
809 /*CEparts[1] = ucol_getCEGenerator(&Gens[1], lh->gapsLo[fStrength*3+1], lh->gapsHi[fStrength*3+1], tok, 1);*/
810 CEparts
[UCOL_SECONDARY
] = ucol_getCEGenerator(&Gens
[UCOL_SECONDARY
], lh
->gapsLo
, lh
->gapsHi
, tok
, fStrength
, status
);
811 CEparts
[UCOL_TERTIARY
] = ucol_getSimpleCEGenerator(&Gens
[UCOL_TERTIARY
], tok
, UCOL_TERTIARY
, status
);
812 } else { /* primaries */
813 /*CEparts[UCOL_PRIMARY] = ucol_getCEGenerator(&Gens[0], lh->gapsLo[0], lh->gapsHi[0], tok, UCOL_PRIMARY);*/
814 CEparts
[UCOL_PRIMARY
] = ucol_getCEGenerator(&Gens
[UCOL_PRIMARY
], lh
->gapsLo
, lh
->gapsHi
, tok
, fStrength
, status
);
815 CEparts
[UCOL_SECONDARY
] = ucol_getSimpleCEGenerator(&Gens
[UCOL_SECONDARY
], tok
, UCOL_SECONDARY
, status
);
816 CEparts
[UCOL_TERTIARY
] = ucol_getSimpleCEGenerator(&Gens
[UCOL_TERTIARY
], tok
, UCOL_TERTIARY
, status
);
819 if(tok
->strength
== UCOL_TERTIARY
) {
820 CEparts
[UCOL_TERTIARY
] = ucol_getNextGenerated(&Gens
[UCOL_TERTIARY
], status
);
821 } else if(tok
->strength
== UCOL_SECONDARY
) {
822 CEparts
[UCOL_SECONDARY
] = ucol_getNextGenerated(&Gens
[UCOL_SECONDARY
], status
);
823 CEparts
[UCOL_TERTIARY
] = ucol_getSimpleCEGenerator(&Gens
[UCOL_TERTIARY
], tok
, UCOL_TERTIARY
, status
);
824 } else if(tok
->strength
== UCOL_PRIMARY
) {
825 CEparts
[UCOL_PRIMARY
] = ucol_getNextGenerated(&Gens
[UCOL_PRIMARY
], status
);
826 CEparts
[UCOL_SECONDARY
] = ucol_getSimpleCEGenerator(&Gens
[UCOL_SECONDARY
], tok
, UCOL_SECONDARY
, status
);
827 CEparts
[UCOL_TERTIARY
] = ucol_getSimpleCEGenerator(&Gens
[UCOL_TERTIARY
], tok
, UCOL_TERTIARY
, status
);
830 ucol_doCE(src
, CEparts
, tok
, status
);
835 U_CFUNC
void ucol_createElements(UColTokenParser
*src
, tempUCATable
*t
, UColTokListHeader
*lh
, UErrorCode
*status
) {
837 UColToken
*tok
= lh
->first
;
838 UColToken
*expt
= NULL
;
839 uint32_t i
= 0, j
= 0;
840 const uint16_t *fcdTrieData
= unorm_getFCDTrie(status
);
842 while(tok
!= NULL
&& U_SUCCESS(*status
)) {
843 /* first, check if there are any expansions */
844 /* if there are expansions, we need to do a little bit more processing */
845 /* since parts of expansion can be tailored, while others are not */
846 if(tok
->expansion
!= 0) {
847 uint32_t len
= tok
->expansion
>> 24;
848 uint32_t currentSequenceLen
= len
;
849 uint32_t expOffset
= tok
->expansion
& 0x00FFFFFF;
850 //uint32_t exp = currentSequenceLen | expOffset;
852 exp
.source
= currentSequenceLen
| expOffset
;
853 exp
.rulesToParse
= src
->source
;
856 currentSequenceLen
= len
;
857 while(currentSequenceLen
> 0) {
858 exp
.source
= (currentSequenceLen
<< 24) | expOffset
;
859 if((expt
= (UColToken
*)uhash_get(src
->tailored
, &exp
)) != NULL
&& expt
->strength
!= UCOL_TOK_RESET
) { /* expansion is tailored */
860 uint32_t noOfCEsToCopy
= expt
->noOfCEs
;
861 for(j
= 0; j
<noOfCEsToCopy
; j
++) {
862 tok
->expCEs
[tok
->noOfExpCEs
+ j
] = expt
->CEs
[j
];
864 tok
->noOfExpCEs
+= noOfCEsToCopy
;
865 // Smart people never try to add codepoints and CEs.
866 // For some odd reason, it won't work.
867 expOffset
+= currentSequenceLen
; //noOfCEsToCopy;
868 len
-= currentSequenceLen
; //noOfCEsToCopy;
871 currentSequenceLen
--;
874 if(currentSequenceLen
== 0) { /* couldn't find any tailored subsequence */
875 /* will have to get one from UCA */
876 /* first, get the UChars from the rules */
877 /* then pick CEs out until there is no more and stuff them into expansion */
880 uprv_init_collIterate(src
->UCA
, expOffset
+ src
->source
, 1, &s
);
883 order
= ucol_getNextCE(src
->UCA
, &s
, status
);
884 if(order
== UCOL_NO_MORE_CES
) {
887 tok
->expCEs
[tok
->noOfExpCEs
++] = order
;
897 /* set the ucaelement with obtained values */
898 el
.noOfCEs
= tok
->noOfCEs
+ tok
->noOfExpCEs
;
900 for(i
= 0; i
<tok
->noOfCEs
; i
++) {
901 el
.CEs
[i
] = tok
->CEs
[i
];
903 for(i
= 0; i
<tok
->noOfExpCEs
; i
++) {
904 el
.CEs
[i
+tok
->noOfCEs
] = tok
->expCEs
[i
];
908 // We kept prefix and source kind of together, as it is a kind of a contraction.
909 // However, now we have to slice the prefix off the main thing -
910 el
.prefix
= el
.prefixChars
;
911 el
.cPoints
= el
.uchars
;
912 if(tok
->prefix
!= 0) { // we will just copy the prefix here, and adjust accordingly in the
913 // addPrefix function in ucol_elm. The reason is that we need to add both composed AND
914 // decomposed elements to the unsaf table.
915 el
.prefixSize
= tok
->prefix
>>24;
916 uprv_memcpy(el
.prefix
, src
->source
+ (tok
->prefix
& 0x00FFFFFF), el
.prefixSize
*sizeof(UChar
));
918 el
.cSize
= (tok
->source
>> 24)-(tok
->prefix
>>24);
919 uprv_memcpy(el
.uchars
, (tok
->source
& 0x00FFFFFF)+(tok
->prefix
>>24) + src
->source
, el
.cSize
*sizeof(UChar
));
924 el
.cSize
= (tok
->source
>> 24);
925 uprv_memcpy(el
.uchars
, (tok
->source
& 0x00FFFFFF) + src
->source
, el
.cSize
*sizeof(UChar
));
927 if(src
->UCA
!= NULL
) {
928 UBool containCombinMarks
= FALSE
;
929 for(i
= 0; i
<el
.cSize
; i
++) {
930 if(UCOL_ISJAMO(el
.cPoints
[i
])) {
931 t
->image
->jamoSpecial
= TRUE
;
933 if ( !src
->buildCCTabFlag
) {
934 // check combining class
935 int16_t fcd
= unorm_getFCD16(fcdTrieData
, el
.cPoints
[i
]);
936 if ( (fcd
&& 0xff) == 0 ) {
937 // reset flag when current char is not combining mark.
938 containCombinMarks
= FALSE
;
941 containCombinMarks
= TRUE
;
945 if ( !src
->buildCCTabFlag
&& containCombinMarks
) {
946 src
->buildCCTabFlag
= TRUE
;
950 /* and then, add it */
952 fprintf(stderr
, "Adding: %04X with %08X\n", el
.cPoints
[0], el
.CEs
[0]);
954 uprv_uca_addAnElement(t
, &el
, status
);
956 #if UCOL_DEBUG_DUPLICATES
957 if(*status
!= U_ZERO_ERROR
) {
958 fprintf(stderr
, "replaced CE for %04X with CE for %04X\n", el
.cPoints
[0], tok
->debugSource
);
959 *status
= U_ZERO_ERROR
;
968 static UBool U_CALLCONV
969 _processUCACompleteIgnorables(const void *context
, UChar32 start
, UChar32 limit
, uint32_t value
) {
970 UErrorCode status
= U_ZERO_ERROR
;
971 tempUCATable
*t
= (tempUCATable
*)context
;
973 while(start
< limit
) {
974 uint32_t CE
= utrie_get32(t
->mapping
, start
, NULL
);
975 if(CE
== UCOL_NOT_FOUND
) {
979 el
.prefixChars
[0] = 0;
980 el
.prefix
= el
.prefixChars
;
981 el
.cPoints
= el
.uchars
;
984 UTF_APPEND_CHAR(el
.uchars
, el
.cSize
, 1024, start
);
988 uprv_uca_addAnElement(t
, &el
, &status
);
994 if(U_FAILURE(status
)) {
1003 ucol_uprv_bld_copyRangeFromUCA(UColTokenParser
*src
, tempUCATable
*t
,
1004 UChar32 start
, UChar32 end
,
1007 //UChar decomp[256];
1008 uint32_t CE
= UCOL_NOT_FOUND
;
1013 el
.prefixChars
[0] = 0;
1016 if(U_SUCCESS(*status
)) {
1017 for(u
= start
; u
<=end
; u
++) {
1018 if((CE
= utrie_get32(t
->mapping
, u
, NULL
)) == UCOL_NOT_FOUND
1019 /* this test is for contractions that are missing the starting element. */
1020 || ((isCntTableElement(CE
)) &&
1021 (uprv_cnttab_getCE(t
->contractions
, CE
, 0, status
) == UCOL_NOT_FOUND
))
1025 U16_APPEND_UNSAFE(el
.uchars
, el
.cSize
, u
);
1026 //decomp[0] = (UChar)u;
1027 //el.uchars[0] = (UChar)u;
1028 el
.cPoints
= el
.uchars
;
1031 el
.prefix
= el
.prefixChars
;
1033 //uprv_init_collIterate(src->UCA, decomp, 1, &colIt);
1034 // We actually want to check whether this element is a special
1035 // If it is an implicit element (hangul, CJK - we want to copy the
1036 // special, not the resolved CEs) - for hangul, copying resolved
1037 // would just make things the same (there is an expansion and it
1038 // takes approximately the same amount of time to resolve as
1039 // falling back to the UCA).
1041 UTRIE_GET32(src->UCA->mapping, u, CE);
1043 if(tag == HANGUL_SYLLABLE_TAG || tag == CJK_IMPLICIT_TAG
1044 || tag == IMPLICIT_TAG || tag == TRAIL_SURROGATE_TAG
1045 || tag == LEAD_SURROGATE_TAG) {
1046 el.CEs[el.noOfCEs++] = CE;
1049 // It turns out that it does not make sense to keep implicits
1050 // unresolved. The cost of resolving them is big enough so that
1051 // it doesn't make any difference whether we have to go to the UCA
1054 uprv_init_collIterate(src
->UCA
, el
.uchars
, el
.cSize
, &colIt
);
1055 while(CE
!= UCOL_NO_MORE_CES
) {
1056 CE
= ucol_getNextCE(src
->UCA
, &colIt
, status
);
1057 if(CE
!= UCOL_NO_MORE_CES
) {
1058 el
.CEs
[el
.noOfCEs
++] = CE
;
1062 uprv_uca_addAnElement(t
, &el
, status
);
1068 UCATableHeader
*ucol_assembleTailoringTable(UColTokenParser
*src
, UErrorCode
*status
) {
1072 if(U_FAILURE(*status
)) {
1076 2. Eliminate the negative lists by doing the following for each non-null negative list:
1077 o if previousCE(baseCE, strongestN) != some ListHeader X's baseCE,
1078 create new ListHeader X
1079 o reverse the list, add to the end of X's positive list. Reset the strength of the
1080 first item you add, based on the stronger strength levels of the two lists.
1083 3. For each ListHeader with a non-null positive list:
1086 o Find all character strings with CEs between the baseCE and the
1087 next/previous CE, at the strength of the first token. Add these to the
1089 ? That is, if UCA has ... x <<< X << x' <<< X' < y ..., and the
1090 tailoring has & x < z...
1091 ? Then we change the tailoring to & x <<< X << x' <<< X' < z ...
1093 /* It is possible that this part should be done even while constructing list */
1094 /* The problem is that it is unknown what is going to be the strongest weight */
1095 /* So we might as well do it here */
1098 o Allocate CEs for each token in the list, based on the total number N of the
1099 largest level difference, and the gap G between baseCE and nextCE at that
1100 level. The relation * between the last item and nextCE is the same as the
1102 o Example: baseCE < a << b <<< q << c < d < e * nextCE(X,1)
1103 ? There are 3 primary items: a, d, e. Fit them into the primary gap.
1104 Then fit b and c into the secondary gap between a and d, then fit q
1105 into the tertiary gap between b and c.
1107 o Example: baseCE << b <<< q << c * nextCE(X,2)
1108 ? There are 2 secondary items: b, c. Fit them into the secondary gap.
1109 Then fit q into the tertiary gap between b and c.
1110 o When incrementing primary values, we will not cross high byte
1111 boundaries except where there is only a single-byte primary. That is to
1112 ensure that the script reordering will continue to work.
1114 UCATableHeader
*image
= (UCATableHeader
*)uprv_malloc(sizeof(UCATableHeader
));
1116 if (image
== NULL
) {
1117 *status
= U_MEMORY_ALLOCATION_ERROR
;
1120 uprv_memcpy(image
, src
->UCA
->image
, sizeof(UCATableHeader
));
1122 for(i
= 0; i
<src
->resultLen
; i
++) {
1123 /* now we need to generate the CEs */
1124 /* We stuff the initial value in the buffers, and increase the appropriate buffer */
1125 /* According to strength */
1126 if(U_SUCCESS(*status
)) {
1127 if(src
->lh
[i
].first
) { // if there are any elements
1128 // due to the way parser works, subsequent tailorings
1129 // may remove all the elements from a sequence, therefore
1130 // leaving an empty tailoring sequence.
1131 ucol_initBuffers(src
, &src
->lh
[i
], status
);
1134 if(U_FAILURE(*status
)) {
1140 if(src
->varTop
!= NULL
) { /* stuff the variable top value */
1141 src
->opts
->variableTopValue
= (*(src
->varTop
->CEs
))>>16;
1142 /* remove it from the list */
1143 if(src
->varTop
->listHeader
->first
== src
->varTop
) { /* first in list */
1144 src
->varTop
->listHeader
->first
= src
->varTop
->next
;
1146 if(src
->varTop
->listHeader
->last
== src
->varTop
) { /* first in list */
1147 src
->varTop
->listHeader
->last
= src
->varTop
->previous
;
1149 if(src
->varTop
->next
!= NULL
) {
1150 src
->varTop
->next
->previous
= src
->varTop
->previous
;
1152 if(src
->varTop
->previous
!= NULL
) {
1153 src
->varTop
->previous
->next
= src
->varTop
->next
;
1158 tempUCATable
*t
= uprv_uca_initTempTable(image
, src
->opts
, src
->UCA
, NOT_FOUND_TAG
, NOT_FOUND_TAG
, status
);
1159 if(U_FAILURE(*status
)) {
1165 /* After this, we have assigned CE values to all regular CEs */
1166 /* now we will go through list once more and resolve expansions, */
1167 /* make UCAElements structs and add them to table */
1168 for(i
= 0; i
<src
->resultLen
; i
++) {
1169 /* now we need to generate the CEs */
1170 /* We stuff the initial value in the buffers, and increase the appropriate buffer */
1171 /* According to strength */
1172 if(U_SUCCESS(*status
)) {
1173 ucol_createElements(src
, t
, &src
->lh
[i
], status
);
1180 el
.prefixChars
[0] = 0;
1182 /* add latin-1 stuff */
1183 ucol_uprv_bld_copyRangeFromUCA(src
, t
, 0, 0xFF, status
);
1185 /* add stuff for copying */
1186 if(src
->copySet
!= NULL
) {
1188 UnicodeSet
*set
= (UnicodeSet
*)src
->copySet
;
1189 for(i
= 0; i
< set
->getRangeCount(); i
++) {
1190 ucol_uprv_bld_copyRangeFromUCA(src
, t
, set
->getRangeStart(i
), set
->getRangeEnd(i
), status
);
1194 if(U_SUCCESS(*status
)) {
1195 /* copy contractions from the UCA - this is felt mostly for cyrillic*/
1197 uint32_t tailoredCE
= UCOL_NOT_FOUND
;
1198 //UChar *conts = (UChar *)((uint8_t *)src->UCA->image + src->UCA->image->UCAConsts+sizeof(UCAConstants));
1199 UChar
*conts
= (UChar
*)((uint8_t *)src
->UCA
->image
+ src
->UCA
->image
->contractionUCACombos
);
1200 UCollationElements
*ucaEl
= ucol_openElements(src
->UCA
, NULL
, 0, status
);
1201 // Check for null pointer
1202 if (ucaEl
== NULL
) {
1203 *status
= U_MEMORY_ALLOCATION_ERROR
;
1206 while(*conts
!= 0) {
1207 /*tailoredCE = ucmpe32_get(t->mapping, *conts);*/
1208 tailoredCE
= utrie_get32(t
->mapping
, *conts
, NULL
);
1209 if(tailoredCE
!= UCOL_NOT_FOUND
) {
1210 UBool needToAdd
= TRUE
;
1211 if(isCntTableElement(tailoredCE
)) {
1212 if(uprv_cnttab_isTailored(t
->contractions
, tailoredCE
, conts
+1, status
) == TRUE
) {
1216 if (!needToAdd
&& isPrefix(tailoredCE
) && *(conts
+1)==0) {
1218 elm
.cPoints
= el
.uchars
;
1220 elm
.uchars
[0] = *conts
;
1223 elm
.prefixChars
[0] = *(conts
+2);
1225 elm
.prefix
= elm
.prefixChars
;
1227 UCAElements
*prefixEnt
=(UCAElements
*)uhash_get(t
->prefixLookup
, &elm
);
1228 if ((prefixEnt
==NULL
) || *(prefixEnt
->prefix
)!=*(conts
+2)) {
1232 if(src
->removeSet
!= NULL
&& uset_contains(src
->removeSet
, *conts
)) {
1236 if(needToAdd
== TRUE
) { // we need to add if this contraction is not tailored.
1237 if (*(conts
+1) != 0) { // contractions
1238 el
.prefix
= el
.prefixChars
;
1240 el
.cPoints
= el
.uchars
;
1242 el
.uchars
[0] = *conts
;
1243 el
.uchars
[1] = *(conts
+1);
1245 el
.uchars
[2] = *(conts
+2);
1250 ucol_setText(ucaEl
, el
.uchars
, el
.cSize
, status
);
1252 else { // pre-context character
1253 UChar str
[4] = { 0 };
1255 int32_t preKeyLen
=0;
1257 el
.cPoints
= el
.uchars
;
1259 el
.uchars
[0] = *conts
;
1262 el
.prefixChars
[0] = *(conts
+2);
1263 el
.prefix
= el
.prefixChars
;
1265 if (el
.prefixChars
[0]!=0) {
1266 // get CE of prefix character first
1267 str
[0]=el
.prefixChars
[0];
1269 ucol_setText(ucaEl
, str
, 1, status
);
1270 while ((int32_t)(el
.CEs
[el
.noOfCEs
] = ucol_next(ucaEl
, status
))
1271 != UCOL_NULLORDER
) {
1272 preKeyLen
++; // count number of keys for prefix character
1274 str
[len
++] = el
.prefixChars
[0];
1277 str
[len
++] = el
.uchars
[0];
1279 ucol_setText(ucaEl
, str
, len
, status
);
1280 // Skip the keys for prefix character, then copy the rest to el.
1281 while ((preKeyLen
-->0) &&
1282 (int32_t)(el
.CEs
[el
.noOfCEs
] = ucol_next(ucaEl
, status
)) != UCOL_NULLORDER
) {
1287 while ((int32_t)(el
.CEs
[el
.noOfCEs
] = ucol_next(ucaEl
, status
)) != UCOL_NULLORDER
) {
1290 uprv_uca_addAnElement(t
, &el
, status
);
1293 } else if(src
->removeSet
!= NULL
&& uset_contains(src
->removeSet
, *conts
)) {
1294 ucol_uprv_bld_copyRangeFromUCA(src
, t
, *conts
, *conts
, status
);
1298 ucol_closeElements(ucaEl
);
1301 // Add completely ignorable elements
1302 utrie_enum(&t
->UCA
->mapping
, NULL
, _processUCACompleteIgnorables
, t
);
1304 // add tailoring characters related canonical closures
1305 uprv_uca_canonicalClosure(t
, src
, status
);
1307 /* still need to produce compatibility closure */
1309 UCATableHeader
*myData
= uprv_uca_assembleTable(t
, status
);
1311 uprv_uca_closeTempTable(t
);
1318 static UBool U_CALLCONV
1319 ucol_bld_cleanup(void)
1321 udata_close(invUCA_DATA_MEM
);
1322 invUCA_DATA_MEM
= NULL
;
1323 _staticInvUCA
= NULL
;
1328 U_CAPI
const InverseUCATableHeader
* U_EXPORT2
1329 ucol_initInverseUCA(UErrorCode
*status
)
1331 if(U_FAILURE(*status
)) return NULL
;
1334 UMTX_CHECK(NULL
, (_staticInvUCA
== NULL
), needsInit
);
1337 InverseUCATableHeader
*newInvUCA
= NULL
;
1338 UDataMemory
*result
= udata_openChoice(NULL
, INVC_DATA_TYPE
, INVC_DATA_NAME
, isAcceptableInvUCA
, NULL
, status
);
1340 if(U_FAILURE(*status
)) {
1342 udata_close(result
);
1344 // This is not needed, as we are talking about
1345 // memory we got from UData
1346 //uprv_free(newInvUCA);
1349 if(result
!= NULL
) { /* It looks like sometimes we can fail to find the data file */
1350 newInvUCA
= (InverseUCATableHeader
*)udata_getMemory(result
);
1351 UCollator
*UCA
= ucol_initUCA(status
);
1352 // UCA versions of UCA and inverse UCA should match
1353 if(uprv_memcmp(newInvUCA
->UCAVersion
, UCA
->image
->UCAVersion
, sizeof(UVersionInfo
)) != 0) {
1354 *status
= U_INVALID_FORMAT_ERROR
;
1355 udata_close(result
);
1360 if(_staticInvUCA
== NULL
) {
1361 _staticInvUCA
= newInvUCA
;
1362 invUCA_DATA_MEM
= result
;
1368 if(newInvUCA
!= NULL
) {
1369 udata_close(result
);
1370 // This is not needed, as we are talking about
1371 // memory we got from UData
1372 //uprv_free(newInvUCA);
1375 ucln_i18n_registerCleanup(UCLN_I18N_UCOL_BLD
, ucol_bld_cleanup
);
1379 return _staticInvUCA
;
1382 #endif /* #if !UCONFIG_NO_COLLATION */