2 *******************************************************************************
4 * Copyright (C) 2001-2006, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
8 * file name: ucol_bld.cpp
10 * tab size: 8 (not used)
14 * created by: Vladimir Weinstein
16 * This module builds a collator based on the rule set.
20 #include "unicode/utypes.h"
22 #if !UCONFIG_NO_COLLATION
24 #include "unicode/ucoleitr.h"
25 #include "unicode/uchar.h"
29 #include "unicode/uniset.h"
31 static const InverseUCATableHeader
* _staticInvUCA
= NULL
;
32 static UDataMemory
* invUCA_DATA_MEM
= NULL
;
35 static UBool U_CALLCONV
36 isAcceptableInvUCA(void * /*context*/,
37 const char * /*type*/, const char * /*name*/,
38 const UDataInfo
*pInfo
){
39 /* context, type & name are intentionally not used */
40 if( pInfo
->size
>=20 &&
41 pInfo
->isBigEndian
==U_IS_BIG_ENDIAN
&&
42 pInfo
->charsetFamily
==U_CHARSET_FAMILY
&&
43 pInfo
->dataFormat
[0]==INVUCA_DATA_FORMAT_0
&& /* dataFormat="InvC" */
44 pInfo
->dataFormat
[1]==INVUCA_DATA_FORMAT_1
&&
45 pInfo
->dataFormat
[2]==INVUCA_DATA_FORMAT_2
&&
46 pInfo
->dataFormat
[3]==INVUCA_DATA_FORMAT_3
&&
47 pInfo
->formatVersion
[0]==INVUCA_FORMAT_VERSION_0
&&
48 pInfo
->formatVersion
[1]>=INVUCA_FORMAT_VERSION_1
//&&
49 //pInfo->formatVersion[1]==INVUCA_FORMAT_VERSION_1 &&
50 //pInfo->formatVersion[2]==INVUCA_FORMAT_VERSION_2 &&
51 //pInfo->formatVersion[3]==INVUCA_FORMAT_VERSION_3 &&
53 UVersionInfo UCDVersion
;
54 u_getUnicodeVersion(UCDVersion
);
55 if(pInfo
->dataVersion
[0]==UCDVersion
[0] &&
56 pInfo
->dataVersion
[1]==UCDVersion
[1]) {
57 //pInfo->dataVersion[1]==invUcaDataInfo.dataVersion[1] &&
58 //pInfo->dataVersion[2]==invUcaDataInfo.dataVersion[2] &&
59 //pInfo->dataVersion[3]==invUcaDataInfo.dataVersion[3]) {
71 * Takes two CEs (lead and continuation) and
72 * compares them as CEs should be compared:
73 * primary vs. primary, secondary vs. secondary
74 * tertiary vs. tertiary
76 static int32_t compareCEs(uint32_t source0
, uint32_t source1
, uint32_t target0
, uint32_t target1
) {
77 uint32_t s1
= source0
, s2
, t1
= target0
, t2
;
78 if(isContinuation(source1
)) {
83 if(isContinuation(target1
)) {
89 uint32_t s
= 0, t
= 0;
90 if(s1
== t1
&& s2
== t2
) {
93 s
= (s1
& 0xFFFF0000)|((s2
& 0xFFFF0000)>>16);
94 t
= (t1
& 0xFFFF0000)|((t2
& 0xFFFF0000)>>16);
100 s
= (s1
& 0x0000FF00) | (s2
& 0x0000FF00)>>8;
101 t
= (t1
& 0x0000FF00) | (t2
& 0x0000FF00)>>8;
107 s
= (s1
& 0x000000FF)<<8 | (s2
& 0x000000FF);
108 t
= (t1
& 0x000000FF)<<8 | (t2
& 0x000000FF);
119 int32_t ucol_inv_findCE(const UColTokenParser
*src
, uint32_t CE
, uint32_t SecondCE
) {
120 uint32_t bottom
= 0, top
= src
->invUCA
->tableSize
;
122 uint32_t first
= 0, second
= 0;
123 uint32_t *CETable
= (uint32_t *)((uint8_t *)src
->invUCA
+src
->invUCA
->table
);
126 while(bottom
< top
-1) {
128 first
= *(CETable
+3*i
);
129 second
= *(CETable
+3*i
+1);
130 res
= compareCEs(first
, second
, CE
, SecondCE
);
141 /* in searching for elements, I have removed the failure */
142 /* The reason for this is that the builder does not rely */
143 /* on search mechanism telling it that it didn't find an */
144 /* element. However, indirect positioning relies on being */
145 /* able to find the elements around any CE, even if it is */
146 /* not defined in the UCA. */
149 if((first == CE && second == SecondCE)) {
157 static const uint32_t strengthMask
[UCOL_CE_STRENGTH_LIMIT
] = {
163 U_CAPI
int32_t U_EXPORT2
ucol_inv_getNextCE(const UColTokenParser
*src
,
164 uint32_t CE
, uint32_t contCE
,
165 uint32_t *nextCE
, uint32_t *nextContCE
,
167 uint32_t *CETable
= (uint32_t *)((uint8_t *)src
->invUCA
+src
->invUCA
->table
);
170 iCE
= ucol_inv_findCE(src
, CE
, contCE
);
173 *nextCE
= UCOL_NOT_FOUND
;
177 CE
&= strengthMask
[strength
];
178 contCE
&= strengthMask
[strength
];
181 *nextContCE
= contCE
;
183 while((*nextCE
& strengthMask
[strength
]) == CE
184 && (*nextContCE
& strengthMask
[strength
]) == contCE
) {
185 *nextCE
= (*(CETable
+3*(++iCE
)));
186 *nextContCE
= (*(CETable
+3*(iCE
)+1));
192 U_CAPI
int32_t U_EXPORT2
ucol_inv_getPrevCE(const UColTokenParser
*src
,
193 uint32_t CE
, uint32_t contCE
,
194 uint32_t *prevCE
, uint32_t *prevContCE
,
196 uint32_t *CETable
= (uint32_t *)((uint8_t *)src
->invUCA
+src
->invUCA
->table
);
199 iCE
= ucol_inv_findCE(src
, CE
, contCE
);
202 *prevCE
= UCOL_NOT_FOUND
;
206 CE
&= strengthMask
[strength
];
207 contCE
&= strengthMask
[strength
];
210 *prevContCE
= contCE
;
212 while((*prevCE
& strengthMask
[strength
]) == CE
213 && (*prevContCE
& strengthMask
[strength
])== contCE
214 && iCE
> 0) { /* this condition should prevent falling off the edge of the world */
215 /* here, we end up in a singularity - zero */
216 *prevCE
= (*(CETable
+3*(--iCE
)));
217 *prevContCE
= (*(CETable
+3*(iCE
)+1));
223 U_CAPI
uint32_t U_EXPORT2
ucol_getCEStrengthDifference(uint32_t CE
, uint32_t contCE
,
224 uint32_t prevCE
, uint32_t prevContCE
)
226 if(prevCE
== CE
&& prevContCE
== contCE
) {
227 return UCOL_IDENTICAL
;
229 if((prevCE
& strengthMask
[UCOL_PRIMARY
]) != (CE
& strengthMask
[UCOL_PRIMARY
])
230 || (prevContCE
& strengthMask
[UCOL_PRIMARY
]) != (contCE
& strengthMask
[UCOL_PRIMARY
])) {
233 if((prevCE
& strengthMask
[UCOL_SECONDARY
]) != (CE
& strengthMask
[UCOL_SECONDARY
])
234 || (prevContCE
& strengthMask
[UCOL_SECONDARY
]) != (contCE
& strengthMask
[UCOL_SECONDARY
])) {
235 return UCOL_SECONDARY
;
237 return UCOL_TERTIARY
;
242 inline int32_t ucol_inv_getPrevious(UColTokenParser
*src
, UColTokListHeader
*lh
, uint32_t strength
) {
244 uint32_t CE
= lh
->baseCE
;
245 uint32_t SecondCE
= lh
->baseContCE
;
247 uint32_t *CETable
= (uint32_t *)((uint8_t *)src
->invUCA
+src
->invUCA
->table
);
248 uint32_t previousCE
, previousContCE
;
251 iCE
= ucol_inv_findCE(src
, CE
, SecondCE
);
257 CE
&= strengthMask
[strength
];
258 SecondCE
&= strengthMask
[strength
];
261 previousContCE
= SecondCE
;
263 while((previousCE
& strengthMask
[strength
]) == CE
&& (previousContCE
& strengthMask
[strength
])== SecondCE
) {
264 previousCE
= (*(CETable
+3*(--iCE
)));
265 previousContCE
= (*(CETable
+3*(iCE
)+1));
267 lh
->previousCE
= previousCE
;
268 lh
->previousContCE
= previousContCE
;
274 inline int32_t ucol_inv_getNext(UColTokenParser
*src
, UColTokListHeader
*lh
, uint32_t strength
) {
275 uint32_t CE
= lh
->baseCE
;
276 uint32_t SecondCE
= lh
->baseContCE
;
278 uint32_t *CETable
= (uint32_t *)((uint8_t *)src
->invUCA
+src
->invUCA
->table
);
279 uint32_t nextCE
, nextContCE
;
282 iCE
= ucol_inv_findCE(src
, CE
, SecondCE
);
288 CE
&= strengthMask
[strength
];
289 SecondCE
&= strengthMask
[strength
];
292 nextContCE
= SecondCE
;
294 while((nextCE
& strengthMask
[strength
]) == CE
295 && (nextContCE
& strengthMask
[strength
]) == SecondCE
) {
296 nextCE
= (*(CETable
+3*(++iCE
)));
297 nextContCE
= (*(CETable
+3*(iCE
)+1));
301 lh
->nextContCE
= nextContCE
;
306 U_CFUNC
void ucol_inv_getGapPositions(UColTokenParser
*src
, UColTokListHeader
*lh
, UErrorCode
*status
) {
307 /* reset all the gaps */
309 uint32_t *CETable
= (uint32_t *)((uint8_t *)src
->invUCA
+src
->invUCA
->table
);
314 UColToken
*tok
= lh
->first
;
315 uint32_t tokStrength
= tok
->strength
;
317 for(i
= 0; i
<3; i
++) {
319 lh
->gapsHi
[3*i
+1] = 0;
320 lh
->gapsHi
[3*i
+2] = 0;
322 lh
->gapsLo
[3*i
+1] = 0;
323 lh
->gapsLo
[3*i
+2] = 0;
325 lh
->fStrToken
[i
] = NULL
;
326 lh
->lStrToken
[i
] = NULL
;
330 UCAConstants
*consts
= (UCAConstants
*)((uint8_t *)src
->UCA
->image
+ src
->UCA
->image
->UCAConsts
);
332 if((lh
->baseCE
& 0xFF000000)>= (consts
->UCA_PRIMARY_IMPLICIT_MIN
<<24) && (lh
->baseCE
& 0xFF000000) <= (consts
->UCA_PRIMARY_IMPLICIT_MAX
<<24) ) { /* implicits - */
333 //if(lh->baseCE >= PRIMARY_IMPLICIT_MIN && lh->baseCE < PRIMARY_IMPLICIT_MAX ) { /* implicits - */
336 t2
= lh
->baseContCE
& UCOL_REMOVE_CONTINUATION
;
337 lh
->gapsLo
[0] = (t1
& UCOL_PRIMARYMASK
) | (t2
& UCOL_PRIMARYMASK
) >> 16;
338 lh
->gapsLo
[1] = (t1
& UCOL_SECONDARYMASK
) << 16 | (t2
& UCOL_SECONDARYMASK
) << 8;
339 lh
->gapsLo
[2] = (UCOL_TERTIARYORDER(t1
)) << 24 | (UCOL_TERTIARYORDER(t2
)) << 16;
340 uint32_t primaryCE
= t1
& UCOL_PRIMARYMASK
| (t2
& UCOL_PRIMARYMASK
) >> 16;
341 primaryCE
= uprv_uca_getImplicitFromRaw(uprv_uca_getRawFromImplicit(primaryCE
)+1);
343 t1
= primaryCE
& UCOL_PRIMARYMASK
| 0x0505;
344 t2
= (primaryCE
<< 16) & UCOL_PRIMARYMASK
; // | UCOL_CONTINUATION_MARKER;
346 lh
->gapsHi
[0] = (t1
& UCOL_PRIMARYMASK
) | (t2
& UCOL_PRIMARYMASK
) >> 16;
347 lh
->gapsHi
[1] = (t1
& UCOL_SECONDARYMASK
) << 16 | (t2
& UCOL_SECONDARYMASK
) << 8;
348 lh
->gapsHi
[2] = (UCOL_TERTIARYORDER(t1
)) << 24 | (UCOL_TERTIARYORDER(t2
)) << 16;
349 } else if(lh
->indirect
== TRUE
&& lh
->nextCE
!= 0) {
350 //} else if(lh->baseCE == UCOL_RESET_TOP_VALUE && lh->baseContCE == 0) {
353 t2
= lh
->baseContCE
&UCOL_REMOVE_CONTINUATION
;
354 lh
->gapsLo
[0] = (t1
& UCOL_PRIMARYMASK
) | (t2
& UCOL_PRIMARYMASK
) >> 16;
355 lh
->gapsLo
[1] = (t1
& UCOL_SECONDARYMASK
) << 16 | (t2
& UCOL_SECONDARYMASK
) << 8;
356 lh
->gapsLo
[2] = (UCOL_TERTIARYORDER(t1
)) << 24 | (UCOL_TERTIARYORDER(t2
)) << 16;
358 t2
= lh
->nextContCE
&UCOL_REMOVE_CONTINUATION
;
359 lh
->gapsHi
[0] = (t1
& UCOL_PRIMARYMASK
) | (t2
& UCOL_PRIMARYMASK
) >> 16;
360 lh
->gapsHi
[1] = (t1
& UCOL_SECONDARYMASK
) << 16 | (t2
& UCOL_SECONDARYMASK
) << 8;
361 lh
->gapsHi
[2] = (UCOL_TERTIARYORDER(t1
)) << 24 | (UCOL_TERTIARYORDER(t2
)) << 16;
364 if(tokStrength
< UCOL_CE_STRENGTH_LIMIT
) {
365 if((lh
->pos
[tokStrength
] = ucol_inv_getNext(src
, lh
, tokStrength
)) >= 0) {
366 lh
->fStrToken
[tokStrength
] = tok
;
367 } else { /* The CE must be implicit, since it's not in the table */
369 *status
= U_INTERNAL_PROGRAM_ERROR
;
373 while(tok
!= NULL
&& tok
->strength
>= tokStrength
) {
374 if(tokStrength
< UCOL_CE_STRENGTH_LIMIT
) {
375 lh
->lStrToken
[tokStrength
] = tok
;
379 if(tokStrength
< UCOL_CE_STRENGTH_LIMIT
-1) {
380 /* check if previous interval is the same and merge the intervals if it is so */
381 if(lh
->pos
[tokStrength
] == lh
->pos
[tokStrength
+1]) {
382 lh
->fStrToken
[tokStrength
] = lh
->fStrToken
[tokStrength
+1];
383 lh
->fStrToken
[tokStrength
+1] = NULL
;
384 lh
->lStrToken
[tokStrength
+1] = NULL
;
385 lh
->pos
[tokStrength
+1] = -1;
389 tokStrength
= tok
->strength
;
394 for(st
= 0; st
< 3; st
++) {
395 if((pos
= lh
->pos
[st
]) >= 0) {
396 t1
= *(CETable
+3*(pos
));
397 t2
= *(CETable
+3*(pos
)+1);
398 lh
->gapsHi
[3*st
] = (t1
& UCOL_PRIMARYMASK
) | (t2
& UCOL_PRIMARYMASK
) >> 16;
399 lh
->gapsHi
[3*st
+1] = (t1
& UCOL_SECONDARYMASK
) << 16 | (t2
& UCOL_SECONDARYMASK
) << 8;
400 //lh->gapsHi[3*st+2] = (UCOL_TERTIARYORDER(t1)) << 24 | (UCOL_TERTIARYORDER(t2)) << 16;
401 lh
->gapsHi
[3*st
+2] = (t1
&0x3f) << 24 | (t2
&0x3f) << 16;
403 //t1 = *(CETable+3*(pos));
404 //t2 = *(CETable+3*(pos)+1);
407 lh
->gapsLo
[3*st
] = (t1
& UCOL_PRIMARYMASK
) | (t2
& UCOL_PRIMARYMASK
) >> 16;
408 lh
->gapsLo
[3*st
+1] = (t1
& UCOL_SECONDARYMASK
) << 16 | (t2
& UCOL_SECONDARYMASK
) << 8;
409 lh
->gapsLo
[3*st
+2] = (t1
&0x3f) << 24 | (t2
&0x3f) << 16;
416 #define ucol_countBytes(value, noOfBytes) \
418 uint32_t mask = 0xFFFFFFFF; \
421 if(((value) & mask) != 0) { \
428 U_CFUNC
uint32_t ucol_getNextGenerated(ucolCEGenerator
*g
, UErrorCode
*status
) {
429 if(U_SUCCESS(*status
)) {
430 g
->current
= ucol_nextWeight(g
->ranges
, &g
->noOfRanges
);
435 U_CFUNC
uint32_t ucol_getSimpleCEGenerator(ucolCEGenerator
*g
, UColToken
*tok
, uint32_t strength
, UErrorCode
*status
) {
436 /* TODO: rename to enum names */
437 uint32_t high
, low
, count
=1;
438 uint32_t maxByte
= (strength
== UCOL_TERTIARY
)?0x3F:0xFF;
440 if(strength
== UCOL_SECONDARY
) {
441 low
= UCOL_COMMON_TOP2
<<24;
443 count
= 0xFF - UCOL_COMMON_TOP2
;
445 low
= UCOL_BYTE_COMMON
<< 24; //0x05000000;
447 count
= 0x40 - UCOL_BYTE_COMMON
;
450 if(tok
->next
!= NULL
&& tok
->next
->strength
== strength
) {
451 count
= tok
->next
->toInsert
;
454 g
->noOfRanges
= ucol_allocWeights(low
, high
, count
, maxByte
, g
->ranges
);
455 g
->current
= UCOL_BYTE_COMMON
<<24;
457 if(g
->noOfRanges
== 0) {
458 *status
= U_INTERNAL_PROGRAM_ERROR
;
463 U_CFUNC
uint32_t ucol_getCEGenerator(ucolCEGenerator
*g
, uint32_t* lows
, uint32_t* highs
, UColToken
*tok
, uint32_t fStrength
, UErrorCode
*status
) {
464 uint32_t strength
= tok
->strength
;
465 uint32_t low
= lows
[fStrength
*3+strength
];
466 uint32_t high
= highs
[fStrength
*3+strength
];
467 uint32_t maxByte
= 0;
468 if(strength
== UCOL_TERTIARY
) {
470 } else if(strength
== UCOL_PRIMARY
) {
476 uint32_t count
= tok
->toInsert
;
478 if(low
>= high
&& strength
> UCOL_PRIMARY
) {
479 int32_t s
= strength
;
482 if(lows
[fStrength
*3+s
] != highs
[fStrength
*3+s
]) {
483 if(strength
== UCOL_SECONDARY
) {
484 low
= UCOL_COMMON_TOP2
<<24;
487 //low = 0x02000000; // This needs to be checked - what if low is
494 *status
= U_INTERNAL_PROGRAM_ERROR
;
504 if(strength
== UCOL_SECONDARY
) { /* similar as simple */
505 if(low
>= (UCOL_COMMON_BOT2
<<24) && low
< (uint32_t)(UCOL_COMMON_TOP2
<<24)) {
506 low
= UCOL_COMMON_TOP2
<<24;
508 if(high
> (UCOL_COMMON_BOT2
<<24) && high
< (uint32_t)(UCOL_COMMON_TOP2
<<24)) {
509 high
= UCOL_COMMON_TOP2
<<24;
511 if(low
< (UCOL_COMMON_BOT2
<<24)) {
512 g
->noOfRanges
= ucol_allocWeights(UCOL_BYTE_UNSHIFTED_MIN
<<24, high
, count
, maxByte
, g
->ranges
);
513 g
->current
= ucol_nextWeight(g
->ranges
, &g
->noOfRanges
);
514 //g->current = UCOL_COMMON_BOT2<<24;
519 g
->noOfRanges
= ucol_allocWeights(low
, high
, count
, maxByte
, g
->ranges
);
520 if(g
->noOfRanges
== 0) {
521 *status
= U_INTERNAL_PROGRAM_ERROR
;
523 g
->current
= ucol_nextWeight(g
->ranges
, &g
->noOfRanges
);
528 uint32_t u_toLargeKana(const UChar
*source
, const uint32_t sourceLen
, UChar
*resBuf
, const uint32_t resLen
, UErrorCode
*status
) {
532 if(U_FAILURE(*status
)) {
536 if(sourceLen
> resLen
) {
537 *status
= U_MEMORY_ALLOCATION_ERROR
;
541 for(i
= 0; i
< sourceLen
; i
++) {
543 if(0x3042 < c
&& c
< 0x30ef) { /* Kana range */
545 case 0x41: case 0x43: case 0x45: case 0x47: case 0x49: case 0x63: case 0x83: case 0x85: case 0x8E:
546 case 0xA1: case 0xA3: case 0xA5: case 0xA7: case 0xA9: case 0xC3: case 0xE3: case 0xE5: case 0xEE:
563 uint32_t u_toSmallKana(const UChar
*source
, const uint32_t sourceLen
, UChar
*resBuf
, const uint32_t resLen
, UErrorCode
*status
) {
567 if(U_FAILURE(*status
)) {
571 if(sourceLen
> resLen
) {
572 *status
= U_MEMORY_ALLOCATION_ERROR
;
576 for(i
= 0; i
< sourceLen
; i
++) {
578 if(0x3042 < c
&& c
< 0x30ef) { /* Kana range */
580 case 0x42: case 0x44: case 0x46: case 0x48: case 0x4A: case 0x64: case 0x84: case 0x86: case 0x8F:
581 case 0xA2: case 0xA4: case 0xA6: case 0xA8: case 0xAA: case 0xC4: case 0xE4: case 0xE6: case 0xEF:
598 uint8_t ucol_uprv_getCaseBits(const UCollator
*UCA
, const UChar
*src
, uint32_t len
, UErrorCode
*status
) {
602 uint32_t uCount
= 0, lCount
= 0;
607 if(U_FAILURE(*status
)) {
608 return UCOL_LOWER_CASE
;
611 nLen
= unorm_normalize(src
, len
, UNORM_NFKD
, 0, n
, 128, status
);
612 if(U_SUCCESS(*status
)) {
613 for(i
= 0; i
< nLen
; i
++) {
614 uprv_init_collIterate(UCA
, &n
[i
], 1, &s
);
615 order
= ucol_getNextCE(UCA
, &s
, status
);
616 if(isContinuation(order
)) {
617 *status
= U_INTERNAL_PROGRAM_ERROR
;
618 return UCOL_LOWER_CASE
;
620 if((order
&UCOL_CASE_BIT_MASK
)== UCOL_UPPER_CASE
) {
623 if(u_islower(n
[i
])) {
627 u_toSmallKana(&n
[i
], 1, sk
, 1, status
);
628 u_toLargeKana(&n
[i
], 1, lk
, 1, status
);
629 if(sk
[0] == n
[i
] && lk
[0] != n
[i
]) {
637 if(uCount
!= 0 && lCount
!= 0) {
638 return UCOL_MIXED_CASE
;
639 } else if(uCount
!= 0) {
640 return UCOL_UPPER_CASE
;
642 return UCOL_LOWER_CASE
;
647 U_CFUNC
void ucol_doCE(UColTokenParser
*src
, uint32_t *CEparts
, UColToken
*tok
, UErrorCode
*status
) {
648 /* packs the weight parts in CEparts into tok->CEs and sets the case bits of the first CE */
649 uint32_t noOfBytes
[3];
652 for(i
= 0; i
<3; i
++) {
653 ucol_countBytes(CEparts
[i
], noOfBytes
[i
]);
656 /* Here we have to pack CEs from parts */
661 while(2*CEi
<noOfBytes
[0] || CEi
<noOfBytes
[1] || CEi
<noOfBytes
[2]) {
663 value
= UCOL_CONTINUATION_MARKER
; /* Continuation marker */
668 if(2*CEi
<noOfBytes
[0]) {
669 value
|= ((CEparts
[0]>>(32-16*(CEi
+1))) & 0xFFFF) << 16;
671 if(CEi
<noOfBytes
[1]) {
672 value
|= ((CEparts
[1]>>(32-8*(CEi
+1))) & 0xFF) << 8;
674 if(CEi
<noOfBytes
[2]) {
675 value
|= ((CEparts
[2]>>(32-8*(CEi
+1))) & 0x3F);
677 tok
->CEs
[CEi
] = value
;
680 if(CEi
== 0) { /* totally ignorable */
683 } else { /* there is at least something */
688 // we want to set case bits here and now, not later.
689 // Case bits handling
690 if(tok
->CEs
[0] != 0) { // case bits should be set only for non-ignorables
691 tok
->CEs
[0] &= 0xFFFFFF3F; // Clean the case bits field
692 int32_t cSize
= (tok
->source
& 0xFF000000) >> 24;
693 UChar
*cPoints
= (tok
->source
& 0x00FFFFFF) + src
->source
;
697 tok
->CEs
[0] |= ucol_uprv_getCaseBits(src
->UCA
, cPoints
, cSize
, status
);
699 // Copy it from the UCA
700 uint32_t caseCE
= ucol_getFirstCE(src
->UCA
, cPoints
[0], status
);
701 tok
->CEs
[0] |= (caseCE
& 0xC0);
706 fprintf(stderr
, "%04X str: %i, [%08X, %08X, %08X]: tok: ", tok
->debugSource
, tok
->strength
, CEparts
[0] >> (32-8*noOfBytes
[0]), CEparts
[1] >> (32-8*noOfBytes
[1]), CEparts
[2]>> (32-8*noOfBytes
[2]));
707 for(i
= 0; i
<tok
->noOfCEs
; i
++) {
708 fprintf(stderr
, "%08X ", tok
->CEs
[i
]);
710 fprintf(stderr
, "\n");
714 U_CFUNC
void ucol_initBuffers(UColTokenParser
*src
, UColTokListHeader
*lh
, UErrorCode
*status
) {
715 ucolCEGenerator Gens
[UCOL_CE_STRENGTH_LIMIT
];
716 uint32_t CEparts
[UCOL_CE_STRENGTH_LIMIT
];
718 UColToken
*tok
= lh
->last
;
719 uint32_t t
[UCOL_STRENGTH_LIMIT
];
721 uprv_memset(t
, 0, UCOL_STRENGTH_LIMIT
*sizeof(uint32_t));
724 t
[tok
->strength
] = 1;
726 while(tok
->previous
!= NULL
) {
727 if(tok
->previous
->strength
< tok
->strength
) { /* going up */
728 t
[tok
->strength
] = 0;
729 t
[tok
->previous
->strength
]++;
730 } else if(tok
->previous
->strength
> tok
->strength
) { /* going down */
731 t
[tok
->previous
->strength
] = 1;
736 tok
->toInsert
= t
[tok
->strength
];
739 tok
->toInsert
= t
[tok
->strength
];
740 ucol_inv_getGapPositions(src
, lh
, status
);
743 fprintf(stderr
, "BaseCE: %08X %08X\n", lh
->baseCE
, lh
->baseContCE
);
745 for(j
= 2; j
>= 0; j
--) {
746 fprintf(stderr
, "gapsLo[%i] [%08X %08X %08X]\n", j
, lh
->gapsLo
[j
*3], lh
->gapsLo
[j
*3+1], lh
->gapsLo
[j
*3+2]);
747 fprintf(stderr
, "gapsHi[%i] [%08X %08X %08X]\n", j
, lh
->gapsHi
[j
*3], lh
->gapsHi
[j
*3+1], lh
->gapsHi
[j
*3+2]);
749 tok
=lh
->first
[UCOL_TOK_POLARITY_POSITIVE
];
752 fprintf(stderr
,"%i", tok
->strength
);
754 } while(tok
!= NULL
);
755 fprintf(stderr
, "\n");
757 tok
=lh
->first
[UCOL_TOK_POLARITY_POSITIVE
];
760 fprintf(stderr
,"%i", tok
->toInsert
);
762 } while(tok
!= NULL
);
766 uint32_t fStrength
= UCOL_IDENTICAL
;
767 uint32_t initStrength
= UCOL_IDENTICAL
;
770 CEparts
[UCOL_PRIMARY
] = (lh
->baseCE
& UCOL_PRIMARYMASK
) | (lh
->baseContCE
& UCOL_PRIMARYMASK
) >> 16;
771 CEparts
[UCOL_SECONDARY
] = (lh
->baseCE
& UCOL_SECONDARYMASK
) << 16 | (lh
->baseContCE
& UCOL_SECONDARYMASK
) << 8;
772 CEparts
[UCOL_TERTIARY
] = (UCOL_TERTIARYORDER(lh
->baseCE
)) << 24 | (UCOL_TERTIARYORDER(lh
->baseContCE
)) << 16;
774 while (tok
!= NULL
&& U_SUCCESS(*status
)) {
775 fStrength
= tok
->strength
;
776 if(fStrength
< initStrength
) {
777 initStrength
= fStrength
;
778 if(lh
->pos
[fStrength
] == -1) {
779 while(lh
->pos
[fStrength
] == -1 && fStrength
> 0) {
782 if(lh
->pos
[fStrength
] == -1) {
783 *status
= U_INTERNAL_PROGRAM_ERROR
;
787 if(initStrength
== UCOL_TERTIARY
) { /* starting with tertiary */
788 CEparts
[UCOL_PRIMARY
] = lh
->gapsLo
[fStrength
*3];
789 CEparts
[UCOL_SECONDARY
] = lh
->gapsLo
[fStrength
*3+1];
790 /*CEparts[UCOL_TERTIARY] = ucol_getCEGenerator(&Gens[2], lh->gapsLo[fStrength*3+2], lh->gapsHi[fStrength*3+2], tok, UCOL_TERTIARY); */
791 CEparts
[UCOL_TERTIARY
] = ucol_getCEGenerator(&Gens
[UCOL_TERTIARY
], lh
->gapsLo
, lh
->gapsHi
, tok
, fStrength
, status
);
792 } else if(initStrength
== UCOL_SECONDARY
) { /* secondaries */
793 CEparts
[UCOL_PRIMARY
] = lh
->gapsLo
[fStrength
*3];
794 /*CEparts[1] = ucol_getCEGenerator(&Gens[1], lh->gapsLo[fStrength*3+1], lh->gapsHi[fStrength*3+1], tok, 1);*/
795 CEparts
[UCOL_SECONDARY
] = ucol_getCEGenerator(&Gens
[UCOL_SECONDARY
], lh
->gapsLo
, lh
->gapsHi
, tok
, fStrength
, status
);
796 CEparts
[UCOL_TERTIARY
] = ucol_getSimpleCEGenerator(&Gens
[UCOL_TERTIARY
], tok
, UCOL_TERTIARY
, status
);
797 } else { /* primaries */
798 /*CEparts[UCOL_PRIMARY] = ucol_getCEGenerator(&Gens[0], lh->gapsLo[0], lh->gapsHi[0], tok, UCOL_PRIMARY);*/
799 CEparts
[UCOL_PRIMARY
] = ucol_getCEGenerator(&Gens
[UCOL_PRIMARY
], lh
->gapsLo
, lh
->gapsHi
, tok
, fStrength
, status
);
800 CEparts
[UCOL_SECONDARY
] = ucol_getSimpleCEGenerator(&Gens
[UCOL_SECONDARY
], tok
, UCOL_SECONDARY
, status
);
801 CEparts
[UCOL_TERTIARY
] = ucol_getSimpleCEGenerator(&Gens
[UCOL_TERTIARY
], tok
, UCOL_TERTIARY
, status
);
804 if(tok
->strength
== UCOL_TERTIARY
) {
805 CEparts
[UCOL_TERTIARY
] = ucol_getNextGenerated(&Gens
[UCOL_TERTIARY
], status
);
806 } else if(tok
->strength
== UCOL_SECONDARY
) {
807 CEparts
[UCOL_SECONDARY
] = ucol_getNextGenerated(&Gens
[UCOL_SECONDARY
], status
);
808 CEparts
[UCOL_TERTIARY
] = ucol_getSimpleCEGenerator(&Gens
[UCOL_TERTIARY
], tok
, UCOL_TERTIARY
, status
);
809 } else if(tok
->strength
== UCOL_PRIMARY
) {
810 CEparts
[UCOL_PRIMARY
] = ucol_getNextGenerated(&Gens
[UCOL_PRIMARY
], status
);
811 CEparts
[UCOL_SECONDARY
] = ucol_getSimpleCEGenerator(&Gens
[UCOL_SECONDARY
], tok
, UCOL_SECONDARY
, status
);
812 CEparts
[UCOL_TERTIARY
] = ucol_getSimpleCEGenerator(&Gens
[UCOL_TERTIARY
], tok
, UCOL_TERTIARY
, status
);
815 ucol_doCE(src
, CEparts
, tok
, status
);
820 U_CFUNC
void ucol_createElements(UColTokenParser
*src
, tempUCATable
*t
, UColTokListHeader
*lh
, UErrorCode
*status
) {
822 UColToken
*tok
= lh
->first
;
823 UColToken
*expt
= NULL
;
824 uint32_t i
= 0, j
= 0;
826 while(tok
!= NULL
&& U_SUCCESS(*status
)) {
827 /* first, check if there are any expansions */
828 /* if there are expansions, we need to do a little bit more processing */
829 /* since parts of expansion can be tailored, while others are not */
830 if(tok
->expansion
!= 0) {
831 uint32_t len
= tok
->expansion
>> 24;
832 uint32_t currentSequenceLen
= len
;
833 uint32_t expOffset
= tok
->expansion
& 0x00FFFFFF;
834 //uint32_t exp = currentSequenceLen | expOffset;
836 exp
.source
= currentSequenceLen
| expOffset
;
837 exp
.rulesToParse
= src
->source
;
840 currentSequenceLen
= len
;
841 while(currentSequenceLen
> 0) {
842 exp
.source
= (currentSequenceLen
<< 24) | expOffset
;
843 if((expt
= (UColToken
*)uhash_get(src
->tailored
, &exp
)) != NULL
&& expt
->strength
!= UCOL_TOK_RESET
) { /* expansion is tailored */
844 uint32_t noOfCEsToCopy
= expt
->noOfCEs
;
845 for(j
= 0; j
<noOfCEsToCopy
; j
++) {
846 tok
->expCEs
[tok
->noOfExpCEs
+ j
] = expt
->CEs
[j
];
848 tok
->noOfExpCEs
+= noOfCEsToCopy
;
849 // Smart people never try to add codepoints and CEs.
850 // For some odd reason, it won't work.
851 expOffset
+= currentSequenceLen
; //noOfCEsToCopy;
852 len
-= currentSequenceLen
; //noOfCEsToCopy;
855 currentSequenceLen
--;
858 if(currentSequenceLen
== 0) { /* couldn't find any tailored subsequence */
859 /* will have to get one from UCA */
860 /* first, get the UChars from the rules */
861 /* then pick CEs out until there is no more and stuff them into expansion */
864 uprv_init_collIterate(src
->UCA
, expOffset
+ src
->source
, 1, &s
);
867 order
= ucol_getNextCE(src
->UCA
, &s
, status
);
868 if(order
== UCOL_NO_MORE_CES
) {
871 tok
->expCEs
[tok
->noOfExpCEs
++] = order
;
881 /* set the ucaelement with obtained values */
882 el
.noOfCEs
= tok
->noOfCEs
+ tok
->noOfExpCEs
;
884 for(i
= 0; i
<tok
->noOfCEs
; i
++) {
885 el
.CEs
[i
] = tok
->CEs
[i
];
887 for(i
= 0; i
<tok
->noOfExpCEs
; i
++) {
888 el
.CEs
[i
+tok
->noOfCEs
] = tok
->expCEs
[i
];
892 // We kept prefix and source kind of together, as it is a kind of a contraction.
893 // However, now we have to slice the prefix off the main thing -
894 el
.prefix
= el
.prefixChars
;
895 el
.cPoints
= el
.uchars
;
896 if(tok
->prefix
!= 0) { // we will just copy the prefix here, and adjust accordingly in the
897 // addPrefix function in ucol_elm. The reason is that we need to add both composed AND
898 // decomposed elements to the unsafe table.
899 el
.prefixSize
= tok
->prefix
>>24;
900 uprv_memcpy(el
.prefix
, src
->source
+ (tok
->prefix
& 0x00FFFFFF), el
.prefixSize
*sizeof(UChar
));
902 el
.cSize
= (tok
->source
>> 24)-(tok
->prefix
>>24);
903 uprv_memcpy(el
.uchars
, (tok
->source
& 0x00FFFFFF)+(tok
->prefix
>>24) + src
->source
, el
.cSize
*sizeof(UChar
));
908 el
.cSize
= (tok
->source
>> 24);
909 uprv_memcpy(el
.uchars
, (tok
->source
& 0x00FFFFFF) + src
->source
, el
.cSize
*sizeof(UChar
));
911 if(src
->UCA
!= NULL
) {
912 for(i
= 0; i
<el
.cSize
; i
++) {
913 if(UCOL_ISJAMO(el
.cPoints
[i
])) {
914 t
->image
->jamoSpecial
= TRUE
;
919 /* and then, add it */
921 fprintf(stderr
, "Adding: %04X with %08X\n", el
.cPoints
[0], el
.CEs
[0]);
923 uprv_uca_addAnElement(t
, &el
, status
);
925 #if UCOL_DEBUG_DUPLICATES
926 if(*status
!= U_ZERO_ERROR
) {
927 fprintf(stderr
, "replaced CE for %04X with CE for %04X\n", el
.cPoints
[0], tok
->debugSource
);
928 *status
= U_ZERO_ERROR
;
937 static UBool U_CALLCONV
938 _processUCACompleteIgnorables(const void *context
, UChar32 start
, UChar32 limit
, uint32_t value
) {
939 UErrorCode status
= U_ZERO_ERROR
;
940 tempUCATable
*t
= (tempUCATable
*)context
;
942 while(start
< limit
) {
943 uint32_t CE
= utrie_get32(t
->mapping
, start
, NULL
);
944 if(CE
== UCOL_NOT_FOUND
) {
948 el
.prefixChars
[0] = 0;
949 el
.prefix
= el
.prefixChars
;
950 el
.cPoints
= el
.uchars
;
953 UTF_APPEND_CHAR(el
.uchars
, el
.cSize
, 1024, start
);
957 uprv_uca_addAnElement(t
, &el
, &status
);
963 if(U_FAILURE(status
)) {
972 ucol_uprv_bld_copyRangeFromUCA(UColTokenParser
*src
, tempUCATable
*t
,
973 UChar32 start
, UChar32 end
,
974 UErrorCode
*status
) {
976 uint32_t CE
= UCOL_NOT_FOUND
;
981 el
.prefixChars
[0] = 0;
984 if(U_SUCCESS(*status
)) {
985 for(u
= start
; u
<=end
; u
++) {
986 if((CE
= utrie_get32(t
->mapping
, u
, NULL
)) == UCOL_NOT_FOUND
987 /* this test is for contractions that are missing the starting element. */
988 || ((isCntTableElement(CE
)) &&
989 (uprv_cnttab_getCE(t
->contractions
, CE
, 0, status
) == UCOL_NOT_FOUND
))
992 U16_APPEND_UNSAFE(el
.uchars
, el
.cSize
, u
);
993 //decomp[0] = (UChar)u;
994 //el.uchars[0] = (UChar)u;
995 el
.cPoints
= el
.uchars
;
998 el
.prefix
= el
.prefixChars
;
1000 //uprv_init_collIterate(src->UCA, decomp, 1, &colIt);
1001 // We actually want to check whether this element is a special
1002 // If it is an implicit element (hangul, CJK - we want to copy the
1003 // special, not the resolved CEs) - for hangul, copying resolved
1004 // would just make things the same (there is an expansion and it
1005 // takes approximately the same amount of time to resolve as
1006 // falling back to the UCA).
1008 UTRIE_GET32(src->UCA->mapping, u, CE);
1010 if(tag == HANGUL_SYLLABLE_TAG || tag == CJK_IMPLICIT_TAG
1011 || tag == IMPLICIT_TAG || tag == TRAIL_SURROGATE_TAG
1012 || tag == LEAD_SURROGATE_TAG) {
1013 el.CEs[el.noOfCEs++] = CE;
1016 // It turns out that it does not make sense to keep implicits
1017 // unresolved. The cost of resolving them is big enough so that
1018 // it doesn't make any difference whether we have to go to the UCA
1021 uprv_init_collIterate(src
->UCA
, el
.uchars
, el
.cSize
, &colIt
);
1022 while(CE
!= UCOL_NO_MORE_CES
) {
1023 CE
= ucol_getNextCE(src
->UCA
, &colIt
, status
);
1024 if(CE
!= UCOL_NO_MORE_CES
) {
1025 el
.CEs
[el
.noOfCEs
++] = CE
;
1029 uprv_uca_addAnElement(t
, &el
, status
);
1035 UCATableHeader
*ucol_assembleTailoringTable(UColTokenParser
*src
, UErrorCode
*status
) {
1037 if(U_FAILURE(*status
)) {
1041 2. Eliminate the negative lists by doing the following for each non-null negative list:
1042 o if previousCE(baseCE, strongestN) != some ListHeader X's baseCE,
1043 create new ListHeader X
1044 o reverse the list, add to the end of X's positive list. Reset the strength of the
1045 first item you add, based on the stronger strength levels of the two lists.
1048 3. For each ListHeader with a non-null positive list:
1051 o Find all character strings with CEs between the baseCE and the
1052 next/previous CE, at the strength of the first token. Add these to the
1054 - That is, if UCA has ... x <<< X << x' <<< X' < y ..., and the
1055 tailoring has & x < z...
1056 - Then we change the tailoring to & x <<< X << x' <<< X' < z ...
1058 /* It is possible that this part should be done even while constructing list */
1059 /* The problem is that it is unknown what is going to be the strongest weight */
1060 /* So we might as well do it here */
1063 o Allocate CEs for each token in the list, based on the total number N of the
1064 largest level difference, and the gap G between baseCE and nextCE at that
1065 level. The relation * between the last item and nextCE is the same as the
1067 o Example: baseCE < a << b <<< q << c < d < e * nextCE(X,1)
1068 - There are 3 primary items: a, d, e. Fit them into the primary gap.
1069 Then fit b and c into the secondary gap between a and d, then fit q
1070 into the tertiary gap between b and c.
1072 o Example: baseCE << b <<< q << c * nextCE(X,2)
1073 - There are 2 secondary items: b, c. Fit them into the secondary gap.
1074 Then fit q into the tertiary gap between b and c.
1075 o When incrementing primary values, we will not cross high byte
1076 boundaries except where there is only a single-byte primary. That is to
1077 ensure that the script reordering will continue to work.
1079 UCATableHeader
*image
= (UCATableHeader
*)uprv_malloc(sizeof(UCATableHeader
));
1081 if (image
== NULL
) {
1082 *status
= U_MEMORY_ALLOCATION_ERROR
;
1085 uprv_memcpy(image
, src
->UCA
->image
, sizeof(UCATableHeader
));
1087 for(i
= 0; i
<src
->resultLen
; i
++) {
1088 /* now we need to generate the CEs */
1089 /* We stuff the initial value in the buffers, and increase the appropriate buffer */
1090 /* According to strength */
1091 if(U_SUCCESS(*status
)) {
1092 if(src
->lh
[i
].first
) { // if there are any elements
1093 // due to the way parser works, subsequent tailorings
1094 // may remove all the elements from a sequence, therefore
1095 // leaving an empty tailoring sequence.
1096 ucol_initBuffers(src
, &src
->lh
[i
], status
);
1099 if(U_FAILURE(*status
)) {
1105 if(src
->varTop
!= NULL
) { /* stuff the variable top value */
1106 src
->opts
->variableTopValue
= (*(src
->varTop
->CEs
))>>16;
1107 /* remove it from the list */
1108 if(src
->varTop
->listHeader
->first
== src
->varTop
) { /* first in list */
1109 src
->varTop
->listHeader
->first
= src
->varTop
->next
;
1111 if(src
->varTop
->listHeader
->last
== src
->varTop
) { /* last in list */
1112 src
->varTop
->listHeader
->last
= src
->varTop
->previous
;
1114 if(src
->varTop
->next
!= NULL
) {
1115 src
->varTop
->next
->previous
= src
->varTop
->previous
;
1117 if(src
->varTop
->previous
!= NULL
) {
1118 src
->varTop
->previous
->next
= src
->varTop
->next
;
1123 tempUCATable
*t
= uprv_uca_initTempTable(image
, src
->opts
, src
->UCA
, NOT_FOUND_TAG
, NOT_FOUND_TAG
, status
);
1126 /* After this, we have assigned CE values to all regular CEs */
1127 /* now we will go through list once more and resolve expansions, */
1128 /* make UCAElements structs and add them to table */
1129 for(i
= 0; i
<src
->resultLen
; i
++) {
1130 /* for each token list, resolve its expansions, build the */
1131 /* UCAElements structs and add them to the temp table; */
1132 /* lists are skipped once status reports a failure */
1133 if(U_SUCCESS(*status
)) {
1134 ucol_createElements(src
, t
, &src
->lh
[i
], status
);
1141 el
.prefixChars
[0] = 0;
1143 /* add latin-1 stuff */
1144 ucol_uprv_bld_copyRangeFromUCA(src
, t
, 0, 0xFF, status
);
1146 /* add stuff for copying */
1147 if(src
->copySet
!= NULL
) {
1149 UnicodeSet
*set
= (UnicodeSet
*)src
->copySet
;
1150 for(i
= 0; i
< set
->getRangeCount(); i
++) {
1151 ucol_uprv_bld_copyRangeFromUCA(src
, t
, set
->getRangeStart(i
), set
->getRangeEnd(i
), status
);
1155 if(U_SUCCESS(*status
)) {
1156 /* copy contractions from the UCA - this is felt mostly for cyrillic*/
1158 uint32_t tailoredCE
= UCOL_NOT_FOUND
;
1159 //UChar *conts = (UChar *)((uint8_t *)src->UCA->image + src->UCA->image->UCAConsts+sizeof(UCAConstants));
1160 UChar
*conts
= (UChar
*)((uint8_t *)src
->UCA
->image
+ src
->UCA
->image
->contractionUCACombos
);
1161 UCollationElements
*ucaEl
= ucol_openElements(src
->UCA
, NULL
, 0, status
);
1162 while(*conts
!= 0) {
1163 /*tailoredCE = ucmpe32_get(t->mapping, *conts);*/
1164 tailoredCE
= utrie_get32(t
->mapping
, *conts
, NULL
);
1165 if(tailoredCE
!= UCOL_NOT_FOUND
) {
1166 UBool needToAdd
= TRUE
;
1167 if(isCntTableElement(tailoredCE
)) {
1168 if(uprv_cnttab_isTailored(t
->contractions
, tailoredCE
, conts
+1, status
) == TRUE
) {
1172 if(src
->removeSet
!= NULL
&& uset_contains(src
->removeSet
, *conts
)) {
1176 if(needToAdd
== TRUE
) { // we need to add if this contraction is not tailored.
1177 el
.prefix
= el
.prefixChars
;
1179 el
.cPoints
= el
.uchars
;
1181 el
.uchars
[0] = *conts
;
1182 el
.uchars
[1] = *(conts
+1);
1184 el
.uchars
[2] = *(conts
+2);
1189 ucol_setText(ucaEl
, el
.uchars
, el
.cSize
, status
);
1190 while ((int32_t)(el
.CEs
[el
.noOfCEs
] = ucol_next(ucaEl
, status
)) != UCOL_NULLORDER
) {
1193 uprv_uca_addAnElement(t
, &el
, status
);
1196 } else if(src
->removeSet
!= NULL
&& uset_contains(src
->removeSet
, *conts
)) {
1197 ucol_uprv_bld_copyRangeFromUCA(src
, t
, *conts
, *conts
, status
);
1201 ucol_closeElements(ucaEl
);
1204 // Add completely ignorable elements
1205 utrie_enum(&t
->UCA
->mapping
, NULL
, _processUCACompleteIgnorables
, t
);
1208 // canonical closure
1209 uprv_uca_canonicalClosure(t
, status
);
1212 /* still need to produce compatibility closure */
1214 UCATableHeader
*myData
= uprv_uca_assembleTable(t
, status
);
1216 uprv_uca_closeTempTable(t
);
1223 static UBool U_CALLCONV
1224 ucol_bld_cleanup(void)
1226 udata_close(invUCA_DATA_MEM
);
1227 invUCA_DATA_MEM
= NULL
;
1228 _staticInvUCA
= NULL
;
1233 U_CAPI
const InverseUCATableHeader
* U_EXPORT2
1234 ucol_initInverseUCA(UErrorCode
*status
)
1236 if(U_FAILURE(*status
)) return NULL
;
1239 UBool f
= (_staticInvUCA
== NULL
);
1243 InverseUCATableHeader
*newInvUCA
= NULL
;
1244 UDataMemory
*result
= udata_openChoice(NULL
, INVC_DATA_TYPE
, INVC_DATA_NAME
, isAcceptableInvUCA
, NULL
, status
);
1246 if(U_FAILURE(*status
)) {
1248 udata_close(result
);
1250 // This is not needed, as we are talking about
1251 // memory we got from UData
1252 //uprv_free(newInvUCA);
1255 if(result
!= NULL
) { /* It looks like sometimes we can fail to find the data file */
1256 newInvUCA
= (InverseUCATableHeader
*)udata_getMemory(result
);
1257 UCollator
*UCA
= ucol_initUCA(status
);
1258 // UCA versions of UCA and inverse UCA should match
1259 if(uprv_memcmp(newInvUCA
->UCAVersion
, UCA
->image
->UCAVersion
, sizeof(UVersionInfo
)) != 0) {
1260 *status
= U_INVALID_FORMAT_ERROR
;
1261 udata_close(result
);
1266 if(_staticInvUCA
== NULL
) {
1267 _staticInvUCA
= newInvUCA
;
1268 invUCA_DATA_MEM
= result
;
1274 if(newInvUCA
!= NULL
) {
1275 udata_close(result
);
1276 // This is not needed, as we are talking about
1277 // memory we got from UData
1278 //uprv_free(newInvUCA);
1281 ucln_i18n_registerCleanup(UCLN_I18N_UCOL_BLD
, ucol_bld_cleanup
);
1285 return _staticInvUCA
;
1288 #endif /* #if !UCONFIG_NO_COLLATION */