]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/ucol_bld.cpp
ICU-3.13.tar.gz
[apple/icu.git] / icuSources / i18n / ucol_bld.cpp
CommitLineData
b75a7d8f
A
/*
*******************************************************************************
*
*   Copyright (C) 2001-2003, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  ucol_bld.cpp
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created 02/22/2001
*   created by: Vladimir Weinstein
*
*   This module builds a collator based on the rule set.
*
*/
19
20#include "unicode/utypes.h"
21
22#if !UCONFIG_NO_COLLATION
23
24#include "unicode/ucoleitr.h"
25#include "unicode/uchar.h"
26#include "ucol_bld.h"
27#include "ucln_in.h"
28#include "umutex.h"
29#include "unicode/uniset.h"
30
31static const InverseUCATableHeader* invUCA = NULL;
32static UDataMemory* invUCA_DATA_MEM = NULL;
33
34U_CDECL_BEGIN
35static UBool U_CALLCONV
36isAcceptableInvUCA(void * /*context*/,
37 const char * /*type*/, const char * /*name*/,
38 const UDataInfo *pInfo){
39 /* context, type & name are intentionally not used */
40 if( pInfo->size>=20 &&
41 pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
42 pInfo->charsetFamily==U_CHARSET_FAMILY &&
43 pInfo->dataFormat[0]==invUcaDataInfo.dataFormat[0] && /* dataFormat="InvC" */
44 pInfo->dataFormat[1]==invUcaDataInfo.dataFormat[1] &&
45 pInfo->dataFormat[2]==invUcaDataInfo.dataFormat[2] &&
46 pInfo->dataFormat[3]==invUcaDataInfo.dataFormat[3] &&
47 pInfo->formatVersion[0]==invUcaDataInfo.formatVersion[0] &&
48 pInfo->formatVersion[1]>=invUcaDataInfo.formatVersion[1] //&&
49 //pInfo->formatVersion[1]==invUcaDataInfo.formatVersion[1] &&
50 //pInfo->formatVersion[2]==invUcaDataInfo.formatVersion[2] &&
51 //pInfo->formatVersion[3]==invUcaDataInfo.formatVersion[3] &&
52 ) {
53 UVersionInfo UCDVersion;
54 u_getUnicodeVersion(UCDVersion);
55 if(pInfo->dataVersion[0]==UCDVersion[0] &&
56 pInfo->dataVersion[1]==UCDVersion[1]) {
57 //pInfo->dataVersion[1]==invUcaDataInfo.dataVersion[1] &&
58 //pInfo->dataVersion[2]==invUcaDataInfo.dataVersion[2] &&
59 //pInfo->dataVersion[3]==invUcaDataInfo.dataVersion[3]) {
60 return TRUE;
61 } else {
62 return FALSE;
63 }
64 } else {
65 return FALSE;
66 }
67}
68U_CDECL_END
69
70static
71int32_t ucol_inv_findCE(uint32_t CE, uint32_t SecondCE) {
72 uint32_t bottom = 0, top = invUCA->tableSize;
73 uint32_t i = 0;
74 uint32_t first = 0, second = 0;
75 uint32_t *CETable = (uint32_t *)((uint8_t *)invUCA+invUCA->table);
76
77 while(bottom < top-1) {
78 i = (top+bottom)/2;
79 first = *(CETable+3*i);
80 second = *(CETable+3*i+1);
81 if(first > CE) {
82 top = i;
83 } else if(first < CE) {
84 bottom = i;
85 } else {
86 if(second > SecondCE) {
87 top = i;
88 } else if(second < SecondCE) {
89 bottom = i;
90 } else {
91 break;
92 }
93 }
94 }
95
96 /* weiv: */
97 /* in searching for elements, I have removed the failure */
98 /* The reason for this is that the builder does not rely */
99 /* on search mechanism telling it that it didn't find an */
100 /* element. However, indirect positioning relies on being */
101 /* able to find the elements around any CE, even if it is */
102 /* not defined in the UCA. */
103 return i;
104/*
105 if((first == CE && second == SecondCE)) {
106 return i;
107 } else {
108 return -1;
109 }
110*/
111}
112
113static const uint32_t strengthMask[UCOL_CE_STRENGTH_LIMIT] = {
114 0xFFFF0000,
115 0xFFFFFF00,
116 0xFFFFFFFF
117};
118
119U_CAPI int32_t U_EXPORT2 ucol_inv_getNextCE(uint32_t CE, uint32_t contCE,
120 uint32_t *nextCE, uint32_t *nextContCE,
121 uint32_t strength) {
122 uint32_t *CETable = (uint32_t *)((uint8_t *)invUCA+invUCA->table);
123 int32_t iCE;
124
125 iCE = ucol_inv_findCE(CE, contCE);
126
127 if(iCE<0) {
128 *nextCE = UCOL_NOT_FOUND;
129 return -1;
130 }
131
132 CE &= strengthMask[strength];
133 contCE &= strengthMask[strength];
134
135 *nextCE = CE;
136 *nextContCE = contCE;
137
138 while((*nextCE & strengthMask[strength]) == CE
139 && (*nextContCE & strengthMask[strength]) == contCE) {
140 *nextCE = (*(CETable+3*(++iCE)));
141 *nextContCE = (*(CETable+3*(iCE)+1));
142 }
143
144 return iCE;
145}
146
147U_CAPI int32_t U_EXPORT2 ucol_inv_getPrevCE(uint32_t CE, uint32_t contCE,
148 uint32_t *prevCE, uint32_t *prevContCE,
149 uint32_t strength) {
150 uint32_t *CETable = (uint32_t *)((uint8_t *)invUCA+invUCA->table);
151 int32_t iCE;
152
153 iCE = ucol_inv_findCE(CE, contCE);
154
155 if(iCE<0) {
156 *prevCE = UCOL_NOT_FOUND;
157 return -1;
158 }
159
160 CE &= strengthMask[strength];
161 contCE &= strengthMask[strength];
162
163 *prevCE = CE;
164 *prevContCE = contCE;
165
166 while((*prevCE & strengthMask[strength]) == CE
167 && (*prevContCE & strengthMask[strength])== contCE
168 && iCE > 0) { /* this condition should prevent falling off the edge of the world */
169 /* here, we end up in a singularity - zero */
170 *prevCE = (*(CETable+3*(--iCE)));
171 *prevContCE = (*(CETable+3*(iCE)+1));
172 }
173
174 return iCE;
175}
176
177static
178inline int32_t ucol_inv_getPrevious(UColTokListHeader *lh, uint32_t strength) {
179
180 uint32_t CE = lh->baseCE;
181 uint32_t SecondCE = lh->baseContCE;
182
183 uint32_t *CETable = (uint32_t *)((uint8_t *)invUCA+invUCA->table);
184 uint32_t previousCE, previousContCE;
185 int32_t iCE;
186
187 iCE = ucol_inv_findCE(CE, SecondCE);
188
189 if(iCE<0) {
190 return -1;
191 }
192
193 CE &= strengthMask[strength];
194 SecondCE &= strengthMask[strength];
195
196 previousCE = CE;
197 previousContCE = SecondCE;
198
199 while((previousCE & strengthMask[strength]) == CE && (previousContCE & strengthMask[strength])== SecondCE) {
200 previousCE = (*(CETable+3*(--iCE)));
201 previousContCE = (*(CETable+3*(iCE)+1));
202 }
203 lh->previousCE = previousCE;
204 lh->previousContCE = previousContCE;
205
206 return iCE;
207}
208
209static
210inline int32_t ucol_inv_getNext(UColTokListHeader *lh, uint32_t strength) {
211 uint32_t CE = lh->baseCE;
212 uint32_t SecondCE = lh->baseContCE;
213
214 uint32_t *CETable = (uint32_t *)((uint8_t *)invUCA+invUCA->table);
215 uint32_t nextCE, nextContCE;
216 int32_t iCE;
217
218 iCE = ucol_inv_findCE(CE, SecondCE);
219
220 if(iCE<0) {
221 return -1;
222 }
223
224 CE &= strengthMask[strength];
225 SecondCE &= strengthMask[strength];
226
227 nextCE = CE;
228 nextContCE = SecondCE;
229
230 while((nextCE & strengthMask[strength]) == CE
231 && (nextContCE & strengthMask[strength]) == SecondCE) {
232 nextCE = (*(CETable+3*(++iCE)));
233 nextContCE = (*(CETable+3*(iCE)+1));
234 }
235
236 lh->nextCE = nextCE;
237 lh->nextContCE = nextContCE;
238
239 return iCE;
240}
241
242U_CFUNC void ucol_inv_getGapPositions(UColTokenParser *src, UColTokListHeader *lh, UErrorCode *status) {
243 /* reset all the gaps */
244 int32_t i = 0;
245 uint32_t *CETable = (uint32_t *)((uint8_t *)invUCA+invUCA->table);
246 uint32_t st = 0;
247 uint32_t t1, t2;
248 int32_t pos;
249
250 UColToken *tok = lh->first;
251 uint32_t tokStrength = tok->strength;
252
253 for(i = 0; i<3; i++) {
254 lh->gapsHi[3*i] = 0;
255 lh->gapsHi[3*i+1] = 0;
256 lh->gapsHi[3*i+2] = 0;
257 lh->gapsLo[3*i] = 0;
258 lh->gapsLo[3*i+1] = 0;
259 lh->gapsLo[3*i+2] = 0;
260 lh->numStr[i] = 0;
261 lh->fStrToken[i] = NULL;
262 lh->lStrToken[i] = NULL;
263 lh->pos[i] = -1;
264 }
265
266 UCAConstants *consts = (UCAConstants *)((uint8_t *)src->UCA->image + src->UCA->image->UCAConsts);
267
268 if(lh->baseCE >= (consts->UCA_PRIMARY_IMPLICIT_MIN<<24) && lh->baseCE < (consts->UCA_PRIMARY_IMPLICIT_MAX<<24) ) { /* implicits - */
269 //if(lh->baseCE >= PRIMARY_IMPLICIT_MIN && lh->baseCE < PRIMARY_IMPLICIT_MAX ) { /* implicits - */
270 lh->pos[0] = 0;
271 t1 = lh->baseCE;
272 t2 = lh->baseContCE;
273 lh->gapsLo[0] = (t1 & UCOL_PRIMARYMASK) | (t2 & UCOL_PRIMARYMASK) >> 16;
274 lh->gapsLo[1] = (t1 & UCOL_SECONDARYMASK) << 16 | (t2 & UCOL_SECONDARYMASK) << 8;
275 lh->gapsLo[2] = (UCOL_TERTIARYORDER(t1)) << 24 | (UCOL_TERTIARYORDER(t2)) << 16;
276 if(lh->baseCE < 0xEF000000) {
277 /* first implicits have three byte primaries, with a gap of one */
278 /* so we esentially need to add 2 to the top byte in lh->baseContCE */
279 t2 += 0x02000000;
280 } else {
281 /* second implicits have four byte primaries, with a gap of IMPLICIT_LAST2_MULTIPLIER_ */
282 /* Now, this guy is not really accessible here, so until we find a better way to pass it */
283 /* around, we'll assume that the gap is 1 */
284 t2 += 0x00020000;
285 }
286 lh->gapsHi[0] = (t1 & UCOL_PRIMARYMASK) | (t2 & UCOL_PRIMARYMASK) >> 16;
287 lh->gapsHi[1] = (t1 & UCOL_SECONDARYMASK) << 16 | (t2 & UCOL_SECONDARYMASK) << 8;
288 lh->gapsHi[2] = (UCOL_TERTIARYORDER(t1)) << 24 | (UCOL_TERTIARYORDER(t2)) << 16;
289 } else if(lh->indirect == TRUE && lh->nextCE != 0) {
290 //} else if(lh->baseCE == UCOL_RESET_TOP_VALUE && lh->baseContCE == 0) {
291 lh->pos[0] = 0;
292 t1 = lh->baseCE;
293 t2 = lh->baseContCE;
294 lh->gapsLo[0] = (t1 & UCOL_PRIMARYMASK) | (t2 & UCOL_PRIMARYMASK) >> 16;
295 lh->gapsLo[1] = (t1 & UCOL_SECONDARYMASK) << 16 | (t2 & UCOL_SECONDARYMASK) << 8;
296 lh->gapsLo[2] = (UCOL_TERTIARYORDER(t1)) << 24 | (UCOL_TERTIARYORDER(t2)) << 16;
297 t1 = lh->nextCE;
298 t2 = lh->nextContCE;
299 lh->gapsHi[0] = (t1 & UCOL_PRIMARYMASK) | (t2 & UCOL_PRIMARYMASK) >> 16;
300 lh->gapsHi[1] = (t1 & UCOL_SECONDARYMASK) << 16 | (t2 & UCOL_SECONDARYMASK) << 8;
301 lh->gapsHi[2] = (UCOL_TERTIARYORDER(t1)) << 24 | (UCOL_TERTIARYORDER(t2)) << 16;
302 } else {
303 for(;;) {
304 if(tokStrength < UCOL_CE_STRENGTH_LIMIT) {
305 if((lh->pos[tokStrength] = ucol_inv_getNext(lh, tokStrength)) >= 0) {
306 lh->fStrToken[tokStrength] = tok;
307 } else { /* The CE must be implicit, since it's not in the table */
308 /* Error */
309 *status = U_INTERNAL_PROGRAM_ERROR;
310 }
311 }
312
313 while(tok != NULL && tok->strength >= tokStrength) {
314 if(tokStrength < UCOL_CE_STRENGTH_LIMIT) {
315 lh->lStrToken[tokStrength] = tok;
316 }
317 tok = tok->next;
318 }
319 if(tokStrength < UCOL_CE_STRENGTH_LIMIT-1) {
320 /* check if previous interval is the same and merge the intervals if it is so */
321 if(lh->pos[tokStrength] == lh->pos[tokStrength+1]) {
322 lh->fStrToken[tokStrength] = lh->fStrToken[tokStrength+1];
323 lh->fStrToken[tokStrength+1] = NULL;
324 lh->lStrToken[tokStrength+1] = NULL;
325 lh->pos[tokStrength+1] = -1;
326 }
327 }
328 if(tok != NULL) {
329 tokStrength = tok->strength;
330 } else {
331 break;
332 }
333 }
334 for(st = 0; st < 3; st++) {
335 if((pos = lh->pos[st]) >= 0) {
336 t1 = *(CETable+3*(pos));
337 t2 = *(CETable+3*(pos)+1);
338 lh->gapsHi[3*st] = (t1 & UCOL_PRIMARYMASK) | (t2 & UCOL_PRIMARYMASK) >> 16;
339 lh->gapsHi[3*st+1] = (t1 & UCOL_SECONDARYMASK) << 16 | (t2 & UCOL_SECONDARYMASK) << 8;
340 //lh->gapsHi[3*st+2] = (UCOL_TERTIARYORDER(t1)) << 24 | (UCOL_TERTIARYORDER(t2)) << 16;
341 lh->gapsHi[3*st+2] = (t1&0x3f) << 24 | (t2&0x3f) << 16;
342 pos--;
343 t1 = *(CETable+3*(pos));
344 t2 = *(CETable+3*(pos)+1);
345 lh->gapsLo[3*st] = (t1 & UCOL_PRIMARYMASK) | (t2 & UCOL_PRIMARYMASK) >> 16;
346 lh->gapsLo[3*st+1] = (t1 & UCOL_SECONDARYMASK) << 16 | (t2 & UCOL_SECONDARYMASK) << 8;
347 lh->gapsLo[3*st+2] = (t1&0x3f) << 24 | (t2&0x3f) << 16;
348 }
349 }
350 }
351}
352
353
/*
 * ucol_countBytes(value, noOfBytes)
 *
 * Stores in noOfBytes how many of the masks 0xFFFFFFFF, 0x00FFFFFF,
 * 0x0000FFFF and 0x000000FF select a non-zero part of value. For a weight
 * whose significant bytes are left-aligned this is its length in bytes,
 * e.g. 0x05060000 gives 2.
 *
 * The body is wrapped in do { } while(0) so the macro behaves as a single
 * statement (safe in an unbraced if/else), and the local has a name that
 * cannot capture an argument. value is evaluated several times; do not pass
 * an expression with side effects.
 */
#define ucol_countBytes(value, noOfBytes)                   \
do {                                                        \
    uint32_t ucol_countBytes_mask_ = 0xFFFFFFFF;            \
    (noOfBytes) = 0;                                        \
    while(ucol_countBytes_mask_ != 0) {                     \
        if(((value) & ucol_countBytes_mask_) != 0) {        \
            (noOfBytes)++;                                  \
        }                                                   \
        ucol_countBytes_mask_ >>= 8;                        \
    }                                                       \
} while(0)
365
366U_CFUNC uint32_t ucol_getNextGenerated(ucolCEGenerator *g, UErrorCode *status) {
367 if(U_SUCCESS(*status)) {
368 g->current = ucol_nextWeight(g->ranges, &g->noOfRanges);
369 }
370 return g->current;
371}
372
373U_CFUNC uint32_t ucol_getSimpleCEGenerator(ucolCEGenerator *g, UColToken *tok, uint32_t strength, UErrorCode *status) {
374/* TODO: rename to enum names */
375 uint32_t high, low, count=1;
376 uint32_t maxByte = (strength == UCOL_TERTIARY)?0x3F:0xFF;
377
378 if(strength == UCOL_SECONDARY) {
379 low = UCOL_COMMON_TOP2<<24;
380 high = 0xFFFFFFFF;
381 count = 0xFF - UCOL_COMMON_TOP2;
382 } else {
383 low = UCOL_BYTE_COMMON << 24; //0x05000000;
384 high = 0x40000000;
385 count = 0x40 - UCOL_BYTE_COMMON;
386 }
387
388 if(tok->next != NULL && tok->next->strength == strength) {
389 count = tok->next->toInsert;
390 }
391
392 g->noOfRanges = ucol_allocWeights(low, high, count, maxByte, g->ranges);
393 g->current = UCOL_BYTE_COMMON<<24;
394
395 if(g->noOfRanges == 0) {
396 *status = U_INTERNAL_PROGRAM_ERROR;
397 }
398 return g->current;
399}
400
401U_CFUNC uint32_t ucol_getCEGenerator(ucolCEGenerator *g, uint32_t* lows, uint32_t* highs, UColToken *tok, uint32_t fStrength, UErrorCode *status) {
402 uint32_t strength = tok->strength;
403 uint32_t low = lows[fStrength*3+strength];
404 uint32_t high = highs[fStrength*3+strength];
405 uint32_t maxByte = (strength == UCOL_TERTIARY)?0x3F:0xFF;
406
407 uint32_t count = tok->toInsert;
408
409 if(low >= high && strength > UCOL_PRIMARY) {
410 int32_t s = strength;
411 for(;;) {
412 s--;
413 if(lows[fStrength*3+s] != highs[fStrength*3+s]) {
414 if(strength == UCOL_SECONDARY) {
415 low = UCOL_COMMON_TOP2<<24;
416 high = 0xFFFFFFFF;
417 } else {
418 //low = 0x02000000; // This needs to be checked - what if low is
419 // not good...
420 high = 0x40000000;
421 }
422 break;
423 }
424 if(s<0) {
425 *status = U_INTERNAL_PROGRAM_ERROR;
426 return 0;
427 }
428 }
429 }
430
431 if(low == 0) {
432 low = 0x01000000;
433 }
434
435 if(strength == UCOL_SECONDARY) { /* similar as simple */
436 if(low >= (UCOL_COMMON_BOT2<<24) && low < (uint32_t)(UCOL_COMMON_TOP2<<24)) {
437 low = UCOL_COMMON_TOP2<<24;
438 }
439 if(high > (UCOL_COMMON_BOT2<<24) && high < (uint32_t)(UCOL_COMMON_TOP2<<24)) {
440 high = UCOL_COMMON_TOP2<<24;
441 }
442 if(low < UCOL_COMMON_BOT2<<24) {
443 g->noOfRanges = ucol_allocWeights(UCOL_COMMON_TOP2<<24, high, count, maxByte, g->ranges);
444 g->current = UCOL_COMMON_BOT2;
445 return g->current;
446 }
447 }
448
449 g->noOfRanges = ucol_allocWeights(low, high, count, maxByte, g->ranges);
450 if(g->noOfRanges == 0) {
451 *status = U_INTERNAL_PROGRAM_ERROR;
452 }
453 g->current = ucol_nextWeight(g->ranges, &g->noOfRanges);
454 return g->current;
455}
456
457U_CFUNC void ucol_doCE(uint32_t *CEparts, UColToken *tok) {
458 /* this one makes the table and stuff */
459 uint32_t noOfBytes[3];
460 uint32_t i;
461
462 for(i = 0; i<3; i++) {
463 ucol_countBytes(CEparts[i], noOfBytes[i]);
464 }
465
466 /* Here we have to pack CEs from parts */
467
468 uint32_t CEi = 0;
469 uint32_t value = 0;
470
471 while(2*CEi<noOfBytes[0] || CEi<noOfBytes[1] || CEi<noOfBytes[2]) {
472 if(CEi > 0) {
473 value = UCOL_CONTINUATION_MARKER; /* Continuation marker */
474 } else {
475 value = 0;
476 }
477
478 if(2*CEi<noOfBytes[0]) {
479 value |= ((CEparts[0]>>(32-16*(CEi+1))) & 0xFFFF) << 16;
480 }
481 if(CEi<noOfBytes[1]) {
482 value |= ((CEparts[1]>>(32-8*(CEi+1))) & 0xFF) << 8;
483 }
484 if(CEi<noOfBytes[2]) {
485 value |= ((CEparts[2]>>(32-8*(CEi+1))) & 0x3F);
486 }
487 tok->CEs[CEi] = value;
488 CEi++;
489 }
490 if(CEi == 0) { /* totally ignorable */
491 tok->noOfCEs = 1;
492 tok->CEs[0] = 0;
493 } else { /* there is at least something */
494 tok->noOfCEs = CEi;
495 }
496
497#if UCOL_DEBUG==2
498 fprintf(stderr, "%04X str: %i, [%08X, %08X, %08X]: tok: ", tok->debugSource, tok->strength, CEparts[0] >> (32-8*noOfBytes[0]), CEparts[1] >> (32-8*noOfBytes[1]), CEparts[2]>> (32-8*noOfBytes[2]));
499 for(i = 0; i<tok->noOfCEs; i++) {
500 fprintf(stderr, "%08X ", tok->CEs[i]);
501 }
502 fprintf(stderr, "\n");
503#endif
504}
505
506U_CFUNC void ucol_initBuffers(UColTokenParser *src, UColTokListHeader *lh, UErrorCode *status) {
507 ucolCEGenerator Gens[UCOL_CE_STRENGTH_LIMIT];
508 uint32_t CEparts[UCOL_CE_STRENGTH_LIMIT];
509
510 UColToken *tok = lh->last;
511 uint32_t t[UCOL_STRENGTH_LIMIT];
512
513 uprv_memset(t, 0, UCOL_STRENGTH_LIMIT*sizeof(uint32_t));
514
515 tok->toInsert = 1;
516 t[tok->strength] = 1;
517
518 while(tok->previous != NULL) {
519 if(tok->previous->strength < tok->strength) { /* going up */
520 t[tok->strength] = 0;
521 t[tok->previous->strength]++;
522 } else if(tok->previous->strength > tok->strength) { /* going down */
523 t[tok->previous->strength] = 1;
524 } else {
525 t[tok->strength]++;
526 }
527 tok=tok->previous;
528 tok->toInsert = t[tok->strength];
529 }
530
531 tok->toInsert = t[tok->strength];
532 ucol_inv_getGapPositions(src, lh, status);
533
534#if UCOL_DEBUG
535 fprintf(stderr, "BaseCE: %08X %08X\n", lh->baseCE, lh->baseContCE);
536 int32_t j = 2;
537 for(j = 2; j >= 0; j--) {
538 fprintf(stderr, "gapsLo[%i] [%08X %08X %08X]\n", j, lh->gapsLo[j*3], lh->gapsLo[j*3+1], lh->gapsLo[j*3+2]);
539 fprintf(stderr, "gapsHi[%i] [%08X %08X %08X]\n", j, lh->gapsHi[j*3], lh->gapsHi[j*3+1], lh->gapsHi[j*3+2]);
540 }
541 tok=lh->first[UCOL_TOK_POLARITY_POSITIVE];
542
543 do {
544 fprintf(stderr,"%i", tok->strength);
545 tok = tok->next;
546 } while(tok != NULL);
547 fprintf(stderr, "\n");
548
549 tok=lh->first[UCOL_TOK_POLARITY_POSITIVE];
550
551 do {
552 fprintf(stderr,"%i", tok->toInsert);
553 tok = tok->next;
554 } while(tok != NULL);
555#endif
556
557 tok = lh->first;
558 uint32_t fStrength = UCOL_IDENTICAL;
559 uint32_t initStrength = UCOL_IDENTICAL;
560
561
562 CEparts[UCOL_PRIMARY] = (lh->baseCE & UCOL_PRIMARYMASK) | (lh->baseContCE & UCOL_PRIMARYMASK) >> 16;
563 CEparts[UCOL_SECONDARY] = (lh->baseCE & UCOL_SECONDARYMASK) << 16 | (lh->baseContCE & UCOL_SECONDARYMASK) << 8;
564 CEparts[UCOL_TERTIARY] = (UCOL_TERTIARYORDER(lh->baseCE)) << 24 | (UCOL_TERTIARYORDER(lh->baseContCE)) << 16;
565
566 while (tok != NULL && U_SUCCESS(*status)) {
567 fStrength = tok->strength;
568 if(fStrength < initStrength) {
569 initStrength = fStrength;
570 if(lh->pos[fStrength] == -1) {
571 while(lh->pos[fStrength] == -1 && fStrength > 0) {
572 fStrength--;
573 }
574 if(lh->pos[fStrength] == -1) {
575 *status = U_INTERNAL_PROGRAM_ERROR;
576 return;
577 }
578 }
579 if(initStrength == UCOL_TERTIARY) { /* starting with tertiary */
580 CEparts[UCOL_PRIMARY] = lh->gapsLo[fStrength*3];
581 CEparts[UCOL_SECONDARY] = lh->gapsLo[fStrength*3+1];
582 /*CEparts[UCOL_TERTIARY] = ucol_getCEGenerator(&Gens[2], lh->gapsLo[fStrength*3+2], lh->gapsHi[fStrength*3+2], tok, UCOL_TERTIARY); */
583 CEparts[UCOL_TERTIARY] = ucol_getCEGenerator(&Gens[UCOL_TERTIARY], lh->gapsLo, lh->gapsHi, tok, fStrength, status);
584 } else if(initStrength == UCOL_SECONDARY) { /* secondaries */
585 CEparts[UCOL_PRIMARY] = lh->gapsLo[fStrength*3];
586 /*CEparts[1] = ucol_getCEGenerator(&Gens[1], lh->gapsLo[fStrength*3+1], lh->gapsHi[fStrength*3+1], tok, 1);*/
587 CEparts[UCOL_SECONDARY] = ucol_getCEGenerator(&Gens[UCOL_SECONDARY], lh->gapsLo, lh->gapsHi, tok, fStrength, status);
588 CEparts[UCOL_TERTIARY] = ucol_getSimpleCEGenerator(&Gens[UCOL_TERTIARY], tok, UCOL_TERTIARY, status);
589 } else { /* primaries */
590 /*CEparts[UCOL_PRIMARY] = ucol_getCEGenerator(&Gens[0], lh->gapsLo[0], lh->gapsHi[0], tok, UCOL_PRIMARY);*/
591 CEparts[UCOL_PRIMARY] = ucol_getCEGenerator(&Gens[UCOL_PRIMARY], lh->gapsLo, lh->gapsHi, tok, fStrength, status);
592 CEparts[UCOL_SECONDARY] = ucol_getSimpleCEGenerator(&Gens[UCOL_SECONDARY], tok, UCOL_SECONDARY, status);
593 CEparts[UCOL_TERTIARY] = ucol_getSimpleCEGenerator(&Gens[UCOL_TERTIARY], tok, UCOL_TERTIARY, status);
594 }
595 } else {
596 if(tok->strength == UCOL_TERTIARY) {
597 CEparts[UCOL_TERTIARY] = ucol_getNextGenerated(&Gens[UCOL_TERTIARY], status);
598 } else if(tok->strength == UCOL_SECONDARY) {
599 CEparts[UCOL_SECONDARY] = ucol_getNextGenerated(&Gens[UCOL_SECONDARY], status);
600 CEparts[UCOL_TERTIARY] = ucol_getSimpleCEGenerator(&Gens[UCOL_TERTIARY], tok, UCOL_TERTIARY, status);
601 } else if(tok->strength == UCOL_PRIMARY) {
602 CEparts[UCOL_PRIMARY] = ucol_getNextGenerated(&Gens[UCOL_PRIMARY], status);
603 CEparts[UCOL_SECONDARY] = ucol_getSimpleCEGenerator(&Gens[UCOL_SECONDARY], tok, UCOL_SECONDARY, status);
604 CEparts[UCOL_TERTIARY] = ucol_getSimpleCEGenerator(&Gens[UCOL_TERTIARY], tok, UCOL_TERTIARY, status);
605 }
606 }
607 ucol_doCE(CEparts, tok);
608 tok = tok->next;
609 }
610}
611
612static
613uint32_t u_toLargeKana(const UChar *source, const uint32_t sourceLen, UChar *resBuf, const uint32_t resLen, UErrorCode *status) {
614 uint32_t i = 0;
615 UChar c;
616
617 if(U_FAILURE(*status)) {
618 return 0;
619 }
620
621 if(sourceLen > resLen) {
622 *status = U_MEMORY_ALLOCATION_ERROR;
623 return 0;
624 }
625
626 for(i = 0; i < sourceLen; i++) {
627 c = source[i];
628 if(0x3042 < c && c < 0x30ef) { /* Kana range */
629 switch(c - 0x3000) {
630 case 0x41: case 0x43: case 0x45: case 0x47: case 0x49: case 0x63: case 0x83: case 0x85: case 0x8E:
631 case 0xA1: case 0xA3: case 0xA5: case 0xA7: case 0xA9: case 0xC3: case 0xE3: case 0xE5: case 0xEE:
632 c++;
633 break;
634 case 0xF5:
635 c = 0x30AB;
636 break;
637 case 0xF6:
638 c = 0x30B1;
639 break;
640 }
641 }
642 resBuf[i] = c;
643 }
644 return sourceLen;
645}
646
647static
648uint32_t u_toSmallKana(const UChar *source, const uint32_t sourceLen, UChar *resBuf, const uint32_t resLen, UErrorCode *status) {
649 uint32_t i = 0;
650 UChar c;
651
652 if(U_FAILURE(*status)) {
653 return 0;
654 }
655
656 if(sourceLen > resLen) {
657 *status = U_MEMORY_ALLOCATION_ERROR;
658 return 0;
659 }
660
661 for(i = 0; i < sourceLen; i++) {
662 c = source[i];
663 if(0x3042 < c && c < 0x30ef) { /* Kana range */
664 switch(c - 0x3000) {
665 case 0x42: case 0x44: case 0x46: case 0x48: case 0x4A: case 0x64: case 0x84: case 0x86: case 0x8F:
666 case 0xA2: case 0xA4: case 0xA6: case 0xA8: case 0xAA: case 0xC4: case 0xE4: case 0xE6: case 0xEF:
667 c--;
668 break;
669 case 0xAB:
670 c = 0x30F5;
671 break;
672 case 0xB1:
673 c = 0x30F6;
674 break;
675 }
676 }
677 resBuf[i] = c;
678 }
679 return sourceLen;
680}
681
682static
683uint8_t ucol_uprv_getCaseBits(const UCollator *UCA, const UChar *src, uint32_t len, UErrorCode *status) {
684 uint32_t i = 0;
685 UChar n[128];
686 uint32_t nLen = 0;
687 uint32_t uCount = 0, lCount = 0;
688
689 collIterate s;
690 uint32_t order = 0;
691
692 if(U_FAILURE(*status)) {
693 return UCOL_LOWER_CASE;
694 }
695
696 nLen = unorm_normalize(src, len, UNORM_NFKD, 0, n, 128, status);
697 if(U_SUCCESS(*status)) {
698 for(i = 0; i < nLen; i++) {
699 uprv_init_collIterate(UCA, &n[i], 1, &s);
700 order = ucol_getNextCE(UCA, &s, status);
701 if(isContinuation(order)) {
702 *status = U_INTERNAL_PROGRAM_ERROR;
703 return UCOL_LOWER_CASE;
704 }
705 if((order&UCOL_CASE_BIT_MASK)== UCOL_UPPER_CASE) {
706 uCount++;
707 } else {
708 if(u_islower(n[i])) {
709 lCount++;
710 } else {
711 UChar sk[1], lk[1];
712 u_toSmallKana(&n[i], 1, sk, 1, status);
713 u_toLargeKana(&n[i], 1, lk, 1, status);
714 if(sk[0] == n[i] && lk[0] != n[i]) {
715 lCount++;
716 }
717 }
718 }
719 }
720 }
721
722 if(uCount != 0 && lCount != 0) {
723 return UCOL_MIXED_CASE;
724 } else if(uCount != 0) {
725 return UCOL_UPPER_CASE;
726 } else {
727 return UCOL_LOWER_CASE;
728 }
729}
730
731U_CFUNC void ucol_createElements(UColTokenParser *src, tempUCATable *t, UColTokListHeader *lh, UErrorCode *status) {
732 UCAElements el;
733 UColToken *tok = lh->first;
734 UColToken *expt = NULL;
735 uint32_t i = 0, j = 0;
736
737 while(tok != NULL && U_SUCCESS(*status)) {
738 /* first, check if there are any expansions */
739 /* if there are expansions, we need to do a little bit more processing */
740 /* since parts of expansion can be tailored, while others are not */
741 if(tok->expansion != 0) {
742 uint32_t len = tok->expansion >> 24;
743 uint32_t currentSequenceLen = len;
744 uint32_t expOffset = tok->expansion & 0x00FFFFFF;
745 //uint32_t exp = currentSequenceLen | expOffset;
746 UColToken exp;
747 exp.source = currentSequenceLen | expOffset;
748 exp.rulesToParse = src->source;
749
750 while(len > 0) {
751 currentSequenceLen = len;
752 while(currentSequenceLen > 0) {
753 exp.source = (currentSequenceLen << 24) | expOffset;
754 if((expt = (UColToken *)uhash_get(src->tailored, &exp)) != NULL && expt->strength != UCOL_TOK_RESET) { /* expansion is tailored */
755 uint32_t noOfCEsToCopy = expt->noOfCEs;
756 for(j = 0; j<noOfCEsToCopy; j++) {
757 tok->expCEs[tok->noOfExpCEs + j] = expt->CEs[j];
758 }
759 tok->noOfExpCEs += noOfCEsToCopy;
760 // Smart people never try to add codepoints and CEs.
761 // For some odd reason, it won't work.
762 expOffset += currentSequenceLen; //noOfCEsToCopy;
763 len -= currentSequenceLen; //noOfCEsToCopy;
764 break;
765 } else {
766 currentSequenceLen--;
767 }
768 }
769 if(currentSequenceLen == 0) { /* couldn't find any tailored subsequence */
770 /* will have to get one from UCA */
771 /* first, get the UChars from the rules */
772 /* then pick CEs out until there is no more and stuff them into expansion */
773 collIterate s;
774 uint32_t order = 0;
775 uprv_init_collIterate(src->UCA, expOffset + src->source, 1, &s);
776
777 for(;;) {
778 order = ucol_getNextCE(src->UCA, &s, status);
779 if(order == UCOL_NO_MORE_CES) {
780 break;
781 }
782 tok->expCEs[tok->noOfExpCEs++] = order;
783 }
784 expOffset++;
785 len--;
786 }
787 }
788 } else {
789 tok->noOfExpCEs = 0;
790 }
791
792 /* set the ucaelement with obtained values */
793 el.noOfCEs = tok->noOfCEs + tok->noOfExpCEs;
794 /* copy CEs */
795 for(i = 0; i<tok->noOfCEs; i++) {
796 el.CEs[i] = tok->CEs[i];
797 }
798 for(i = 0; i<tok->noOfExpCEs; i++) {
799 el.CEs[i+tok->noOfCEs] = tok->expCEs[i];
800 }
801
802 /* copy UChars */
803 // We kept prefix and source kind of together, as it is a kind of a contraction.
804 // However, now we have to slice the prefix off the main thing -
805 el.prefix = el.prefixChars;
806 el.cPoints = el.uchars;
807 if(tok->prefix != 0) { // we will just copy the prefix here, and adjust accordingly in the
808 // addPrefix function in ucol_elm. The reason is that we need to add both composed AND
809 // decomposed elements to the unsaf table.
810 el.prefixSize = tok->prefix>>24;
811 uprv_memcpy(el.prefix, src->source + (tok->prefix & 0x00FFFFFF), el.prefixSize*sizeof(UChar));
812
813 el.cSize = (tok->source >> 24)-(tok->prefix>>24);
814 uprv_memcpy(el.uchars, (tok->source & 0x00FFFFFF)+(tok->prefix>>24) + src->source, el.cSize*sizeof(UChar));
815 } else {
816 el.prefixSize = 0;
817 *el.prefix = 0;
818
819 el.cSize = (tok->source >> 24);
820 uprv_memcpy(el.uchars, (tok->source & 0x00FFFFFF) + src->source, el.cSize*sizeof(UChar));
821 }
822
823 if(UCOL_ISTHAIPREVOWEL(el.cPoints[0])) {
824 el.isThai = TRUE;
825 } else {
826 el.isThai = FALSE;
827 }
828
829 if(src->UCA != NULL) {
830 for(i = 0; i<el.cSize; i++) {
831 if(UCOL_ISJAMO(el.cPoints[i])) {
832 t->image->jamoSpecial = TRUE;
833 }
834 }
835 }
836
837 // Case bits handling
838 el.CEs[0] &= 0xFFFFFF3F; // Clean the case bits field
839 if(el.cSize > 1) {
840 // Do it manually
841 el.CEs[0] |= ucol_uprv_getCaseBits(src->UCA, el.cPoints, el.cSize, status);
842 } else {
843 // Copy it from the UCA
844 uint32_t caseCE = ucol_getFirstCE(src->UCA, el.cPoints[0], status);
845 el.CEs[0] |= (caseCE & 0xC0);
846 }
847
848 /* and then, add it */
849#if UCOL_DEBUG==2
850 fprintf(stderr, "Adding: %04X with %08X\n", el.cPoints[0], el.CEs[0]);
851#endif
852 uprv_uca_addAnElement(t, &el, status);
853
854#if 0
855 if(el.cSize > 1) { // this is a contraction, we should check whether a composed form should also be included
856 UChar composed[256];
857 uint32_t compLen = unorm_normalize(el.cPoints, el.cSize, UNORM_NFC, 0, composed, 256, status);;
858
859 if(compLen != el.cSize || uprv_memcmp(composed, el.cPoints, el.cSize*sizeof(UChar))) {
860 // composed form of a contraction is different than the decomposed form!
861 // do it!
862#ifdef UCOL_DEBUG
863 fprintf(stderr, "Adding composed for %04X->%04X\n", *element->cPoints, *composed);
864#endif
865 el.cSize = compLen;
866 uprv_memcpy(el.cPoints, composed, el.cSize*sizeof(UChar));
867 uprv_uca_addAnElement(t, &el, status);
868 }
869 }
870#endif
871
872#if UCOL_DEBUG_DUPLICATES
873 if(*status != U_ZERO_ERROR) {
874 fprintf(stderr, "replaced CE for %04X with CE for %04X\n", el.cPoints[0], tok->debugSource);
875 *status = U_ZERO_ERROR;
876 }
877#endif
878
879 tok = tok->next;
880 }
881}
882
883U_CDECL_BEGIN
884static UBool U_CALLCONV
885_processUCACompleteIgnorables(const void *context, UChar32 start, UChar32 limit, uint32_t value) {
886 UErrorCode status = U_ZERO_ERROR;
887 tempUCATable *t = (tempUCATable *)context;
888 if(value == 0) {
889 while(start < limit) {
890 uint32_t CE = utrie_get32(t->mapping, start, NULL);
891 if(CE == UCOL_NOT_FOUND) {
892 UCAElements el;
893 el.isThai = FALSE;
894 el.prefixSize = 0;
895 el.prefixChars[0] = 0;
896 el.prefix = el.prefixChars;
897 el.cPoints = el.uchars;
898
899 el.cSize = 0;
900 UTF_APPEND_CHAR(el.uchars, el.cSize, 1024, start);
901
902 el.noOfCEs = 1;
903 el.CEs[0] = 0;
904 uprv_uca_addAnElement(t, &el, &status);
905
906 }
907 start++;
908 }
909 }
910 if(U_FAILURE(status)) {
911 return FALSE;
912 } else {
913 return TRUE;
914 }
915}
916U_CDECL_END
917
918static void
919ucol_uprv_bld_copyRangeFromUCA(UColTokenParser *src, tempUCATable *t,
920 UChar32 start, UChar32 end,
921 UErrorCode *status) {
922 //UChar decomp[256];
923 uint32_t CE = UCOL_NOT_FOUND;
924 UChar32 u = 0;
925 UCAElements el;
926 el.isThai = FALSE;
927 el.prefixSize = 0;
928 el.prefixChars[0] = 0;
929 collIterate colIt;
930
931 if(U_SUCCESS(*status)) {
932 for(u = start; u<=end; u++) {
933 if((CE = utrie_get32(t->mapping, u, NULL)) == UCOL_NOT_FOUND
934 /* this test is for contractions that are missing the starting element. */
935 || ((isCntTableElement(CE)) &&
936 (uprv_cnttab_getCE(t->contractions, CE, 0, status) == UCOL_NOT_FOUND))
937 ) {
938 el.cSize = 0;
939 U16_APPEND_UNSAFE(el.uchars, el.cSize, u);
940 //decomp[0] = (UChar)u;
941 //el.uchars[0] = (UChar)u;
942 el.cPoints = el.uchars;
943 //el.cSize = 1;
944 el.noOfCEs = 0;
945 el.prefix = el.prefixChars;
946 el.prefixSize = 0;
947 //uprv_init_collIterate(src->UCA, decomp, 1, &colIt);
948 // We actually want to check whether this element is a special
949 // If it is an implicit element (hangul, CJK - we want to copy the
950 // special, not the resolved CEs) - for hangul, copying resolved
951 // would just make things the same (there is an expansion and it
952 // takes approximately the same amount of time to resolve as
953 // falling back to the UCA).
954 /*
955 UTRIE_GET32(src->UCA->mapping, u, CE);
956 tag = getCETag(CE);
957 if(tag == HANGUL_SYLLABLE_TAG || tag == CJK_IMPLICIT_TAG
958 || tag == IMPLICIT_TAG || tag == TRAIL_SURROGATE_TAG
959 || tag == LEAD_SURROGATE_TAG) {
960 el.CEs[el.noOfCEs++] = CE;
961 } else {
962 */
963 // It turns out that it does not make sense to keep implicits
964 // unresolved. The cost of resolving them is big enough so that
965 // it doesn't make any difference whether we have to go to the UCA
966 // or not.
967 {
968 uprv_init_collIterate(src->UCA, el.uchars, el.cSize, &colIt);
969 while(CE != UCOL_NO_MORE_CES) {
970 CE = ucol_getNextCE(src->UCA, &colIt, status);
971 if(CE != UCOL_NO_MORE_CES) {
972 el.CEs[el.noOfCEs++] = CE;
973 }
974 }
975 }
976 uprv_uca_addAnElement(t, &el, status);
977 }
978 }
979 }
980}
981
982UCATableHeader *ucol_assembleTailoringTable(UColTokenParser *src, UErrorCode *status) {
983 uint32_t i = 0;
984 if(U_FAILURE(*status)) {
985 return NULL;
986 }
987/*
9882. Eliminate the negative lists by doing the following for each non-null negative list:
989 o if previousCE(baseCE, strongestN) != some ListHeader X's baseCE,
990 create new ListHeader X
991 o reverse the list, add to the end of X's positive list. Reset the strength of the
992 first item you add, based on the stronger strength levels of the two lists.
993*/
994/*
9953. For each ListHeader with a non-null positive list:
996*/
997/*
998 o Find all character strings with CEs between the baseCE and the
999 next/previous CE, at the strength of the first token. Add these to the
1000 tailoring.
1001 ? That is, if UCA has ... x <<< X << x' <<< X' < y ..., and the
1002 tailoring has & x < z...
1003 ? Then we change the tailoring to & x <<< X << x' <<< X' < z ...
1004*/
1005 /* It is possible that this part should be done even while constructing list */
1006 /* The problem is that it is unknown what is going to be the strongest weight */
1007 /* So we might as well do it here */
1008
1009/*
1010 o Allocate CEs for each token in the list, based on the total number N of the
1011 largest level difference, and the gap G between baseCE and nextCE at that
1012 level. The relation * between the last item and nextCE is the same as the
1013 strongest strength.
1014 o Example: baseCE < a << b <<< q << c < d < e * nextCE(X,1)
1015 ? There are 3 primary items: a, d, e. Fit them into the primary gap.
1016 Then fit b and c into the secondary gap between a and d, then fit q
1017 into the tertiary gap between b and c.
1018
1019 o Example: baseCE << b <<< q << c * nextCE(X,2)
1020 ? There are 2 secondary items: b, c. Fit them into the secondary gap.
1021 Then fit q into the tertiary gap between b and c.
1022 o When incrementing primary values, we will not cross high byte
1023 boundaries except where there is only a single-byte primary. That is to
1024 ensure that the script reordering will continue to work.
1025*/
1026 UCATableHeader *image = (UCATableHeader *)uprv_malloc(sizeof(UCATableHeader));
1027 /* test for NULL */
1028 if (image == NULL) {
1029 *status = U_MEMORY_ALLOCATION_ERROR;
1030 return NULL;
1031 }
1032 uprv_memcpy(image, src->UCA->image, sizeof(UCATableHeader));
1033
1034 for(i = 0; i<src->resultLen; i++) {
1035 /* now we need to generate the CEs */
1036 /* We stuff the initial value in the buffers, and increase the appropriate buffer */
1037 /* According to strength */
1038 if(U_SUCCESS(*status)) {
1039 ucol_initBuffers(src, &src->lh[i], status);
1040 }
1041 if(U_FAILURE(*status)) {
1042 return NULL;
1043 }
1044
1045 }
1046
1047 if(src->varTop != NULL) { /* stuff the variable top value */
1048 src->opts->variableTopValue = (*(src->varTop->CEs))>>16;
1049 /* remove it from the list */
1050 if(src->varTop->listHeader->first == src->varTop) { /* first in list */
1051 src->varTop->listHeader->first = src->varTop->next;
1052 }
1053 if(src->varTop->listHeader->last == src->varTop) { /* first in list */
1054 src->varTop->listHeader->last = src->varTop->previous;
1055 }
1056 if(src->varTop->next != NULL) {
1057 src->varTop->next->previous = src->varTop->previous;
1058 }
1059 if(src->varTop->previous != NULL) {
1060 src->varTop->previous->next = src->varTop->next;
1061 }
1062 }
1063
1064
1065 tempUCATable *t = uprv_uca_initTempTable(image, src->opts, src->UCA, NOT_FOUND_TAG, status);
1066
1067
1068 /* After this, we have assigned CE values to all regular CEs */
1069 /* now we will go through list once more and resolve expansions, */
1070 /* make UCAElements structs and add them to table */
1071 for(i = 0; i<src->resultLen; i++) {
1072 /* now we need to generate the CEs */
1073 /* We stuff the initial value in the buffers, and increase the appropriate buffer */
1074 /* According to strength */
1075 if(U_SUCCESS(*status)) {
1076 ucol_createElements(src, t, &src->lh[i], status);
1077 }
1078 }
1079
1080 UCAElements el;
1081 el.isThai = FALSE;
1082 el.prefixSize = 0;
1083 el.prefixChars[0] = 0;
1084
1085 /* add latin-1 stuff */
1086 ucol_uprv_bld_copyRangeFromUCA(src, t, 0, 0xFF, status);
1087
1088 /* add stuff for copying */
1089 if(src->copySet != NULL) {
1090 int32_t i = 0;
1091 UnicodeSet *set = (UnicodeSet *)src->copySet;
1092 for(i = 0; i < set->getRangeCount(); i++) {
1093 ucol_uprv_bld_copyRangeFromUCA(src, t, set->getRangeStart(i), set->getRangeEnd(i), status);
1094 }
1095 }
1096
1097 if(U_SUCCESS(*status)) {
1098 /* copy contractions from the UCA - this is felt mostly for cyrillic*/
1099
1100 uint32_t tailoredCE = UCOL_NOT_FOUND;
1101 //UChar *conts = (UChar *)((uint8_t *)src->UCA->image + src->UCA->image->UCAConsts+sizeof(UCAConstants));
1102 UChar *conts = (UChar *)((uint8_t *)src->UCA->image + src->UCA->image->contractionUCACombos);
1103 UCollationElements *ucaEl = ucol_openElements(src->UCA, NULL, 0, status);
1104 while(*conts != 0) {
1105 /*tailoredCE = ucmpe32_get(t->mapping, *conts);*/
1106 tailoredCE = utrie_get32(t->mapping, *conts, NULL);
1107 if(tailoredCE != UCOL_NOT_FOUND) {
1108 UBool needToAdd = TRUE;
1109 if(isCntTableElement(tailoredCE)) {
1110 if(uprv_cnttab_isTailored(t->contractions, tailoredCE, conts+1, status) == TRUE) {
1111 needToAdd = FALSE;
1112 }
1113 }
1114 if(src->removeSet != NULL && uset_contains(src->removeSet, *conts)) {
1115 needToAdd = FALSE;
1116 }
1117
1118 if(needToAdd == TRUE) { // we need to add if this contraction is not tailored.
1119 el.prefix = el.prefixChars;
1120 el.prefixSize = 0;
1121 el.cPoints = el.uchars;
1122 el.noOfCEs = 0;
1123 el.uchars[0] = *conts;
1124 el.uchars[1] = *(conts+1);
1125 if(*(conts+2)!=0) {
1126 el.uchars[2] = *(conts+2);
1127 el.cSize = 3;
1128 } else {
1129 el.cSize = 2;
1130 }
1131 ucol_setText(ucaEl, el.uchars, el.cSize, status);
1132 while ((el.CEs[el.noOfCEs] = ucol_next(ucaEl, status)) != UCOL_NULLORDER) {
1133 el.noOfCEs++;
1134 }
1135 uprv_uca_addAnElement(t, &el, status);
1136 }
1137
1138 } else if(src->removeSet != NULL && uset_contains(src->removeSet, *conts)) {
1139 ucol_uprv_bld_copyRangeFromUCA(src, t, *conts, *conts, status);
1140 }
1141 conts+=3;
1142 }
1143 ucol_closeElements(ucaEl);
1144 }
1145
1146 // Add completely ignorable elements
1147 utrie_enum(t->UCA->mapping, NULL, _processUCACompleteIgnorables, t);
1148
1149
1150 // canonical closure
1151 uprv_uca_canonicalClosure(t, status);
1152
1153
1154 /* still need to produce compatibility closure */
1155
1156 UCATableHeader *myData = uprv_uca_assembleTable(t, status);
1157
1158 uprv_uca_closeTempTable(t);
1159 uprv_free(image);
1160
1161 return myData;
1162}
1163
1164UBool
1165ucol_bld_cleanup(void)
1166{
1167 udata_close(invUCA_DATA_MEM);
1168 invUCA_DATA_MEM = NULL;
1169 invUCA = NULL;
1170 return TRUE;
1171}
1172
1173U_CAPI const InverseUCATableHeader * U_EXPORT2
1174ucol_initInverseUCA(UErrorCode *status)
1175{
1176 if(U_FAILURE(*status)) return NULL;
1177
1178 umtx_lock(NULL);
1179 UBool f = (invUCA == NULL);
1180 umtx_unlock(NULL);
1181
1182 if(f) {
1183 InverseUCATableHeader *newInvUCA = NULL;
1184 UDataMemory *result = udata_openChoice(NULL, INVC_DATA_TYPE, INVC_DATA_NAME, isAcceptableInvUCA, NULL, status);
1185
1186 if(U_FAILURE(*status)) {
1187 if (result) {
1188 udata_close(result);
1189 }
1190 // This is not needed, as we are talking about
1191 // memory we got from UData
1192 //uprv_free(newInvUCA);
1193 }
1194
1195 if(result != NULL) { /* It looks like sometimes we can fail to find the data file */
1196 newInvUCA = (InverseUCATableHeader *)udata_getMemory(result);
1197 UCollator *UCA = ucol_initUCA(status);
1198 // UCA versions of UCA and inverse UCA should match
1199 if(uprv_memcmp(newInvUCA->UCAVersion, UCA->image->UCAVersion, sizeof(UVersionInfo)) != 0) {
1200 *status = U_INVALID_FORMAT_ERROR;
1201 udata_close(result);
1202 return NULL;
1203 }
1204
1205 umtx_lock(NULL);
1206 if(invUCA == NULL) {
1207 invUCA = newInvUCA;
1208 invUCA_DATA_MEM = result;
1209 result = NULL;
1210 newInvUCA = NULL;
1211 }
1212 umtx_unlock(NULL);
1213
1214 if(newInvUCA != NULL) {
1215 udata_close(result);
1216 // This is not needed, as we are talking about
1217 // memory we got from UData
1218 //uprv_free(newInvUCA);
1219 }
1220 else {
1221 ucln_i18n_registerCleanup();
1222 }
1223 }
1224 }
1225 return invUCA;
1226}
1227
1228#endif /* #if !UCONFIG_NO_COLLATION */