]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/ucol_elm.cpp
ICU-461.18.tar.gz
[apple/icu.git] / icuSources / i18n / ucol_elm.cpp
CommitLineData
b75a7d8f
A
1/*
2*******************************************************************************
3*
729e4ab9 4* Copyright (C) 2001-2010, International Business Machines
b75a7d8f
A
5* Corporation and others. All Rights Reserved.
6*
7*******************************************************************************
8* file name: ucaelems.cpp
9* encoding: US-ASCII
10* tab size: 8 (not used)
11* indentation:4
12*
13* created 02/22/2001
14* created by: Vladimir Weinstein
15*
16* This program reads the Fractional UCA table and generates
17* internal format for UCA table as well as inverse UCA table.
18* It then writes binary files containing the data: ucadata.dat
19* & invuca.dat
20*
21* date name comments
22* 03/02/2001 synwee added setMaxExpansion
23* 03/07/2001 synwee merged UCA's maxexpansion and tailoring's
24*/
25
26#include "unicode/utypes.h"
27
28#if !UCONFIG_NO_COLLATION
29
30#include "unicode/uchar.h"
31#include "unicode/unistr.h"
32#include "unicode/ucoleitr.h"
33#include "unicode/normlzr.h"
729e4ab9 34#include "normalizer2impl.h"
b75a7d8f 35#include "ucol_elm.h"
46f4442e
A
36#include "ucol_tok.h"
37#include "ucol_cnt.h"
b75a7d8f
A
38#include "unicode/caniter.h"
39#include "cmemory.h"
40
729e4ab9
A
41U_NAMESPACE_USE
42
b75a7d8f
A
43static uint32_t uprv_uca_processContraction(CntTable *contractions, UCAElements *element, uint32_t existingCE, UErrorCode *status);
44
45U_CDECL_BEGIN
73c04bcf 46static int32_t U_CALLCONV
b75a7d8f 47prefixLookupHash(const UHashTok e) {
46f4442e
A
48 UCAElements *element = (UCAElements *)e.pointer;
49 UChar buf[256];
50 UHashTok key;
51 key.pointer = buf;
52 uprv_memcpy(buf, element->cPoints, element->cSize*sizeof(UChar));
53 buf[element->cSize] = 0;
54 //key.pointer = element->cPoints;
55 //element->cPoints[element->cSize] = 0;
56 return uhash_hashUChars(key);
b75a7d8f
A
57}
58
73c04bcf 59static int8_t U_CALLCONV
b75a7d8f 60prefixLookupComp(const UHashTok e1, const UHashTok e2) {
46f4442e
A
61 UCAElements *element1 = (UCAElements *)e1.pointer;
62 UCAElements *element2 = (UCAElements *)e2.pointer;
63
64 UChar buf1[256];
65 UHashTok key1;
66 key1.pointer = buf1;
67 uprv_memcpy(buf1, element1->cPoints, element1->cSize*sizeof(UChar));
68 buf1[element1->cSize] = 0;
69
70 UChar buf2[256];
71 UHashTok key2;
72 key2.pointer = buf2;
73 uprv_memcpy(buf2, element2->cPoints, element2->cSize*sizeof(UChar));
74 buf2[element2->cSize] = 0;
75
76 return uhash_compareUChars(key1, key2);
b75a7d8f
A
77}
78U_CDECL_END
79
80static int32_t uprv_uca_addExpansion(ExpansionTable *expansions, uint32_t value, UErrorCode *status) {
81 if(U_FAILURE(*status)) {
82 return 0;
83 }
84 if(expansions->CEs == NULL) {
85 expansions->CEs = (uint32_t *)uprv_malloc(INIT_EXP_TABLE_SIZE*sizeof(uint32_t));
86 /* test for NULL */
87 if (expansions->CEs == NULL) {
88 *status = U_MEMORY_ALLOCATION_ERROR;
89 return 0;
90 }
91 expansions->size = INIT_EXP_TABLE_SIZE;
92 expansions->position = 0;
93 }
94
95 if(expansions->position == expansions->size) {
96 uint32_t *newData = (uint32_t *)uprv_realloc(expansions->CEs, 2*expansions->size*sizeof(uint32_t));
97 if(newData == NULL) {
98#ifdef UCOL_DEBUG
99 fprintf(stderr, "out of memory for expansions\n");
100#endif
101 *status = U_MEMORY_ALLOCATION_ERROR;
102 return -1;
103 }
104 expansions->CEs = newData;
105 expansions->size *= 2;
106 }
107
108 expansions->CEs[expansions->position] = value;
109 return(expansions->position++);
110}
111
112U_CAPI tempUCATable* U_EXPORT2
374ca955 113uprv_uca_initTempTable(UCATableHeader *image, UColOptionSet *opts, const UCollator *UCA, UColCETags initTag, UColCETags supplementaryInitTag, UErrorCode *status) {
46f4442e
A
114 MaxJamoExpansionTable *maxjet;
115 MaxExpansionTable *maxet;
116 tempUCATable *t = (tempUCATable *)uprv_malloc(sizeof(tempUCATable));
b75a7d8f 117 /* test for NULL */
46f4442e
A
118 if (t == NULL) {
119 *status = U_MEMORY_ALLOCATION_ERROR;
120 return NULL;
b75a7d8f 121 }
46f4442e
A
122 uprv_memset(t, 0, sizeof(tempUCATable));
123
124 maxet = (MaxExpansionTable *)uprv_malloc(sizeof(MaxExpansionTable));
125 if (maxet == NULL) {
73c04bcf 126 goto allocation_failure;
b75a7d8f 127 }
46f4442e
A
128 uprv_memset(maxet, 0, sizeof(MaxExpansionTable));
129 t->maxExpansions = maxet;
73c04bcf 130
46f4442e
A
131 maxjet = (MaxJamoExpansionTable *)uprv_malloc(sizeof(MaxJamoExpansionTable));
132 if (maxjet == NULL) {
133 goto allocation_failure;
134 }
135 uprv_memset(maxjet, 0, sizeof(MaxJamoExpansionTable));
136 t->maxJamoExpansions = maxjet;
b75a7d8f 137
46f4442e
A
138 t->image = image;
139 t->options = opts;
b75a7d8f 140
46f4442e
A
141 t->UCA = UCA;
142 t->expansions = (ExpansionTable *)uprv_malloc(sizeof(ExpansionTable));
b75a7d8f 143 /* test for NULL */
46f4442e
A
144 if (t->expansions == NULL) {
145 goto allocation_failure;
b75a7d8f 146 }
46f4442e
A
147 uprv_memset(t->expansions, 0, sizeof(ExpansionTable));
148
149 t->mapping = utrie_open(NULL, NULL, UCOL_ELM_TRIE_CAPACITY,
150 UCOL_SPECIAL_FLAG | (initTag<<24),
151 UCOL_SPECIAL_FLAG | (supplementaryInitTag << 24),
152 TRUE); // Do your own mallocs for the structure, array and have linear Latin 1
153 if (U_FAILURE(*status)) {
154 goto allocation_failure;
155 }
156 t->prefixLookup = uhash_open(prefixLookupHash, prefixLookupComp, NULL, status);
157 if (U_FAILURE(*status)) {
158 goto allocation_failure;
b75a7d8f 159 }
46f4442e 160 uhash_setValueDeleter(t->prefixLookup, uhash_freeBlock);
b75a7d8f 161
46f4442e
A
162 t->contractions = uprv_cnttab_open(t->mapping, status);
163 if (U_FAILURE(*status)) {
164 goto cleanup;
165 }
b75a7d8f 166
46f4442e
A
167 /* copy UCA's maxexpansion and merge as we go along */
168 if (UCA != NULL) {
169 /* adding an extra initial value for easier manipulation */
729e4ab9 170 maxet->size = (int32_t)(UCA->lastEndExpansionCE - UCA->endExpansionCE) + 2;
46f4442e
A
171 maxet->position = maxet->size - 1;
172 maxet->endExpansionCE =
173 (uint32_t *)uprv_malloc(sizeof(uint32_t) * maxet->size);
174 /* test for NULL */
175 if (maxet->endExpansionCE == NULL) {
176 goto allocation_failure;
177 }
178 maxet->expansionCESize =
179 (uint8_t *)uprv_malloc(sizeof(uint8_t) * maxet->size);
180 /* test for NULL */
181 if (maxet->expansionCESize == NULL) {
182 goto allocation_failure;
183 }
184 /* initialized value */
185 *(maxet->endExpansionCE) = 0;
186 *(maxet->expansionCESize) = 0;
187 uprv_memcpy(maxet->endExpansionCE + 1, UCA->endExpansionCE,
188 sizeof(uint32_t) * (maxet->size - 1));
189 uprv_memcpy(maxet->expansionCESize + 1, UCA->expansionCESize,
190 sizeof(uint8_t) * (maxet->size - 1));
b75a7d8f 191 }
46f4442e
A
192 else {
193 maxet->size = 0;
b75a7d8f 194 }
46f4442e
A
195 maxjet->endExpansionCE = NULL;
196 maxjet->isV = NULL;
197 maxjet->size = 0;
198 maxjet->position = 0;
199 maxjet->maxLSize = 1;
200 maxjet->maxVSize = 1;
201 maxjet->maxTSize = 1;
b75a7d8f 202
46f4442e 203 t->unsafeCP = (uint8_t *)uprv_malloc(UCOL_UNSAFECP_TABLE_SIZE);
b75a7d8f 204 /* test for NULL */
46f4442e
A
205 if (t->unsafeCP == NULL) {
206 goto allocation_failure;
b75a7d8f 207 }
46f4442e
A
208 t->contrEndCP = (uint8_t *)uprv_malloc(UCOL_UNSAFECP_TABLE_SIZE);
209 /* test for NULL */
210 if (t->contrEndCP == NULL) {
211 goto allocation_failure;
b75a7d8f 212 }
46f4442e
A
213 uprv_memset(t->unsafeCP, 0, UCOL_UNSAFECP_TABLE_SIZE);
214 uprv_memset(t->contrEndCP, 0, UCOL_UNSAFECP_TABLE_SIZE);
215 t->cmLookup = NULL;
216 return t;
b75a7d8f 217
46f4442e
A
218allocation_failure:
219 *status = U_MEMORY_ALLOCATION_ERROR;
220cleanup:
221 uprv_uca_closeTempTable(t);
222 return NULL;
223}
224
225static tempUCATable* U_EXPORT2
226uprv_uca_cloneTempTable(tempUCATable *t, UErrorCode *status) {
227 if(U_FAILURE(*status)) {
b75a7d8f
A
228 return NULL;
229 }
b75a7d8f 230
46f4442e 231 tempUCATable *r = (tempUCATable *)uprv_malloc(sizeof(tempUCATable));
b75a7d8f 232 /* test for NULL */
46f4442e 233 if (r == NULL) {
b75a7d8f
A
234 *status = U_MEMORY_ALLOCATION_ERROR;
235 return NULL;
236 }
46f4442e 237 uprv_memset(r, 0, sizeof(tempUCATable));
b75a7d8f 238
46f4442e
A
239 /* mapping */
240 if(t->mapping != NULL) {
241 /*r->mapping = ucmpe32_clone(t->mapping, status);*/
242 r->mapping = utrie_clone(NULL, t->mapping, NULL, 0);
243 }
b75a7d8f 244
46f4442e
A
245 // a hashing clone function would be very nice. We have none currently...
246 // However, we should be good, as closing should not produce any prefixed elements.
247 r->prefixLookup = NULL; // prefixes are not used in closing
b75a7d8f 248
46f4442e
A
249 /* expansions */
250 if(t->expansions != NULL) {
251 r->expansions = (ExpansionTable *)uprv_malloc(sizeof(ExpansionTable));
252 /* test for NULL */
253 if (r->expansions == NULL) {
254 *status = U_MEMORY_ALLOCATION_ERROR;
255 goto cleanup;
256 }
257 r->expansions->position = t->expansions->position;
258 r->expansions->size = t->expansions->size;
259 if(t->expansions->CEs != NULL) {
260 r->expansions->CEs = (uint32_t *)uprv_malloc(sizeof(uint32_t)*t->expansions->size);
261 /* test for NULL */
262 if (r->expansions->CEs == NULL) {
263 *status = U_MEMORY_ALLOCATION_ERROR;
264 goto cleanup;
265 }
266 uprv_memcpy(r->expansions->CEs, t->expansions->CEs, sizeof(uint32_t)*t->expansions->position);
267 } else {
268 r->expansions->CEs = NULL;
269 }
73c04bcf 270 }
46f4442e 271
b75a7d8f 272 if(t->contractions != NULL) {
46f4442e
A
273 r->contractions = uprv_cnttab_clone(t->contractions, status);
274 // Check for cloning failure.
275 if (r->contractions == NULL) {
276 *status = U_MEMORY_ALLOCATION_ERROR;
277 goto cleanup;
278 }
279 r->contractions->mapping = r->mapping;
b75a7d8f 280 }
46f4442e
A
281
282 if(t->maxExpansions != NULL) {
283 r->maxExpansions = (MaxExpansionTable *)uprv_malloc(sizeof(MaxExpansionTable));
284 /* test for NULL */
285 if (r->maxExpansions == NULL) {
286 *status = U_MEMORY_ALLOCATION_ERROR;
287 goto cleanup;
288 }
289 r->maxExpansions->size = t->maxExpansions->size;
290 r->maxExpansions->position = t->maxExpansions->position;
291 if(t->maxExpansions->endExpansionCE != NULL) {
292 r->maxExpansions->endExpansionCE = (uint32_t *)uprv_malloc(sizeof(uint32_t)*t->maxExpansions->size);
293 /* test for NULL */
294 if (r->maxExpansions->endExpansionCE == NULL) {
295 *status = U_MEMORY_ALLOCATION_ERROR;
296 goto cleanup;
297 }
298 uprv_memset(r->maxExpansions->endExpansionCE, 0xDB, sizeof(uint32_t)*t->maxExpansions->size);
299 uprv_memcpy(r->maxExpansions->endExpansionCE, t->maxExpansions->endExpansionCE, t->maxExpansions->position*sizeof(uint32_t));
300 } else {
301 r->maxExpansions->endExpansionCE = NULL;
302 }
303 if(t->maxExpansions->expansionCESize != NULL) {
304 r->maxExpansions->expansionCESize = (uint8_t *)uprv_malloc(sizeof(uint8_t)*t->maxExpansions->size);
305 /* test for NULL */
306 if (r->maxExpansions->expansionCESize == NULL) {
307 *status = U_MEMORY_ALLOCATION_ERROR;
308 goto cleanup;
309 }
310 uprv_memset(r->maxExpansions->expansionCESize, 0xDB, sizeof(uint8_t)*t->maxExpansions->size);
311 uprv_memcpy(r->maxExpansions->expansionCESize, t->maxExpansions->expansionCESize, t->maxExpansions->position*sizeof(uint8_t));
312 } else {
313 r->maxExpansions->expansionCESize = NULL;
314 }
73c04bcf 315 }
b75a7d8f 316
46f4442e
A
317 if(t->maxJamoExpansions != NULL) {
318 r->maxJamoExpansions = (MaxJamoExpansionTable *)uprv_malloc(sizeof(MaxJamoExpansionTable));
319 /* test for NULL */
320 if (r->maxJamoExpansions == NULL) {
321 *status = U_MEMORY_ALLOCATION_ERROR;
322 goto cleanup;
323 }
324 r->maxJamoExpansions->size = t->maxJamoExpansions->size;
325 r->maxJamoExpansions->position = t->maxJamoExpansions->position;
326 r->maxJamoExpansions->maxLSize = t->maxJamoExpansions->maxLSize;
327 r->maxJamoExpansions->maxVSize = t->maxJamoExpansions->maxVSize;
328 r->maxJamoExpansions->maxTSize = t->maxJamoExpansions->maxTSize;
329 if(t->maxJamoExpansions->size != 0) {
330 r->maxJamoExpansions->endExpansionCE = (uint32_t *)uprv_malloc(sizeof(uint32_t)*t->maxJamoExpansions->size);
331 /* test for NULL */
332 if (r->maxJamoExpansions->endExpansionCE == NULL) {
333 *status = U_MEMORY_ALLOCATION_ERROR;
334 goto cleanup;
335 }
336 uprv_memcpy(r->maxJamoExpansions->endExpansionCE, t->maxJamoExpansions->endExpansionCE, t->maxJamoExpansions->position*sizeof(uint32_t));
337 r->maxJamoExpansions->isV = (UBool *)uprv_malloc(sizeof(UBool)*t->maxJamoExpansions->size);
338 /* test for NULL */
339 if (r->maxJamoExpansions->isV == NULL) {
340 *status = U_MEMORY_ALLOCATION_ERROR;
341 goto cleanup;
342 }
343 uprv_memcpy(r->maxJamoExpansions->isV, t->maxJamoExpansions->isV, t->maxJamoExpansions->position*sizeof(UBool));
344 } else {
345 r->maxJamoExpansions->endExpansionCE = NULL;
346 r->maxJamoExpansions->isV = NULL;
347 }
b75a7d8f
A
348 }
349
46f4442e
A
350 if(t->unsafeCP != NULL) {
351 r->unsafeCP = (uint8_t *)uprv_malloc(UCOL_UNSAFECP_TABLE_SIZE);
352 /* test for NULL */
353 if (r->unsafeCP == NULL) {
354 *status = U_MEMORY_ALLOCATION_ERROR;
355 goto cleanup;
356 }
357 uprv_memcpy(r->unsafeCP, t->unsafeCP, UCOL_UNSAFECP_TABLE_SIZE);
73c04bcf 358 }
b75a7d8f 359
46f4442e
A
360 if(t->contrEndCP != NULL) {
361 r->contrEndCP = (uint8_t *)uprv_malloc(UCOL_UNSAFECP_TABLE_SIZE);
362 /* test for NULL */
363 if (r->contrEndCP == NULL) {
364 *status = U_MEMORY_ALLOCATION_ERROR;
365 goto cleanup;
366 }
367 uprv_memcpy(r->contrEndCP, t->contrEndCP, UCOL_UNSAFECP_TABLE_SIZE);
b75a7d8f 368 }
b75a7d8f 369
46f4442e
A
370 r->UCA = t->UCA;
371 r->image = t->image;
372 r->options = t->options;
373
374 return r;
375cleanup:
376 uprv_uca_closeTempTable(t);
377 return NULL;
378}
379
380
381U_CAPI void U_EXPORT2
382uprv_uca_closeTempTable(tempUCATable *t) {
383 if(t != NULL) {
384 if (t->expansions != NULL) {
385 uprv_free(t->expansions->CEs);
386 uprv_free(t->expansions);
387 }
388 if(t->contractions != NULL) {
389 uprv_cnttab_close(t->contractions);
390 }
391 if (t->mapping != NULL) {
392 utrie_close(t->mapping);
393 }
394
395 if(t->prefixLookup != NULL) {
396 uhash_close(t->prefixLookup);
397 }
398
399 if (t->maxExpansions != NULL) {
400 uprv_free(t->maxExpansions->endExpansionCE);
401 uprv_free(t->maxExpansions->expansionCESize);
402 uprv_free(t->maxExpansions);
403 }
404
405 if (t->maxJamoExpansions->size > 0) {
406 uprv_free(t->maxJamoExpansions->endExpansionCE);
407 uprv_free(t->maxJamoExpansions->isV);
408 }
409 uprv_free(t->maxJamoExpansions);
410
411 uprv_free(t->unsafeCP);
412 uprv_free(t->contrEndCP);
413
414 if (t->cmLookup != NULL) {
415 uprv_free(t->cmLookup->cPoints);
416 uprv_free(t->cmLookup);
417 }
b75a7d8f 418
46f4442e
A
419 uprv_free(t);
420 }
b75a7d8f
A
421}
422
423/**
424* Looks for the maximum length of all expansion sequences ending with the same
425* collation element. The size required for maxexpansion and maxsize is
426* returned if the arrays are too small.
427* @param endexpansion the last expansion collation element to be added
428* @param expansionsize size of the expansion
429* @param maxexpansion data structure to store the maximum expansion data.
430* @param status error status
431* @returns size of the maxexpansion and maxsize used.
432*/
374ca955 433static int uprv_uca_setMaxExpansion(uint32_t endexpansion,
46f4442e
A
434 uint8_t expansionsize,
435 MaxExpansionTable *maxexpansion,
436 UErrorCode *status)
b75a7d8f 437{
46f4442e
A
438 if (maxexpansion->size == 0) {
439 /* we'll always make the first element 0, for easier manipulation */
440 maxexpansion->endExpansionCE =
441 (uint32_t *)uprv_malloc(INIT_EXP_TABLE_SIZE * sizeof(int32_t));
442 /* test for NULL */
443 if (maxexpansion->endExpansionCE == NULL) {
444 *status = U_MEMORY_ALLOCATION_ERROR;
445 return 0;
446 }
447 *(maxexpansion->endExpansionCE) = 0;
448 maxexpansion->expansionCESize =
449 (uint8_t *)uprv_malloc(INIT_EXP_TABLE_SIZE * sizeof(uint8_t));
450 /* test for NULL */;
451 if (maxexpansion->expansionCESize == NULL) {
452 *status = U_MEMORY_ALLOCATION_ERROR;
453 return 0;
454 }
455 *(maxexpansion->expansionCESize) = 0;
456 maxexpansion->size = INIT_EXP_TABLE_SIZE;
457 maxexpansion->position = 0;
b75a7d8f 458 }
46f4442e
A
459
460 if (maxexpansion->position + 1 == maxexpansion->size) {
461 uint32_t *neweece = (uint32_t *)uprv_realloc(maxexpansion->endExpansionCE,
462 2 * maxexpansion->size * sizeof(uint32_t));
463 if (neweece == NULL) {
464 *status = U_MEMORY_ALLOCATION_ERROR;
465 return 0;
466 }
467 maxexpansion->endExpansionCE = neweece;
468
469 uint8_t *neweces = (uint8_t *)uprv_realloc(maxexpansion->expansionCESize,
470 2 * maxexpansion->size * sizeof(uint8_t));
471 if (neweces == NULL) {
472 *status = U_MEMORY_ALLOCATION_ERROR;
473 return 0;
474 }
475 maxexpansion->expansionCESize = neweces;
476 maxexpansion->size *= 2;
b75a7d8f 477 }
46f4442e
A
478
479 uint32_t *pendexpansionce = maxexpansion->endExpansionCE;
480 uint8_t *pexpansionsize = maxexpansion->expansionCESize;
481 int pos = maxexpansion->position;
482
483 uint32_t *start = pendexpansionce;
484 uint32_t *limit = pendexpansionce + pos;
485
486 /* using binary search to determine if last expansion element is
487 already in the array */
488 uint32_t *mid;
489 int result = -1;
490 while (start < limit - 1) {
491 mid = start + ((limit - start) >> 1);
492 if (endexpansion <= *mid) {
493 limit = mid;
494 }
495 else {
496 start = mid;
497 }
b75a7d8f 498 }
46f4442e
A
499
500 if (*start == endexpansion) {
729e4ab9 501 result = (int)(start - pendexpansionce);
46f4442e
A
502 }
503 else if (*limit == endexpansion) {
729e4ab9 504 result = (int)(limit - pendexpansionce);
b75a7d8f 505 }
46f4442e
A
506
507 if (result > -1) {
508 /* found the ce in expansion, we'll just modify the size if it is
509 smaller */
510 uint8_t *currentsize = pexpansionsize + result;
511 if (*currentsize < expansionsize) {
512 *currentsize = expansionsize;
513 }
514 }
515 else {
516 /* we'll need to squeeze the value into the array.
517 initial implementation. */
518 /* shifting the subarray down by 1 */
729e4ab9 519 int shiftsize = (int)((pendexpansionce + pos) - start);
46f4442e
A
520 uint32_t *shiftpos = start + 1;
521 uint8_t *sizeshiftpos = pexpansionsize + (shiftpos - pendexpansionce);
522
523 /* okay need to rearrange the array into sorted order */
524 if (shiftsize == 0 /*|| *(pendexpansionce + pos) < endexpansion*/) { /* the commented part is actually both redundant and dangerous */
525 *(pendexpansionce + pos + 1) = endexpansion;
526 *(pexpansionsize + pos + 1) = expansionsize;
527 }
528 else {
529 uprv_memmove(shiftpos + 1, shiftpos, shiftsize * sizeof(int32_t));
530 uprv_memmove(sizeshiftpos + 1, sizeshiftpos,
531 shiftsize * sizeof(uint8_t));
532 *shiftpos = endexpansion;
533 *sizeshiftpos = expansionsize;
534 }
535 maxexpansion->position ++;
b75a7d8f
A
536
537#ifdef UCOL_DEBUG
46f4442e
A
538 int temp;
539 UBool found = FALSE;
540 for (temp = 0; temp < maxexpansion->position; temp ++) {
541 if (pendexpansionce[temp] >= pendexpansionce[temp + 1]) {
542 fprintf(stderr, "expansions %d\n", temp);
543 }
544 if (pendexpansionce[temp] == endexpansion) {
545 found =TRUE;
546 if (pexpansionsize[temp] < expansionsize) {
547 fprintf(stderr, "expansions size %d\n", temp);
548 }
549 }
550 }
551 if (pendexpansionce[temp] == endexpansion) {
552 found =TRUE;
553 if (pexpansionsize[temp] < expansionsize) {
554 fprintf(stderr, "expansions size %d\n", temp);
555 }
556 }
557 if (!found)
558 fprintf(stderr, "expansion not found %d\n", temp);
b75a7d8f 559#endif
46f4442e 560 }
b75a7d8f 561
46f4442e 562 return maxexpansion->position;
b75a7d8f
A
563}
564
565/**
566* Sets the maximum length of all jamo expansion sequences ending with the same
567* collation element. The size required for maxexpansion and maxsize is
568* returned if the arrays are too small.
569* @param ch the jamo codepoint
570* @param endexpansion the last expansion collation element to be added
571* @param expansionsize size of the expansion
572* @param maxexpansion data structure to store the maximum expansion data.
573* @param status error status
574* @returns size of the maxexpansion and maxsize used.
575*/
374ca955 576static int uprv_uca_setMaxJamoExpansion(UChar ch,
46f4442e
A
577 uint32_t endexpansion,
578 uint8_t expansionsize,
579 MaxJamoExpansionTable *maxexpansion,
580 UErrorCode *status)
b75a7d8f 581{
46f4442e
A
582 UBool isV = TRUE;
583 if (((uint32_t)ch - 0x1100) <= (0x1112 - 0x1100)) {
584 /* determines L for Jamo, doesn't need to store this since it is never
585 at the end of a expansion */
586 if (maxexpansion->maxLSize < expansionsize) {
587 maxexpansion->maxLSize = expansionsize;
588 }
589 return maxexpansion->position;
b75a7d8f 590 }
46f4442e
A
591
592 if (((uint32_t)ch - 0x1161) <= (0x1175 - 0x1161)) {
593 /* determines V for Jamo */
594 if (maxexpansion->maxVSize < expansionsize) {
595 maxexpansion->maxVSize = expansionsize;
596 }
b75a7d8f 597 }
b75a7d8f 598
46f4442e
A
599 if (((uint32_t)ch - 0x11A8) <= (0x11C2 - 0x11A8)) {
600 isV = FALSE;
601 /* determines T for Jamo */
602 if (maxexpansion->maxTSize < expansionsize) {
603 maxexpansion->maxTSize = expansionsize;
604 }
605 }
606
607 if (maxexpansion->size == 0) {
608 /* we'll always make the first element 0, for easier manipulation */
609 maxexpansion->endExpansionCE =
610 (uint32_t *)uprv_malloc(INIT_EXP_TABLE_SIZE * sizeof(uint32_t));
611 /* test for NULL */;
612 if (maxexpansion->endExpansionCE == NULL) {
613 *status = U_MEMORY_ALLOCATION_ERROR;
614 return 0;
615 }
616 *(maxexpansion->endExpansionCE) = 0;
617 maxexpansion->isV =
618 (UBool *)uprv_malloc(INIT_EXP_TABLE_SIZE * sizeof(UBool));
619 /* test for NULL */;
620 if (maxexpansion->isV == NULL) {
621 *status = U_MEMORY_ALLOCATION_ERROR;
622 uprv_free(maxexpansion->endExpansionCE);
623 maxexpansion->endExpansionCE = NULL;
624 return 0;
625 }
626 *(maxexpansion->isV) = 0;
627 maxexpansion->size = INIT_EXP_TABLE_SIZE;
628 maxexpansion->position = 0;
629 }
630
631 if (maxexpansion->position + 1 == maxexpansion->size) {
632 maxexpansion->size *= 2;
633 maxexpansion->endExpansionCE = (uint32_t *)uprv_realloc(maxexpansion->endExpansionCE,
634 maxexpansion->size * sizeof(uint32_t));
635 if (maxexpansion->endExpansionCE == NULL) {
b75a7d8f 636#ifdef UCOL_DEBUG
46f4442e 637 fprintf(stderr, "out of memory for maxExpansions\n");
b75a7d8f 638#endif
46f4442e
A
639 *status = U_MEMORY_ALLOCATION_ERROR;
640 return 0;
641 }
642 maxexpansion->isV = (UBool *)uprv_realloc(maxexpansion->isV,
643 maxexpansion->size * sizeof(UBool));
644 if (maxexpansion->isV == NULL) {
73c04bcf 645#ifdef UCOL_DEBUG
46f4442e 646 fprintf(stderr, "out of memory for maxExpansions\n");
73c04bcf 647#endif
46f4442e
A
648 *status = U_MEMORY_ALLOCATION_ERROR;
649 uprv_free(maxexpansion->endExpansionCE);
650 maxexpansion->endExpansionCE = NULL;
651 return 0;
652 }
b75a7d8f 653 }
b75a7d8f 654
46f4442e
A
655 uint32_t *pendexpansionce = maxexpansion->endExpansionCE;
656 int pos = maxexpansion->position;
b75a7d8f 657
46f4442e
A
658 while (pos > 0) {
659 pos --;
660 if (*(pendexpansionce + pos) == endexpansion) {
661 return maxexpansion->position;
662 }
663 }
b75a7d8f 664
46f4442e
A
665 *(pendexpansionce + maxexpansion->position) = endexpansion;
666 *(maxexpansion->isV + maxexpansion->position) = isV;
667 maxexpansion->position ++;
374ca955 668
46f4442e 669 return maxexpansion->position;
b75a7d8f
A
670}
671
672
673static void ContrEndCPSet(uint8_t *table, UChar c) {
674 uint32_t hash;
675 uint8_t *htByte;
676
677 hash = c;
678 if (hash >= UCOL_UNSAFECP_TABLE_SIZE*8) {
679 hash = (hash & UCOL_UNSAFECP_TABLE_MASK) + 256;
680 }
681 htByte = &table[hash>>3];
682 *htByte |= (1 << (hash & 7));
683}
684
685
686static void unsafeCPSet(uint8_t *table, UChar c) {
687 uint32_t hash;
688 uint8_t *htByte;
689
690 hash = c;
691 if (hash >= UCOL_UNSAFECP_TABLE_SIZE*8) {
692 if (hash >= 0xd800 && hash <= 0xf8ff) {
693 /* Part of a surrogate, or in private use area. */
694 /* These don't go in the table */
695 return;
696 }
697 hash = (hash & UCOL_UNSAFECP_TABLE_MASK) + 256;
698 }
699 htByte = &table[hash>>3];
700 *htByte |= (1 << (hash & 7));
701}
702
46f4442e
A
703static void
704uprv_uca_createCMTable(tempUCATable *t, int32_t noOfCM, UErrorCode *status) {
705 t->cmLookup = (CombinClassTable *)uprv_malloc(sizeof(CombinClassTable));
706 if (t->cmLookup==NULL) {
707 *status = U_MEMORY_ALLOCATION_ERROR;
708 return;
709 }
710 t->cmLookup->cPoints=(UChar *)uprv_malloc(noOfCM*sizeof(UChar));
711 if (t->cmLookup->cPoints ==NULL) {
712 uprv_free(t->cmLookup);
713 t->cmLookup = NULL;
714 *status = U_MEMORY_ALLOCATION_ERROR;
715 return;
716 }
717
718 t->cmLookup->size=noOfCM;
719 uprv_memset(t->cmLookup->index, 0, sizeof(t->cmLookup->index));
720
721 return;
722}
723
724static void
725uprv_uca_copyCMTable(tempUCATable *t, UChar *cm, uint16_t *index) {
726 int32_t count=0;
727
728 for (int32_t i=0; i<256; ++i) {
729 if (index[i]>0) {
730 // cPoints is ordered by combining class value.
731 uprv_memcpy(t->cmLookup->cPoints+count, cm+(i<<8), index[i]*sizeof(UChar));
732 count += index[i];
733 }
734 t->cmLookup->index[i]=count;
735 }
736 return;
737}
b75a7d8f 738
46f4442e
A
739/* 1. to the UnsafeCP hash table, add all chars with combining class != 0 */
740/* 2. build combining marks table for all chars with combining class != 0 */
b75a7d8f
A
741static void uprv_uca_unsafeCPAddCCNZ(tempUCATable *t, UErrorCode *status) {
742
743 UChar c;
744 uint16_t fcd; // Hi byte is lead combining class.
46f4442e 745 // lo byte is trailing combing class.
729e4ab9
A
746 const uint16_t *fcdTrieIndex;
747 UChar32 fcdHighStart;
46f4442e
A
748 UBool buildCMTable = (t->cmLookup==NULL); // flag for building combining class table
749 UChar *cm=NULL;
750 uint16_t index[256];
751 int32_t count=0;
729e4ab9 752 fcdTrieIndex = unorm_getFCDTrieIndex(fcdHighStart, status);
b75a7d8f
A
753 if (U_FAILURE(*status)) {
754 return;
755 }
756
46f4442e
A
757 if (buildCMTable) {
758 if (cm==NULL) {
759 cm = (UChar *)uprv_malloc(sizeof(UChar)*UCOL_MAX_CM_TAB);
760 if (cm==NULL) {
761 *status = U_MEMORY_ALLOCATION_ERROR;
762 return;
763 }
764 }
765 uprv_memset(index, 0, sizeof(index));
766 }
b75a7d8f 767 for (c=0; c<0xffff; c++) {
729e4ab9 768 fcd = unorm_getFCD16(fcdTrieIndex, c);
b75a7d8f 769 if (fcd >= 0x100 || // if the leading combining class(c) > 0 ||
46f4442e
A
770 (UTF_IS_LEAD(c) && fcd != 0)) {// c is a leading surrogate with some FCD data
771 if (buildCMTable) {
772 uint32_t cClass = fcd & 0xff;
773 //uint32_t temp=(cClass<<8)+index[cClass];
774 cm[(cClass<<8)+index[cClass]] = c; //
775 index[cClass]++;
776 count++;
777 }
778 unsafeCPSet(t->unsafeCP, c);
779 }
780 }
781
782 // copy to cm table
783 if (buildCMTable) {
784 uprv_uca_createCMTable(t, count, status);
785 if(U_FAILURE(*status)) {
786 if (cm!=NULL) {
787 uprv_free(cm);
788 }
789 return;
790 }
791 uprv_uca_copyCMTable(t, cm, index);
b75a7d8f
A
792 }
793
794 if(t->prefixLookup != NULL) {
46f4442e
A
795 int32_t i = -1;
796 const UHashElement *e = NULL;
797 UCAElements *element = NULL;
798 UChar NFCbuf[256];
799 uint32_t NFCbufLen = 0;
800 while((e = uhash_nextElement(t->prefixLookup, &i)) != NULL) {
801 element = (UCAElements *)e->value.pointer;
802 // codepoints here are in the NFD form. We need to add the
803 // first code point of the NFC form to unsafe, because
804 // strcoll needs to backup over them.
805 NFCbufLen = unorm_normalize(element->cPoints, element->cSize, UNORM_NFC, 0,
806 NFCbuf, 256, status);
807 unsafeCPSet(t->unsafeCP, NFCbuf[0]);
808 }
809 }
810
811 if (cm!=NULL) {
812 uprv_free(cm);
b75a7d8f
A
813 }
814}
815
46f4442e
A
816static uint32_t uprv_uca_addPrefix(tempUCATable *t, uint32_t CE,
817 UCAElements *element, UErrorCode *status)
818{
819 // currently the longest prefix we're supporting in Japanese is two characters
820 // long. Although this table could quite easily mimic complete contraction stuff
821 // there is no good reason to make a general solution, as it would require some
822 // error prone messing.
b75a7d8f
A
823 CntTable *contractions = t->contractions;
824 UChar32 cp;
825 uint32_t cpsize = 0;
826 UChar *oldCP = element->cPoints;
827 uint32_t oldCPSize = element->cSize;
828
829
830 contractions->currentTag = SPEC_PROC_TAG;
831
832 // here, we will normalize & add prefix to the table.
833 uint32_t j = 0;
834#ifdef UCOL_DEBUG
835 for(j=0; j<element->cSize; j++) {
46f4442e 836 fprintf(stdout, "CP: %04X ", element->cPoints[j]);
b75a7d8f
A
837 }
838 fprintf(stdout, "El: %08X Pref: ", CE);
839 for(j=0; j<element->prefixSize; j++) {
46f4442e 840 fprintf(stdout, "%04X ", element->prefix[j]);
b75a7d8f
A
841 }
842 fprintf(stdout, "%08X ", element->mapCE);
843#endif
844
845 for (j = 1; j<element->prefixSize; j++) { /* First add NFD prefix chars to unsafe CP hash table */
46f4442e
A
846 // Unless it is a trail surrogate, which is handled algoritmically and
847 // shouldn't take up space in the table.
848 if(!(UTF_IS_TRAIL(element->prefix[j]))) {
849 unsafeCPSet(t->unsafeCP, element->prefix[j]);
850 }
b75a7d8f
A
851 }
852
853 UChar tempPrefix = 0;
854
855 for(j = 0; j < /*nfcSize*/element->prefixSize/2; j++) { // prefixes are going to be looked up backwards
46f4442e
A
856 // therefore, we will promptly reverse the prefix buffer...
857 tempPrefix = *(/*nfcBuffer*/element->prefix+element->prefixSize-j-1);
858 *(/*nfcBuffer*/element->prefix+element->prefixSize-j-1) = element->prefix[j];
859 element->prefix[j] = tempPrefix;
b75a7d8f
A
860 }
861
862#ifdef UCOL_DEBUG
863 fprintf(stdout, "Reversed: ");
864 for(j=0; j<element->prefixSize; j++) {
46f4442e 865 fprintf(stdout, "%04X ", element->prefix[j]);
b75a7d8f
A
866 }
867 fprintf(stdout, "%08X\n", element->mapCE);
868#endif
869
870 // the first codepoint is also unsafe, as it forms a 'contraction' with the prefix
871 if(!(UTF_IS_TRAIL(element->cPoints[0]))) {
46f4442e 872 unsafeCPSet(t->unsafeCP, element->cPoints[0]);
b75a7d8f
A
873 }
874
875 // Maybe we need this... To handle prefixes completely in the forward direction...
876 //if(element->cSize == 1) {
877 // if(!(UTF_IS_TRAIL(element->cPoints[0]))) {
878 // ContrEndCPSet(t->contrEndCP, element->cPoints[0]);
879 // }
880 //}
881
882 element->cPoints = element->prefix;
883 element->cSize = element->prefixSize;
884
885 // Add the last char of the contraction to the contraction-end hash table.
46f4442e 886 // unless it is a trail surrogate, which is handled algorithmically and
b75a7d8f
A
887 // shouldn't be in the table
888 if(!(UTF_IS_TRAIL(element->cPoints[element->cSize -1]))) {
46f4442e 889 ContrEndCPSet(t->contrEndCP, element->cPoints[element->cSize -1]);
b75a7d8f
A
890 }
891
892 // First we need to check if contractions starts with a surrogate
893 UTF_NEXT_CHAR(element->cPoints, cpsize, element->cSize, cp);
894
46f4442e 895 // If there are any Jamos in the contraction, we should turn on special
b75a7d8f
A
896 // processing for Jamos
897 if(UCOL_ISJAMO(element->prefix[0])) {
46f4442e 898 t->image->jamoSpecial = TRUE;
b75a7d8f
A
899 }
900 /* then we need to deal with it */
901 /* we could aready have something in table - or we might not */
902
46f4442e
A
903 if(!isPrefix(CE)) {
904 /* if it wasn't contraction, we wouldn't end up here*/
905 int32_t firstContractionOffset = 0;
906 firstContractionOffset = uprv_cnttab_addContraction(contractions, UPRV_CNTTAB_NEWELEMENT, 0, CE, status);
907 uint32_t newCE = uprv_uca_processContraction(contractions, element, UCOL_NOT_FOUND, status);
908 uprv_cnttab_addContraction(contractions, firstContractionOffset, *element->prefix, newCE, status);
909 uprv_cnttab_addContraction(contractions, firstContractionOffset, 0xFFFF, CE, status);
910 CE = constructContractCE(SPEC_PROC_TAG, firstContractionOffset);
b75a7d8f 911 } else { /* we are adding to existing contraction */
46f4442e
A
912 /* there were already some elements in the table, so we need to add a new contraction */
913 /* Two things can happen here: either the codepoint is already in the table, or it is not */
914 int32_t position = uprv_cnttab_findCP(contractions, CE, *element->prefix, status);
915 if(position > 0) { /* if it is we just continue down the chain */
916 uint32_t eCE = uprv_cnttab_getCE(contractions, CE, position, status);
917 uint32_t newCE = uprv_uca_processContraction(contractions, element, eCE, status);
918 uprv_cnttab_setContraction(contractions, CE, position, *(element->prefix), newCE, status);
919 } else { /* if it isn't, we will have to create a new sequence */
920 uprv_uca_processContraction(contractions, element, UCOL_NOT_FOUND, status);
921 uprv_cnttab_insertContraction(contractions, CE, *(element->prefix), element->mapCE, status);
922 }
b75a7d8f
A
923 }
924
925 element->cPoints = oldCP;
926 element->cSize = oldCPSize;
927
928 return CE;
929}
930
931// Note regarding surrogate handling: We are interested only in the single
932// or leading surrogates in a contraction. If a surrogate is somewhere else
933// in the contraction, it is going to be handled as a pair of code units,
934// as it doesn't affect the performance AND handling surrogates specially
935// would complicate code way too much.
374ca955 936static uint32_t uprv_uca_addContraction(tempUCATable *t, uint32_t CE,
46f4442e
A
937 UCAElements *element, UErrorCode *status)
938{
b75a7d8f
A
939 CntTable *contractions = t->contractions;
940 UChar32 cp;
941 uint32_t cpsize = 0;
942
943 contractions->currentTag = CONTRACTION_TAG;
944
945 // First we need to check if contractions starts with a surrogate
946 UTF_NEXT_CHAR(element->cPoints, cpsize, element->cSize, cp);
947
948 if(cpsize<element->cSize) { // This is a real contraction, if there are other characters after the first
46f4442e
A
949 uint32_t j = 0;
950 for (j=1; j<element->cSize; j++) { /* First add contraction chars to unsafe CP hash table */
951 // Unless it is a trail surrogate, which is handled algoritmically and
952 // shouldn't take up space in the table.
953 if(!(UTF_IS_TRAIL(element->cPoints[j]))) {
954 unsafeCPSet(t->unsafeCP, element->cPoints[j]);
955 }
956 }
957 // Add the last char of the contraction to the contraction-end hash table.
958 // unless it is a trail surrogate, which is handled algorithmically and
959 // shouldn't be in the table
960 if(!(UTF_IS_TRAIL(element->cPoints[element->cSize -1]))) {
961 ContrEndCPSet(t->contrEndCP, element->cPoints[element->cSize -1]);
962 }
b75a7d8f 963
46f4442e
A
964 // If there are any Jamos in the contraction, we should turn on special
965 // processing for Jamos
966 if(UCOL_ISJAMO(element->cPoints[0])) {
967 t->image->jamoSpecial = TRUE;
968 }
969 /* then we need to deal with it */
970 /* we could aready have something in table - or we might not */
971 element->cPoints+=cpsize;
972 element->cSize-=cpsize;
973 if(!isContraction(CE)) {
974 /* if it wasn't contraction, we wouldn't end up here*/
975 int32_t firstContractionOffset = 0;
976 firstContractionOffset = uprv_cnttab_addContraction(contractions, UPRV_CNTTAB_NEWELEMENT, 0, CE, status);
977 uint32_t newCE = uprv_uca_processContraction(contractions, element, UCOL_NOT_FOUND, status);
978 uprv_cnttab_addContraction(contractions, firstContractionOffset, *element->cPoints, newCE, status);
979 uprv_cnttab_addContraction(contractions, firstContractionOffset, 0xFFFF, CE, status);
980 CE = constructContractCE(CONTRACTION_TAG, firstContractionOffset);
981 } else { /* we are adding to existing contraction */
982 /* there were already some elements in the table, so we need to add a new contraction */
983 /* Two things can happen here: either the codepoint is already in the table, or it is not */
984 int32_t position = uprv_cnttab_findCP(contractions, CE, *element->cPoints, status);
985 if(position > 0) { /* if it is we just continue down the chain */
986 uint32_t eCE = uprv_cnttab_getCE(contractions, CE, position, status);
987 uint32_t newCE = uprv_uca_processContraction(contractions, element, eCE, status);
988 uprv_cnttab_setContraction(contractions, CE, position, *(element->cPoints), newCE, status);
989 } else { /* if it isn't, we will have to create a new sequence */
990 uint32_t newCE = uprv_uca_processContraction(contractions, element, UCOL_NOT_FOUND, status);
991 uprv_cnttab_insertContraction(contractions, CE, *(element->cPoints), newCE, status);
992 }
993 }
994 element->cPoints-=cpsize;
995 element->cSize+=cpsize;
996 /*ucmpe32_set(t->mapping, cp, CE);*/
997 utrie_set32(t->mapping, cp, CE);
b75a7d8f 998 } else if(!isContraction(CE)) { /* this is just a surrogate, and there is no contraction */
46f4442e
A
999 /*ucmpe32_set(t->mapping, cp, element->mapCE);*/
1000 utrie_set32(t->mapping, cp, element->mapCE);
b75a7d8f 1001 } else { /* fill out the first stage of the contraction with the surrogate CE */
46f4442e
A
1002 uprv_cnttab_changeContraction(contractions, CE, 0, element->mapCE, status);
1003 uprv_cnttab_changeContraction(contractions, CE, 0xFFFF, element->mapCE, status);
b75a7d8f
A
1004 }
1005 return CE;
1006}
1007
1008
1009static uint32_t uprv_uca_processContraction(CntTable *contractions, UCAElements *element, uint32_t existingCE, UErrorCode *status) {
1010 int32_t firstContractionOffset = 0;
46f4442e 1011 // uint32_t contractionElement = UCOL_NOT_FOUND;
b75a7d8f
A
1012
1013 if(U_FAILURE(*status)) {
1014 return UCOL_NOT_FOUND;
1015 }
1016
1017 /* end of recursion */
1018 if(element->cSize == 1) {
46f4442e
A
1019 if(isCntTableElement(existingCE) && ((UColCETags)getCETag(existingCE) == contractions->currentTag)) {
1020 uprv_cnttab_changeContraction(contractions, existingCE, 0, element->mapCE, status);
1021 uprv_cnttab_changeContraction(contractions, existingCE, 0xFFFF, element->mapCE, status);
1022 return existingCE;
1023 } else {
1024 return element->mapCE; /*can't do just that. existingCe might be a contraction, meaning that we need to do another step */
1025 }
b75a7d8f
A
1026 }
1027
1028 /* this recursion currently feeds on the only element we have... We will have to copy it in order to accomodate */
1029 /* for both backward and forward cycles */
1030
1031 /* we encountered either an empty space or a non-contraction element */
1032 /* this means we are constructing a new contraction sequence */
1033 element->cPoints++;
1034 element->cSize--;
1035 if(!isCntTableElement(existingCE)) {
46f4442e
A
1036 /* if it wasn't contraction, we wouldn't end up here*/
1037 firstContractionOffset = uprv_cnttab_addContraction(contractions, UPRV_CNTTAB_NEWELEMENT, 0, existingCE, status);
b75a7d8f 1038 uint32_t newCE = uprv_uca_processContraction(contractions, element, UCOL_NOT_FOUND, status);
46f4442e
A
1039 uprv_cnttab_addContraction(contractions, firstContractionOffset, *element->cPoints, newCE, status);
1040 uprv_cnttab_addContraction(contractions, firstContractionOffset, 0xFFFF, existingCE, status);
1041 existingCE = constructContractCE(contractions->currentTag, firstContractionOffset);
1042 } else { /* we are adding to existing contraction */
1043 /* there were already some elements in the table, so we need to add a new contraction */
1044 /* Two things can happen here: either the codepoint is already in the table, or it is not */
1045 int32_t position = uprv_cnttab_findCP(contractions, existingCE, *element->cPoints, status);
1046 if(position > 0) { /* if it is we just continue down the chain */
1047 uint32_t eCE = uprv_cnttab_getCE(contractions, existingCE, position, status);
1048 uint32_t newCE = uprv_uca_processContraction(contractions, element, eCE, status);
1049 uprv_cnttab_setContraction(contractions, existingCE, position, *(element->cPoints), newCE, status);
1050 } else { /* if it isn't, we will have to create a new sequence */
1051 uint32_t newCE = uprv_uca_processContraction(contractions, element, UCOL_NOT_FOUND, status);
1052 uprv_cnttab_insertContraction(contractions, existingCE, *(element->cPoints), newCE, status);
1053 }
b75a7d8f
A
1054 }
1055 element->cPoints--;
1056 element->cSize++;
1057 return existingCE;
1058}
1059
1060static uint32_t uprv_uca_finalizeAddition(tempUCATable *t, UCAElements *element, UErrorCode *status) {
46f4442e
A
1061 uint32_t CE = UCOL_NOT_FOUND;
1062 // This should add a completely ignorable element to the
1063 // unsafe table, so that backward iteration will skip
1064 // over it when treating contractions.
b75a7d8f 1065 uint32_t i = 0;
46f4442e
A
1066 if(element->mapCE == 0) {
1067 for(i = 0; i < element->cSize; i++) {
1068 if(!UTF_IS_TRAIL(element->cPoints[i])) {
1069 unsafeCPSet(t->unsafeCP, element->cPoints[i]);
1070 }
1071 }
1072 }
1073 if(element->cSize > 1) { /* we're adding a contraction */
1074 uint32_t i = 0;
1075 UChar32 cp;
1076
1077 UTF_NEXT_CHAR(element->cPoints, i, element->cSize, cp);
1078 /*CE = ucmpe32_get(t->mapping, cp);*/
1079 CE = utrie_get32(t->mapping, cp, NULL);
1080
1081 CE = uprv_uca_addContraction(t, CE, element, status);
1082 } else { /* easy case, */
1083 /*CE = ucmpe32_get(t->mapping, element->cPoints[0]);*/
1084 CE = utrie_get32(t->mapping, element->cPoints[0], NULL);
1085
1086 if( CE != UCOL_NOT_FOUND) {
1087 if(isCntTableElement(CE) /*isContraction(CE)*/) { /* adding a non contraction element (thai, expansion, single) to already existing contraction */
1088 if(!isPrefix(element->mapCE)) { // we cannot reenter prefix elements - as we are going to create a dead loop
1089 // Only expansions and regular CEs can go here... Contractions will never happen in this place
1090 uprv_cnttab_setContraction(t->contractions, CE, 0, 0, element->mapCE, status);
1091 /* This loop has to change the CE at the end of contraction REDO!*/
1092 uprv_cnttab_changeLastCE(t->contractions, CE, element->mapCE, status);
1093 }
1094 } else {
1095 /*ucmpe32_set(t->mapping, element->cPoints[0], element->mapCE);*/
1096 utrie_set32(t->mapping, element->cPoints[0], element->mapCE);
729e4ab9 1097 if ((element->prefixSize!=0) && (!isSpecial(CE) || (getCETag(CE)!=IMPLICIT_TAG))) {
46f4442e
A
1098 UCAElements *origElem = (UCAElements *)uprv_malloc(sizeof(UCAElements));
1099 /* test for NULL */
1100 if (origElem== NULL) {
1101 *status = U_MEMORY_ALLOCATION_ERROR;
1102 return 0;
1103 }
1104 /* copy the original UCA value */
1105 origElem->prefixSize = 0;
1106 origElem->prefix = NULL;
1107 origElem->cPoints = origElem->uchars;
1108 origElem->cPoints[0] = element->cPoints[0];
1109 origElem->cSize = 1;
1110 origElem->CEs[0]=CE;
1111 origElem->mapCE=CE;
1112 origElem->noOfCEs=1;
1113 uprv_uca_finalizeAddition(t, origElem, status);
1114 uprv_free(origElem);
1115 }
b75a7d8f 1116#ifdef UCOL_DEBUG
46f4442e
A
1117 fprintf(stderr, "Warning - trying to overwrite existing data %08X for cp %04X with %08X\n", CE, element->cPoints[0], element->CEs[0]);
1118 //*status = U_ILLEGAL_ARGUMENT_ERROR;
b75a7d8f 1119#endif
46f4442e
A
1120 }
1121 } else {
1122 /*ucmpe32_set(t->mapping, element->cPoints[0], element->mapCE);*/
1123 utrie_set32(t->mapping, element->cPoints[0], element->mapCE);
1124 }
b75a7d8f 1125 }
46f4442e 1126 return CE;
b75a7d8f
A
1127}
1128
1129/* This adds a read element, while testing for existence */
1130U_CAPI uint32_t U_EXPORT2
1131uprv_uca_addAnElement(tempUCATable *t, UCAElements *element, UErrorCode *status) {
46f4442e
A
1132 U_NAMESPACE_USE
1133
1134 ExpansionTable *expansions = t->expansions;
1135
1136 uint32_t i = 1;
1137 uint32_t expansion = 0;
1138 uint32_t CE;
1139
1140 if(U_FAILURE(*status)) {
1141 return 0xFFFF;
1142 }
1143
1144 element->mapCE = 0; // clear mapCE so that we can catch expansions
1145
1146 if(element->noOfCEs == 1) {
1147 element->mapCE = element->CEs[0];
1148 } else {
1149 /* ICU 2.1 long primaries */
1150 /* unfortunately, it looks like we have to look for a long primary here */
1151 /* since in canonical closure we are going to hit some long primaries from */
1152 /* the first phase, and they will come back as continuations/expansions */
1153 /* destroying the effect of the previous opitimization */
1154 /* A long primary is a three byte primary with starting secondaries and tertiaries */
1155 /* It can appear in long runs of only primary differences (like east Asian tailorings) */
1156 /* also, it should not be an expansion, as expansions would break with this */
1157 // This part came in from ucol_bld.cpp
1158 //if(tok->expansion == 0
1159 //&& noOfBytes[0] == 3 && noOfBytes[1] == 1 && noOfBytes[2] == 1
1160 //&& CEparts[1] == (UCOL_BYTE_COMMON << 24) && CEparts[2] == (UCOL_BYTE_COMMON << 24)) {
1161 /* we will construct a special CE that will go unchanged to the table */
1162 if(element->noOfCEs == 2 // a two CE expansion
1163 && isContinuation(element->CEs[1]) // which is a continuation
1164 && (element->CEs[1] & (~(0xFF << 24 | UCOL_CONTINUATION_MARKER))) == 0 // that has only primaries in continuation,
1165 && (((element->CEs[0]>>8) & 0xFF) == UCOL_BYTE_COMMON) // a common secondary
1166 && ((element->CEs[0] & 0xFF) == UCOL_BYTE_COMMON) // and a common tertiary
1167 )
1168 {
b75a7d8f 1169#ifdef UCOL_DEBUG
46f4442e 1170 fprintf(stdout, "Long primary %04X\n", element->cPoints[0]);
b75a7d8f 1171#endif
46f4442e
A
1172 element->mapCE = UCOL_SPECIAL_FLAG | (LONG_PRIMARY_TAG<<24) // a long primary special
1173 | ((element->CEs[0]>>8) & 0xFFFF00) // first and second byte of primary
1174 | ((element->CEs[1]>>24) & 0xFF); // third byte of primary
1175 }
1176 else {
1177 expansion = (uint32_t)(UCOL_SPECIAL_FLAG | (EXPANSION_TAG<<UCOL_TAG_SHIFT)
729e4ab9
A
1178 | (((uprv_uca_addExpansion(expansions, element->CEs[0], status)+(headersize>>2))<<4)
1179 & 0xFFFFF0));
46f4442e
A
1180
1181 for(i = 1; i<element->noOfCEs; i++) {
1182 uprv_uca_addExpansion(expansions, element->CEs[i], status);
1183 }
1184 if(element->noOfCEs <= 0xF) {
1185 expansion |= element->noOfCEs;
1186 } else {
1187 uprv_uca_addExpansion(expansions, 0, status);
1188 }
1189 element->mapCE = expansion;
1190 uprv_uca_setMaxExpansion(element->CEs[element->noOfCEs - 1],
1191 (uint8_t)element->noOfCEs,
1192 t->maxExpansions,
1193 status);
1194 if(UCOL_ISJAMO(element->cPoints[0])) {
1195 t->image->jamoSpecial = TRUE;
1196 uprv_uca_setMaxJamoExpansion(element->cPoints[0],
1197 element->CEs[element->noOfCEs - 1],
1198 (uint8_t)element->noOfCEs,
1199 t->maxJamoExpansions,
1200 status);
1201 }
1202 if (U_FAILURE(*status)) {
1203 return 0;
1204 }
1205 }
1206 }
1207
1208 // We treat digits differently - they are "uber special" and should be
1209 // processed differently if numeric collation is on.
1210 UChar32 uniChar = 0;
1211 //printElement(element);
1212 if ((element->cSize == 2) && U16_IS_LEAD(element->cPoints[0])){
1213 uniChar = U16_GET_SUPPLEMENTARY(element->cPoints[0], element->cPoints[1]);
1214 } else if (element->cSize == 1){
1215 uniChar = element->cPoints[0];
1216 }
1217
1218 // Here, we either have one normal CE OR mapCE is set. Therefore, we stuff only
1219 // one element to the expansion buffer. When we encounter a digit and we don't
1220 // do numeric collation, we will just pick the CE we have and break out of case
1221 // (see ucol.cpp ucol_prv_getSpecialCE && ucol_prv_getSpecialPrevCE). If we picked
1222 // a special, further processing will occur. If it's a simple CE, we'll return due
1223 // to how the loop is constructed.
1224 if (uniChar != 0 && u_isdigit(uniChar)){
1225 expansion = (uint32_t)(UCOL_SPECIAL_FLAG | (DIGIT_TAG<<UCOL_TAG_SHIFT) | 1); // prepare the element
1226 if(element->mapCE) { // if there is an expansion, we'll pick it here
1227 expansion |= ((uprv_uca_addExpansion(expansions, element->mapCE, status)+(headersize>>2))<<4);
1228 } else {
1229 expansion |= ((uprv_uca_addExpansion(expansions, element->CEs[0], status)+(headersize>>2))<<4);
1230 }
1231 element->mapCE = expansion;
1232
1233 // Need to go back to the beginning of the digit string if in the middle!
1234 if(uniChar <= 0xFFFF) { // supplementaries are always unsafe. API takes UChars
1235 unsafeCPSet(t->unsafeCP, (UChar)uniChar);
73c04bcf 1236 }
b75a7d8f 1237 }
b75a7d8f 1238
46f4442e
A
1239 // here we want to add the prefix structure.
1240 // I will try to process it as a reverse contraction, if possible.
1241 // prefix buffer is already reversed.
b75a7d8f 1242
46f4442e
A
1243 if(element->prefixSize!=0) {
1244 // We keep the seen prefix starter elements in a hashtable
1245 // we need it to be able to distinguish between the simple
1246 // codepoints and prefix starters. Also, we need to use it
1247 // for canonical closure.
b75a7d8f 1248
46f4442e 1249 UCAElements *composed = (UCAElements *)uprv_malloc(sizeof(UCAElements));
b75a7d8f 1250 /* test for NULL */
46f4442e 1251 if (composed == NULL) {
b75a7d8f
A
1252 *status = U_MEMORY_ALLOCATION_ERROR;
1253 return 0;
1254 }
46f4442e
A
1255 uprv_memcpy(composed, element, sizeof(UCAElements));
1256 composed->cPoints = composed->uchars;
1257 composed->prefix = composed->prefixChars;
1258
1259 composed->prefixSize = unorm_normalize(element->prefix, element->prefixSize, UNORM_NFC, 0, composed->prefix, 128, status);
1260
1261
1262 if(t->prefixLookup != NULL) {
1263 UCAElements *uCE = (UCAElements *)uhash_get(t->prefixLookup, element);
1264 if(uCE != NULL) { // there is already a set of code points here
1265 element->mapCE = uprv_uca_addPrefix(t, uCE->mapCE, element, status);
1266 } else { // no code points, so this spot is clean
1267 element->mapCE = uprv_uca_addPrefix(t, UCOL_NOT_FOUND, element, status);
1268 uCE = (UCAElements *)uprv_malloc(sizeof(UCAElements));
1269 /* test for NULL */
1270 if (uCE == NULL) {
1271 *status = U_MEMORY_ALLOCATION_ERROR;
1272 return 0;
1273 }
1274 uprv_memcpy(uCE, element, sizeof(UCAElements));
1275 uCE->cPoints = uCE->uchars;
1276 uhash_put(t->prefixLookup, uCE, uCE, status);
1277 }
1278 if(composed->prefixSize != element->prefixSize || uprv_memcmp(composed->prefix, element->prefix, element->prefixSize)) {
1279 // do it!
1280 composed->mapCE = uprv_uca_addPrefix(t, element->mapCE, composed, status);
1281 }
1282 }
1283 uprv_free(composed);
1284 }
1285
1286 // We need to use the canonical iterator here
1287 // the way we do it is to generate the canonically equivalent strings
1288 // for the contraction and then add the sequences that pass FCD check
1289 if(element->cSize > 1 && !(element->cSize==2 && UTF16_IS_LEAD(element->cPoints[0]) && UTF16_IS_TRAIL(element->cPoints[1]))) { // this is a contraction, we should check whether a composed form should also be included
1290 UnicodeString source(element->cPoints, element->cSize);
1291 CanonicalIterator it(source, *status);
1292 source = it.next();
1293 while(!source.isBogus()) {
1294 if(Normalizer::quickCheck(source, UNORM_FCD, *status) != UNORM_NO) {
1295 element->cSize = source.extract(element->cPoints, 128, *status);
1296 uprv_uca_finalizeAddition(t, element, status);
1297 }
1298 source = it.next();
1299 }
1300 CE = element->mapCE;
1301 } else {
1302 CE = uprv_uca_finalizeAddition(t, element, status);
1303 }
1304
1305 return CE;
b75a7d8f
A
1306}
1307
1308
1309/*void uprv_uca_getMaxExpansionJamo(CompactEIntArray *mapping, */
374ca955 1310static void uprv_uca_getMaxExpansionJamo(UNewTrie *mapping,
46f4442e
A
1311 MaxExpansionTable *maxexpansion,
1312 MaxJamoExpansionTable *maxjamoexpansion,
1313 UBool jamospecial,
1314 UErrorCode *status)
b75a7d8f 1315{
46f4442e
A
1316 const uint32_t VBASE = 0x1161;
1317 const uint32_t TBASE = 0x11A8;
1318 const uint32_t VCOUNT = 21;
1319 const uint32_t TCOUNT = 28;
1320
1321 uint32_t v = VBASE + VCOUNT - 1;
1322 uint32_t t = TBASE + TCOUNT - 1;
1323 uint32_t ce;
1324
1325 while (v >= VBASE) {
1326 /*ce = ucmpe32_get(mapping, v);*/
1327 ce = utrie_get32(mapping, v, NULL);
1328 if (ce < UCOL_SPECIAL_FLAG) {
1329 uprv_uca_setMaxExpansion(ce, 2, maxexpansion, status);
1330 }
1331 v --;
1332 }
1333
1334 while (t >= TBASE)
1335 {
1336 /*ce = ucmpe32_get(mapping, t);*/
1337 ce = utrie_get32(mapping, t, NULL);
1338 if (ce < UCOL_SPECIAL_FLAG) {
1339 uprv_uca_setMaxExpansion(ce, 3, maxexpansion, status);
1340 }
1341 t --;
1342 }
1343 /* According to the docs, 99% of the time, the Jamo will not be special */
1344 if (jamospecial) {
1345 /* gets the max expansion in all unicode characters */
1346 int count = maxjamoexpansion->position;
1347 uint8_t maxTSize = (uint8_t)(maxjamoexpansion->maxLSize +
1348 maxjamoexpansion->maxVSize +
1349 maxjamoexpansion->maxTSize);
1350 uint8_t maxVSize = (uint8_t)(maxjamoexpansion->maxLSize +
1351 maxjamoexpansion->maxVSize);
1352
1353 while (count > 0) {
1354 count --;
1355 if (*(maxjamoexpansion->isV + count) == TRUE) {
b75a7d8f 1356 uprv_uca_setMaxExpansion(
46f4442e
A
1357 *(maxjamoexpansion->endExpansionCE + count),
1358 maxVSize, maxexpansion, status);
1359 }
1360 else {
b75a7d8f 1361 uprv_uca_setMaxExpansion(
46f4442e
A
1362 *(maxjamoexpansion->endExpansionCE + count),
1363 maxTSize, maxexpansion, status);
1364 }
1365 }
1366 }
b75a7d8f
A
1367}
1368
1369U_CDECL_BEGIN
1370static inline uint32_t U_CALLCONV
1371getFoldedValue(UNewTrie *trie, UChar32 start, int32_t offset)
1372{
46f4442e
A
1373 uint32_t value;
1374 uint32_t tag;
1375 UChar32 limit;
1376 UBool inBlockZero;
1377
1378 limit=start+0x400;
1379 while(start<limit) {
1380 value=utrie_get32(trie, start, &inBlockZero);
1381 tag = getCETag(value);
1382 if(inBlockZero == TRUE) {
1383 start+=UTRIE_DATA_BLOCK_LENGTH;
1384 } else if(!(isSpecial(value) && (tag == IMPLICIT_TAG || tag == NOT_FOUND_TAG))) {
1385 /* These are values that are starting in either UCA (IMPLICIT_TAG) or in the
1386 * tailorings (NOT_FOUND_TAG). Presence of these tags means that there is
1387 * nothing in this position and that it should be skipped.
1388 */
b75a7d8f 1389#ifdef UCOL_DEBUG
46f4442e
A
1390 static int32_t count = 1;
1391 fprintf(stdout, "%i, Folded %08X, value %08X\n", count++, start, value);
b75a7d8f 1392#endif
46f4442e
A
1393 return (uint32_t)(UCOL_SPECIAL_FLAG | (SURROGATE_TAG<<24) | offset);
1394 } else {
1395 ++start;
1396 }
1397 }
1398 return 0;
b75a7d8f
A
1399}
1400U_CDECL_END
1401
1402#ifdef UCOL_DEBUG
1403// This is a debug function to print the contents of a trie.
1404// It is used in conjuction with the code around utrie_unserialize call
729e4ab9 1405UBool enumRange(const void *context, UChar32 start, UChar32 limit, uint32_t value) {
46f4442e
A
1406 if(start<0x10000) {
1407 fprintf(stdout, "%08X, %08X, %08X\n", start, limit, value);
1408 } else {
1409 fprintf(stdout, "%08X=%04X %04X, %08X=%04X %04X, %08X\n", start, UTF16_LEAD(start), UTF16_TRAIL(start), limit, UTF16_LEAD(limit), UTF16_TRAIL(limit), value);
1410 }
729e4ab9 1411 return TRUE;
b75a7d8f
A
1412}
1413
1414int32_t
1415myGetFoldingOffset(uint32_t data) {
46f4442e
A
1416 if(data > UCOL_NOT_FOUND && getCETag(data) == SURROGATE_TAG) {
1417 return (data&0xFFFFFF);
1418 } else {
1419 return 0;
1420 }
b75a7d8f
A
1421}
1422#endif
1423
1424U_CAPI UCATableHeader* U_EXPORT2
1425uprv_uca_assembleTable(tempUCATable *t, UErrorCode *status) {
1426 /*CompactEIntArray *mapping = t->mapping;*/
1427 UNewTrie *mapping = t->mapping;
1428 ExpansionTable *expansions = t->expansions;
1429 CntTable *contractions = t->contractions;
1430 MaxExpansionTable *maxexpansion = t->maxExpansions;
1431
1432 if(U_FAILURE(*status)) {
1433 return NULL;
1434 }
1435
1436 uint32_t beforeContractions = (uint32_t)((headersize+paddedsize(expansions->position*sizeof(uint32_t)))/sizeof(UChar));
1437
1438 int32_t contractionsSize = 0;
1439 contractionsSize = uprv_cnttab_constructTable(contractions, beforeContractions, status);
1440
1441 /* the following operation depends on the trie data. Therefore, we have to do it before */
1442 /* the trie is compacted */
1443 /* sets jamo expansions */
1444 uprv_uca_getMaxExpansionJamo(mapping, maxexpansion, t->maxJamoExpansions,
46f4442e 1445 t->image->jamoSpecial, status);
b75a7d8f
A
1446
1447 /*ucmpe32_compact(mapping);*/
1448 /*UMemoryStream *ms = uprv_mstrm_openNew(8192);*/
1449 /*int32_t mappingSize = ucmpe32_flattenMem(mapping, ms);*/
1450 /*const uint8_t *flattened = uprv_mstrm_getBuffer(ms, &mappingSize);*/
1451
1452 // After setting the jamo expansions, compact the trie and get the needed size
1453 int32_t mappingSize = utrie_serialize(mapping, NULL, 0, getFoldedValue /*getFoldedValue*/, FALSE, status);
1454
1455 uint32_t tableOffset = 0;
1456 uint8_t *dataStart;
1457
1458 /* TODO: LATIN1 array is now in the utrie - it should be removed from the calculation */
1459
1460 uint32_t toAllocate =(uint32_t)(headersize+
46f4442e
A
1461 paddedsize(expansions->position*sizeof(uint32_t))+
1462 paddedsize(mappingSize)+
1463 paddedsize(contractionsSize*(sizeof(UChar)+sizeof(uint32_t)))+
1464 //paddedsize(0x100*sizeof(uint32_t)) /* Latin1 is now included in the trie */
1465 /* maxexpansion array */
1466 + paddedsize(maxexpansion->position * sizeof(uint32_t)) +
1467 /* maxexpansion size array */
1468 paddedsize(maxexpansion->position * sizeof(uint8_t)) +
1469 paddedsize(UCOL_UNSAFECP_TABLE_SIZE) + /* Unsafe chars */
1470 paddedsize(UCOL_UNSAFECP_TABLE_SIZE)); /* Contraction Ending chars */
b75a7d8f
A
1471
1472
1473 dataStart = (uint8_t *)uprv_malloc(toAllocate);
1474 /* test for NULL */
1475 if (dataStart == NULL) {
1476 *status = U_MEMORY_ALLOCATION_ERROR;
1477 return NULL;
1478 }
1479
1480 UCATableHeader *myData = (UCATableHeader *)dataStart;
374ca955
A
1481 // Please, do reset all the fields!
1482 uprv_memset(dataStart, 0, toAllocate);
1483 // Make sure we know this is reset
1484 myData->magic = UCOL_HEADER_MAGIC;
1485 myData->isBigEndian = U_IS_BIG_ENDIAN;
1486 myData->charSetFamily = U_CHARSET_FAMILY;
1487 myData->formatVersion[0] = UCA_FORMAT_VERSION_0;
1488 myData->formatVersion[1] = UCA_FORMAT_VERSION_1;
1489 myData->formatVersion[2] = UCA_FORMAT_VERSION_2;
1490 myData->formatVersion[3] = UCA_FORMAT_VERSION_3;
1491 myData->jamoSpecial = t->image->jamoSpecial;
1492
1493 // Don't copy stuff from UCA header!
1494 //uprv_memcpy(myData, t->image, sizeof(UCATableHeader));
b75a7d8f
A
1495
1496 myData->contractionSize = contractionsSize;
1497
1498 tableOffset += (uint32_t)(paddedsize(sizeof(UCATableHeader)));
1499
1500 myData->options = tableOffset;
1501 uprv_memcpy(dataStart+tableOffset, t->options, sizeof(UColOptionSet));
1502 tableOffset += (uint32_t)(paddedsize(sizeof(UColOptionSet)));
1503
1504 /* copy expansions */
1505 /*myData->expansion = (uint32_t *)dataStart+tableOffset;*/
1506 myData->expansion = tableOffset;
1507 uprv_memcpy(dataStart+tableOffset, expansions->CEs, expansions->position*sizeof(uint32_t));
1508 tableOffset += (uint32_t)(paddedsize(expansions->position*sizeof(uint32_t)));
1509
1510 /* contractions block */
1511 if(contractionsSize != 0) {
46f4442e
A
1512 /* copy contraction index */
1513 /*myData->contractionIndex = (UChar *)(dataStart+tableOffset);*/
1514 myData->contractionIndex = tableOffset;
1515 uprv_memcpy(dataStart+tableOffset, contractions->codePoints, contractionsSize*sizeof(UChar));
1516 tableOffset += (uint32_t)(paddedsize(contractionsSize*sizeof(UChar)));
1517
1518 /* copy contraction collation elements */
1519 /*myData->contractionCEs = (uint32_t *)(dataStart+tableOffset);*/
1520 myData->contractionCEs = tableOffset;
1521 uprv_memcpy(dataStart+tableOffset, contractions->CEs, contractionsSize*sizeof(uint32_t));
1522 tableOffset += (uint32_t)(paddedsize(contractionsSize*sizeof(uint32_t)));
b75a7d8f 1523 } else {
46f4442e
A
1524 myData->contractionIndex = 0;
1525 myData->contractionCEs = 0;
b75a7d8f
A
1526 }
1527
1528 /* copy mapping table */
1529 /*myData->mappingPosition = dataStart+tableOffset;*/
1530 /*myData->mappingPosition = tableOffset;*/
1531 /*uprv_memcpy(dataStart+tableOffset, flattened, mappingSize);*/
1532
1533 myData->mappingPosition = tableOffset;
1534 utrie_serialize(mapping, dataStart+tableOffset, toAllocate-tableOffset, getFoldedValue, FALSE, status);
1535#ifdef UCOL_DEBUG
1536 // This is debug code to dump the contents of the trie. It needs two functions defined above
1537 {
46f4442e
A
1538 UTrie UCAt = { 0 };
1539 uint32_t trieWord;
1540 utrie_unserialize(&UCAt, dataStart+tableOffset, 9999999, status);
1541 UCAt.getFoldingOffset = myGetFoldingOffset;
1542 if(U_SUCCESS(*status)) {
1543 utrie_enum(&UCAt, NULL, enumRange, NULL);
1544 }
729e4ab9 1545 trieWord = UTRIE_GET32_FROM_LEAD(&UCAt, 0xDC01);
b75a7d8f
A
1546 }
1547#endif
1548 tableOffset += paddedsize(mappingSize);
1549
1550
1551 int32_t i = 0;
1552
1553 /* copy max expansion table */
1554 myData->endExpansionCE = tableOffset;
73c04bcf 1555 myData->endExpansionCECount = maxexpansion->position - 1;
b75a7d8f
A
1556 /* not copying the first element which is a dummy */
1557 uprv_memcpy(dataStart + tableOffset, maxexpansion->endExpansionCE + 1,
46f4442e 1558 (maxexpansion->position - 1) * sizeof(uint32_t));
73c04bcf 1559 tableOffset += (uint32_t)(paddedsize((maxexpansion->position)* sizeof(uint32_t)));
b75a7d8f
A
1560 myData->expansionCESize = tableOffset;
1561 uprv_memcpy(dataStart + tableOffset, maxexpansion->expansionCESize + 1,
46f4442e 1562 (maxexpansion->position - 1) * sizeof(uint8_t));
73c04bcf 1563 tableOffset += (uint32_t)(paddedsize((maxexpansion->position)* sizeof(uint8_t)));
b75a7d8f
A
1564
1565 /* Unsafe chars table. Finish it off, then copy it. */
1566 uprv_uca_unsafeCPAddCCNZ(t, status);
1567 if (t->UCA != 0) { /* Or in unsafebits from UCA, making a combined table. */
46f4442e
A
1568 for (i=0; i<UCOL_UNSAFECP_TABLE_SIZE; i++) {
1569 t->unsafeCP[i] |= t->UCA->unsafeCP[i];
1570 }
b75a7d8f
A
1571 }
1572 myData->unsafeCP = tableOffset;
1573 uprv_memcpy(dataStart + tableOffset, t->unsafeCP, UCOL_UNSAFECP_TABLE_SIZE);
1574 tableOffset += paddedsize(UCOL_UNSAFECP_TABLE_SIZE);
1575
1576
1577 /* Finish building Contraction Ending chars hash table and then copy it out. */
1578 if (t->UCA != 0) { /* Or in unsafebits from UCA, making a combined table. */
1579 for (i=0; i<UCOL_UNSAFECP_TABLE_SIZE; i++) {
1580 t->contrEndCP[i] |= t->UCA->contrEndCP[i];
1581 }
1582 }
1583 myData->contrEndCP = tableOffset;
1584 uprv_memcpy(dataStart + tableOffset, t->contrEndCP, UCOL_UNSAFECP_TABLE_SIZE);
1585 tableOffset += paddedsize(UCOL_UNSAFECP_TABLE_SIZE);
1586
1587 if(tableOffset != toAllocate) {
1588#ifdef UCOL_DEBUG
1589 fprintf(stderr, "calculation screwup!!! Expected to write %i but wrote %i instead!!!\n", toAllocate, tableOffset);
1590#endif
1591 *status = U_INTERNAL_PROGRAM_ERROR;
1592 uprv_free(dataStart);
1593 return 0;
1594 }
1595
1596 myData->size = tableOffset;
1597 /* This should happen upon ressurection */
1598 /*const uint8_t *mapPosition = (uint8_t*)myData+myData->mappingPosition;*/
1599 /*uprv_mstrm_close(ms);*/
1600 return myData;
1601}
1602
1603
1604struct enumStruct {
46f4442e
A
1605 tempUCATable *t;
1606 UCollator *tempColl;
1607 UCollationElements* colEl;
729e4ab9
A
1608 const Normalizer2Impl *nfcImpl;
1609 UnicodeSet *closed;
46f4442e
A
1610 int32_t noOfClosures;
1611 UErrorCode *status;
b75a7d8f
A
1612};
1613U_CDECL_BEGIN
1614static UBool U_CALLCONV
1615_enumCategoryRangeClosureCategory(const void *context, UChar32 start, UChar32 limit, UCharCategory type) {
1616
46f4442e
A
1617 if (type != U_UNASSIGNED && type != U_PRIVATE_USE_CHAR) { // if the range is assigned - we might ommit more categories later
1618 UErrorCode *status = ((enumStruct *)context)->status;
1619 tempUCATable *t = ((enumStruct *)context)->t;
1620 UCollator *tempColl = ((enumStruct *)context)->tempColl;
1621 UCollationElements* colEl = ((enumStruct *)context)->colEl;
1622 UCAElements el;
729e4ab9
A
1623 UChar decompBuffer[4];
1624 const UChar *decomp;
46f4442e
A
1625 int32_t noOfDec = 0;
1626
1627 UChar32 u32 = 0;
1628 UChar comp[2];
1629 uint32_t len = 0;
1630
1631 for(u32 = start; u32 < limit; u32++) {
729e4ab9
A
1632 decomp = ((enumStruct *)context)->nfcImpl->
1633 getDecomposition(u32, decompBuffer, noOfDec);
46f4442e
A
1634 //if((noOfDec = unorm_normalize(comp, len, UNORM_NFD, 0, decomp, 256, status)) > 1
1635 //|| (noOfDec == 1 && *decomp != (UChar)u32))
729e4ab9 1636 if(decomp != NULL)
46f4442e
A
1637 {
1638 len = 0;
729e4ab9 1639 U16_APPEND_UNSAFE(comp, len, u32);
46f4442e 1640 if(ucol_strcoll(tempColl, comp, len, decomp, noOfDec) != UCOL_EQUAL) {
b75a7d8f 1641#ifdef UCOL_DEBUG
729e4ab9
A
1642 fprintf(stderr, "Closure: U+%04X -> ", u32);
1643 UChar32 c;
1644 int32_t i = 0;
1645 while(i < noOfDec) {
1646 U16_NEXT(decomp, i, noOfDec, c);
1647 fprintf(stderr, "%04X ", c);
1648 }
1649 fprintf(stderr, "\n");
1650 // print CEs for code point vs. decomposition
1651 fprintf(stderr, "U+%04X CEs: ", u32);
1652 UCollationElements *iter = ucol_openElements(tempColl, comp, len, status);
1653 int32_t ce;
1654 while((ce = ucol_next(iter, status)) != UCOL_NULLORDER) {
1655 fprintf(stderr, "%08X ", ce);
1656 }
1657 fprintf(stderr, "\nDecomp CEs: ");
1658 ucol_setText(iter, decomp, noOfDec, status);
1659 while((ce = ucol_next(iter, status)) != UCOL_NULLORDER) {
1660 fprintf(stderr, "%08X ", ce);
46f4442e
A
1661 }
1662 fprintf(stderr, "\n");
729e4ab9 1663 ucol_closeElements(iter);
b75a7d8f 1664#endif
729e4ab9
A
1665 if(((enumStruct *)context)->closed != NULL) {
1666 ((enumStruct *)context)->closed->add(u32);
1667 }
46f4442e 1668 ((enumStruct *)context)->noOfClosures++;
729e4ab9 1669 el.cPoints = (UChar *)decomp;
46f4442e
A
1670 el.cSize = noOfDec;
1671 el.noOfCEs = 0;
1672 el.prefix = el.prefixChars;
1673 el.prefixSize = 0;
1674
1675 UCAElements *prefix=(UCAElements *)uhash_get(t->prefixLookup, &el);
1676 el.cPoints = comp;
1677 el.cSize = len;
1678 el.prefix = el.prefixChars;
1679 el.prefixSize = 0;
1680 if(prefix == NULL) {
1681 el.noOfCEs = 0;
1682 ucol_setText(colEl, decomp, noOfDec, status);
1683 while((el.CEs[el.noOfCEs] = ucol_next(colEl, status)) != (uint32_t)UCOL_NULLORDER) {
1684 el.noOfCEs++;
1685 }
1686 } else {
1687 el.noOfCEs = 1;
1688 el.CEs[0] = prefix->mapCE;
1689 // This character uses a prefix. We have to add it
1690 // to the unsafe table, as it decomposed form is already
1691 // in. In Japanese, this happens for \u309e & \u30fe
1692 // Since unsafeCPSet is static in ucol_elm, we are going
1693 // to wrap it up in the uprv_uca_unsafeCPAddCCNZ function
1694 }
1695 uprv_uca_addAnElement(t, &el, status);
1696 }
b75a7d8f 1697 }
46f4442e
A
1698 }
1699 }
1700 return TRUE;
b75a7d8f
A
1701}
1702U_CDECL_END
1703
46f4442e
A
1704static void
1705uprv_uca_setMapCE(tempUCATable *t, UCAElements *element, UErrorCode *status) {
1706 uint32_t expansion = 0;
1707 int32_t j;
1708
1709 ExpansionTable *expansions = t->expansions;
1710 if(element->noOfCEs == 2 // a two CE expansion
1711 && isContinuation(element->CEs[1]) // which is a continuation
1712 && (element->CEs[1] & (~(0xFF << 24 | UCOL_CONTINUATION_MARKER))) == 0 // that has only primaries in continuation,
1713 && (((element->CEs[0]>>8) & 0xFF) == UCOL_BYTE_COMMON) // a common secondary
1714 && ((element->CEs[0] & 0xFF) == UCOL_BYTE_COMMON) // and a common tertiary
1715 ) {
1716 element->mapCE = UCOL_SPECIAL_FLAG | (LONG_PRIMARY_TAG<<24) // a long primary special
1717 | ((element->CEs[0]>>8) & 0xFFFF00) // first and second byte of primary
1718 | ((element->CEs[1]>>24) & 0xFF); // third byte of primary
1719 } else {
1720 expansion = (uint32_t)(UCOL_SPECIAL_FLAG | (EXPANSION_TAG<<UCOL_TAG_SHIFT)
729e4ab9
A
1721 | (((uprv_uca_addExpansion(expansions, element->CEs[0], status)+(headersize>>2))<<4)
1722 & 0xFFFFF0));
46f4442e
A
1723
1724 for(j = 1; j<(int32_t)element->noOfCEs; j++) {
1725 uprv_uca_addExpansion(expansions, element->CEs[j], status);
1726 }
1727 if(element->noOfCEs <= 0xF) {
1728 expansion |= element->noOfCEs;
1729 } else {
1730 uprv_uca_addExpansion(expansions, 0, status);
1731 }
1732 element->mapCE = expansion;
1733 uprv_uca_setMaxExpansion(element->CEs[element->noOfCEs - 1],
1734 (uint8_t)element->noOfCEs,
1735 t->maxExpansions,
1736 status);
1737 }
1738}
1739
1740static void
1741uprv_uca_addFCD4AccentedContractions(tempUCATable *t,
1742 UCollationElements* colEl,
1743 UChar *data,
1744 int32_t len,
1745 UCAElements *el,
1746 UErrorCode *status) {
1747 UChar decomp[256], comp[256];
1748 int32_t decLen, compLen;
1749
1750 decLen = unorm_normalize(data, len, UNORM_NFD, 0, decomp, 256, status);
1751 compLen = unorm_normalize(data, len, UNORM_NFC, 0, comp, 256, status);
1752 decomp[decLen] = comp[compLen] = 0;
1753
1754 el->cPoints = decomp;
1755 el->cSize = decLen;
1756 el->noOfCEs = 0;
1757 el->prefixSize = 0;
1758 el->prefix = el->prefixChars;
1759
1760 UCAElements *prefix=(UCAElements *)uhash_get(t->prefixLookup, el);
1761 el->cPoints = comp;
1762 el->cSize = compLen;
1763 el->prefix = el->prefixChars;
1764 el->prefixSize = 0;
1765 if(prefix == NULL) {
1766 el->noOfCEs = 0;
1767 ucol_setText(colEl, decomp, decLen, status);
1768 while((el->CEs[el->noOfCEs] = ucol_next(colEl, status)) != (uint32_t)UCOL_NULLORDER) {
1769 el->noOfCEs++;
1770 }
1771 uprv_uca_setMapCE(t, el, status);
1772 uprv_uca_addAnElement(t, el, status);
1773 }
1774}
1775
1776static void
1777uprv_uca_addMultiCMContractions(tempUCATable *t,
1778 UCollationElements* colEl,
1779 tempTailorContext *c,
1780 UCAElements *el,
1781 UErrorCode *status) {
1782 CombinClassTable *cmLookup = t->cmLookup;
1783 UChar newDecomp[256];
1784 int32_t maxComp, newDecLen;
729e4ab9
A
1785 UChar32 fcdHighStart;
1786 const uint16_t *fcdTrieIndex = unorm_getFCDTrieIndex(fcdHighStart, status);
1787 if (U_FAILURE(*status)) {
1788 return;
1789 }
1790 int16_t curClass = (unorm_getFCD16(fcdTrieIndex, c->tailoringCM) & 0xff);
46f4442e
A
1791 CompData *precomp = c->precomp;
1792 int32_t compLen = c->compLen;
1793 UChar *comp = c->comp;
1794 maxComp = c->precompLen;
1795
1796 for (int32_t j=0; j < maxComp; j++) {
1797 int32_t count=0;
1798 do {
1799 if ( count == 0 ) { // Decompose the saved precomposed char.
1800 UChar temp[2];
1801 temp[0]=precomp[j].cp;
1802 temp[1]=0;
1803 newDecLen = unorm_normalize(temp, 1, UNORM_NFD, 0,
1804 newDecomp, sizeof(newDecomp)/sizeof(UChar), status);
1805 newDecomp[newDecLen++] = cmLookup->cPoints[c->cmPos];
1806 }
1807 else { // swap 2 combining marks when they are equal.
1808 uprv_memcpy(newDecomp, c->decomp, sizeof(UChar)*(c->decompLen));
1809 newDecLen = c->decompLen;
1810 newDecomp[newDecLen++] = precomp[j].cClass;
1811 }
1812 newDecomp[newDecLen] = 0;
1813 compLen = unorm_normalize(newDecomp, newDecLen, UNORM_NFC, 0,
1814 comp, 256, status);
1815 if (compLen==1) {
1816 comp[compLen++] = newDecomp[newDecLen++] = c->tailoringCM;
1817 comp[compLen] = newDecomp[newDecLen] = 0;
1818 el->cPoints = newDecomp;
1819 el->cSize = newDecLen;
1820
1821 UCAElements *prefix=(UCAElements *)uhash_get(t->prefixLookup, el);
1822 el->cPoints = c->comp;
1823 el->cSize = compLen;
1824 el->prefix = el->prefixChars;
1825 el->prefixSize = 0;
1826 if(prefix == NULL) {
1827 el->noOfCEs = 0;
1828 ucol_setText(colEl, newDecomp, newDecLen, status);
1829 while((el->CEs[el->noOfCEs] = ucol_next(colEl, status)) != (uint32_t)UCOL_NULLORDER) {
1830 el->noOfCEs++;
1831 }
1832 uprv_uca_setMapCE(t, el, status);
1833 uprv_uca_finalizeAddition(t, el, status);
1834
1835 // Save the current precomposed char and its class to find any
1836 // other combining mark combinations.
1837 precomp[c->precompLen].cp=comp[0];
1838 precomp[c->precompLen].cClass = curClass;
1839 c->precompLen++;
1840 }
1841 }
1842 } while (++count<2 && (precomp[j].cClass == curClass));
1843 }
1844
1845}
1846
1847static void
1848uprv_uca_addTailCanonicalClosures(tempUCATable *t,
1849 UCollationElements* colEl,
1850 UChar baseCh,
1851 UChar cMark,
1852 UCAElements *el,
1853 UErrorCode *status) {
1854 CombinClassTable *cmLookup = t->cmLookup;
729e4ab9
A
1855 UChar32 fcdHighStart;
1856 const uint16_t *fcdTrieIndex = unorm_getFCDTrieIndex(fcdHighStart, status);
1857 if (U_FAILURE(*status)) {
1858 return;
1859 }
1860 int16_t maxIndex = (unorm_getFCD16(fcdTrieIndex, cMark) & 0xff );
46f4442e
A
1861 UCAElements element;
1862 uint16_t *index;
1863 UChar decomp[256];
1864 UChar comp[256];
1865 CompData precomp[256]; // precomposed array
1866 int32_t precompLen = 0; // count for precomp
1867 int32_t i, len, decompLen, curClass, replacedPos;
1868 tempTailorContext c;
1869
1870 if ( cmLookup == NULL ) {
1871 return;
1872 }
1873 index = cmLookup->index;
729e4ab9
A
1874 int32_t cClass=(unorm_getFCD16(fcdTrieIndex, cMark) & 0xff);
1875 maxIndex = (int32_t)index[(unorm_getFCD16(fcdTrieIndex, cMark) & 0xff)-1];
46f4442e
A
1876 c.comp = comp;
1877 c.decomp = decomp;
1878 c.precomp = precomp;
1879 c.tailoringCM = cMark;
1880
1881 if (cClass>0) {
1882 maxIndex = (int32_t)index[cClass-1];
1883 }
1884 else {
1885 maxIndex=0;
1886 }
1887 decomp[0]=baseCh;
1888 for ( i=0; i<maxIndex ; i++ ) {
1889 decomp[1] = cmLookup->cPoints[i];
1890 decomp[2]=0;
1891 decompLen=2;
1892 len = unorm_normalize(decomp, decompLen, UNORM_NFC, 0, comp, 256, status);
1893 if (len==1) {
1894 // Save the current precomposed char and its class to find any
1895 // other combining mark combinations.
1896 precomp[precompLen].cp=comp[0];
1897 curClass = precomp[precompLen].cClass =
729e4ab9 1898 index[unorm_getFCD16(fcdTrieIndex, decomp[1]) & 0xff];
46f4442e
A
1899 precompLen++;
1900 replacedPos=0;
1901 for (decompLen=0; decompLen< (int32_t)el->cSize; decompLen++) {
1902 decomp[decompLen] = el->cPoints[decompLen];
1903 if (decomp[decompLen]==cMark) {
1904 replacedPos = decompLen; // record the position for later use
1905 }
1906 }
1907 if ( replacedPos != 0 ) {
1908 decomp[replacedPos]=cmLookup->cPoints[i];
1909 }
1910 decomp[decompLen] = 0;
1911 len = unorm_normalize(decomp, decompLen, UNORM_NFC, 0, comp, 256, status);
1912 comp[len++] = decomp[decompLen++] = cMark;
1913 comp[len] = decomp[decompLen] = 0;
1914 element.cPoints = decomp;
1915 element.cSize = decompLen;
1916 element.noOfCEs = 0;
1917 element.prefix = el->prefixChars;
1918 element.prefixSize = 0;
1919
1920 UCAElements *prefix=(UCAElements *)uhash_get(t->prefixLookup, &element);
1921 element.cPoints = comp;
1922 element.cSize = len;
1923 element.prefix = el->prefixChars;
1924 element.prefixSize = 0;
1925 if(prefix == NULL) {
1926 element.noOfCEs = 0;
1927 ucol_setText(colEl, decomp, decompLen, status);
1928 while((element.CEs[element.noOfCEs] = ucol_next(colEl, status)) != (uint32_t)UCOL_NULLORDER) {
1929 element.noOfCEs++;
1930 }
1931 uprv_uca_setMapCE(t, &element, status);
1932 uprv_uca_finalizeAddition(t, &element, status);
1933 }
1934
1935 // This is a fix for tailoring contractions with accented
1936 // character at the end of contraction string.
1937 if ((len>2) &&
729e4ab9 1938 (unorm_getFCD16(fcdTrieIndex, comp[len-2]) & 0xff00)==0) {
46f4442e
A
1939 uprv_uca_addFCD4AccentedContractions(t, colEl, comp, len, &element, status);
1940 }
1941
1942 if (precompLen >1) {
1943 c.compLen = len;
1944 c.decompLen = decompLen;
1945 c.precompLen = precompLen;
1946 c.cmPos = i;
1947 uprv_uca_addMultiCMContractions(t, colEl, &c, &element, status);
1948 precompLen = c.precompLen;
1949 }
1950 }
1951 }
1952}
1953
1954U_CFUNC int32_t U_EXPORT2
1955uprv_uca_canonicalClosure(tempUCATable *t,
1956 UColTokenParser *src,
729e4ab9 1957 UnicodeSet *closed,
46f4442e 1958 UErrorCode *status)
b75a7d8f 1959{
46f4442e 1960 enumStruct context;
729e4ab9 1961 context.closed = closed;
46f4442e
A
1962 context.noOfClosures = 0;
1963 UCAElements el;
1964 UColToken *tok;
1965 uint32_t i = 0, j = 0;
1966 UChar baseChar, firstCM;
729e4ab9
A
1967 UChar32 fcdHighStart;
1968 const uint16_t *fcdTrieIndex = unorm_getFCDTrieIndex(fcdHighStart, status);
1969 context.nfcImpl=Normalizer2Factory::getNFCImpl(*status);
1970 if(U_FAILURE(*status)) {
46f4442e
A
1971 return 0;
1972 }
1973
b75a7d8f
A
1974 UCollator *tempColl = NULL;
1975 tempUCATable *tempTable = uprv_uca_cloneTempTable(t, status);
46f4442e
A
1976 // Check for null pointer
1977 if (U_FAILURE(*status)) {
1978 return 0;
1979 }
b75a7d8f
A
1980
1981 UCATableHeader *tempData = uprv_uca_assembleTable(tempTable, status);
374ca955 1982 tempColl = ucol_initCollator(tempData, 0, t->UCA, status);
46f4442e
A
1983 if ( tempTable->cmLookup != NULL ) {
1984 t->cmLookup = tempTable->cmLookup; // copy over to t
1985 tempTable->cmLookup = NULL;
1986 }
1987 uprv_uca_closeTempTable(tempTable);
b75a7d8f
A
1988
1989 if(U_SUCCESS(*status)) {
46f4442e
A
1990 tempColl->ucaRules = NULL;
1991 tempColl->actualLocale = NULL;
1992 tempColl->validLocale = NULL;
1993 tempColl->requestedLocale = NULL;
1994 tempColl->hasRealData = TRUE;
1995 tempColl->freeImageOnClose = TRUE;
b75a7d8f 1996 } else if(tempData != 0) {
46f4442e 1997 uprv_free(tempData);
b75a7d8f
A
1998 }
1999
2000 /* produce canonical closure */
2001 UCollationElements* colEl = ucol_openElements(tempColl, NULL, 0, status);
46f4442e
A
2002 // Check for null pointer
2003 if (U_FAILURE(*status)) {
2004 return 0;
2005 }
b75a7d8f
A
2006 context.t = t;
2007 context.tempColl = tempColl;
2008 context.colEl = colEl;
2009 context.status = status;
2010 u_enumCharTypes(_enumCategoryRangeClosureCategory, &context);
2011
46f4442e
A
2012 if ( (src==NULL) || !src->buildCCTabFlag ) {
2013 ucol_closeElements(colEl);
2014 ucol_close(tempColl);
2015 return context.noOfClosures; // no extra contraction needed to add
2016 }
2017
2018 for (i=0; i < src->resultLen; i++) {
2019 baseChar = firstCM= (UChar)0;
2020 tok = src->lh[i].first;
2021 while (tok != NULL && U_SUCCESS(*status)) {
2022 el.prefix = el.prefixChars;
2023 el.cPoints = el.uchars;
2024 if(tok->prefix != 0) {
2025 el.prefixSize = tok->prefix>>24;
2026 uprv_memcpy(el.prefix, src->source + (tok->prefix & 0x00FFFFFF), el.prefixSize*sizeof(UChar));
2027
2028 el.cSize = (tok->source >> 24)-(tok->prefix>>24);
2029 uprv_memcpy(el.uchars, (tok->source & 0x00FFFFFF)+(tok->prefix>>24) + src->source, el.cSize*sizeof(UChar));
2030 } else {
2031 el.prefixSize = 0;
2032 *el.prefix = 0;
2033
2034 el.cSize = (tok->source >> 24);
2035 uprv_memcpy(el.uchars, (tok->source & 0x00FFFFFF) + src->source, el.cSize*sizeof(UChar));
2036 }
2037 if(src->UCA != NULL) {
2038 for(j = 0; j<el.cSize; j++) {
729e4ab9 2039 int16_t fcd = unorm_getFCD16(fcdTrieIndex, el.cPoints[j]);
46f4442e
A
2040 if ( (fcd & 0xff) == 0 ) {
2041 baseChar = el.cPoints[j]; // last base character
2042 firstCM=0; // reset combining mark value
2043 }
2044 else {
2045 if ( (baseChar!=0) && (firstCM==0) ) {
2046 firstCM = el.cPoints[j]; // first combining mark
2047 }
2048 }
2049 }
2050 }
2051 if ( (baseChar!= (UChar)0) && (firstCM != (UChar)0) ) {
2052 // find all the canonical rules
2053 uprv_uca_addTailCanonicalClosures(t, colEl, baseChar, firstCM, &el, status);
2054 }
2055 tok = tok->next;
2056 }
2057 }
b75a7d8f
A
2058 ucol_closeElements(colEl);
2059 ucol_close(tempColl);
729e4ab9 2060
46f4442e 2061 return context.noOfClosures;
b75a7d8f
A
2062}
2063
374ca955 2064#endif /* #if !UCONFIG_NO_COLLATION */