]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/ucol_elm.cpp
ICU-6.2.4.tar.gz
[apple/icu.git] / icuSources / i18n / ucol_elm.cpp
CommitLineData
b75a7d8f
A
1/*
2*******************************************************************************
3*
374ca955 4* Copyright (C) 2001-2004, International Business Machines
b75a7d8f
A
5* Corporation and others. All Rights Reserved.
6*
7*******************************************************************************
8* file name: ucaelems.cpp
9* encoding: US-ASCII
10* tab size: 8 (not used)
11* indentation:4
12*
13* created 02/22/2001
14* created by: Vladimir Weinstein
15*
16* This program reads the Fractional UCA table and generates
17* internal format for UCA table as well as inverse UCA table.
18* It then writes binary files containing the data: ucadata.dat
19* & invuca.dat
20*
21* date name comments
22* 03/02/2001 synwee added setMaxExpansion
23* 03/07/2001 synwee merged UCA's maxexpansion and tailoring's
24*/
25
26#include "unicode/utypes.h"
27
28#if !UCONFIG_NO_COLLATION
29
30#include "unicode/uchar.h"
31#include "unicode/unistr.h"
32#include "unicode/ucoleitr.h"
33#include "unicode/normlzr.h"
34#include "ucol_elm.h"
35#include "unormimp.h"
36#include "unicode/caniter.h"
37#include "cmemory.h"
38
39U_NAMESPACE_BEGIN
40
41static uint32_t uprv_uca_processContraction(CntTable *contractions, UCAElements *element, uint32_t existingCE, UErrorCode *status);
42
43U_CDECL_BEGIN
44static int32_t U_EXPORT2 U_CALLCONV
45prefixLookupHash(const UHashTok e) {
46 UCAElements *element = (UCAElements *)e.pointer;
47 UChar buf[256];
48 UHashTok key;
49 key.pointer = buf;
50 uprv_memcpy(buf, element->cPoints, element->cSize*sizeof(UChar));
51 buf[element->cSize] = 0;
52 //key.pointer = element->cPoints;
53 //element->cPoints[element->cSize] = 0;
54 return uhash_hashUChars(key);
55}
56
57static int8_t U_EXPORT2 U_CALLCONV
58prefixLookupComp(const UHashTok e1, const UHashTok e2) {
59 UCAElements *element1 = (UCAElements *)e1.pointer;
60 UCAElements *element2 = (UCAElements *)e2.pointer;
61
62 UChar buf1[256];
63 UHashTok key1;
64 key1.pointer = buf1;
65 uprv_memcpy(buf1, element1->cPoints, element1->cSize*sizeof(UChar));
66 buf1[element1->cSize] = 0;
67
68 UChar buf2[256];
69 UHashTok key2;
70 key2.pointer = buf2;
71 uprv_memcpy(buf2, element2->cPoints, element2->cSize*sizeof(UChar));
72 buf2[element2->cSize] = 0;
73
74 return uhash_compareUChars(key1, key2);
75}
76U_CDECL_END
77
78static int32_t uprv_uca_addExpansion(ExpansionTable *expansions, uint32_t value, UErrorCode *status) {
79 if(U_FAILURE(*status)) {
80 return 0;
81 }
82 if(expansions->CEs == NULL) {
83 expansions->CEs = (uint32_t *)uprv_malloc(INIT_EXP_TABLE_SIZE*sizeof(uint32_t));
84 /* test for NULL */
85 if (expansions->CEs == NULL) {
86 *status = U_MEMORY_ALLOCATION_ERROR;
87 return 0;
88 }
89 expansions->size = INIT_EXP_TABLE_SIZE;
90 expansions->position = 0;
91 }
92
93 if(expansions->position == expansions->size) {
94 uint32_t *newData = (uint32_t *)uprv_realloc(expansions->CEs, 2*expansions->size*sizeof(uint32_t));
95 if(newData == NULL) {
96#ifdef UCOL_DEBUG
97 fprintf(stderr, "out of memory for expansions\n");
98#endif
99 *status = U_MEMORY_ALLOCATION_ERROR;
100 return -1;
101 }
102 expansions->CEs = newData;
103 expansions->size *= 2;
104 }
105
106 expansions->CEs[expansions->position] = value;
107 return(expansions->position++);
108}
109
110U_CAPI tempUCATable* U_EXPORT2
374ca955 111uprv_uca_initTempTable(UCATableHeader *image, UColOptionSet *opts, const UCollator *UCA, UColCETags initTag, UColCETags supplementaryInitTag, UErrorCode *status) {
b75a7d8f
A
112 tempUCATable *t = (tempUCATable *)uprv_malloc(sizeof(tempUCATable));
113 /* test for NULL */
114 if (t == NULL) {
115 *status = U_MEMORY_ALLOCATION_ERROR;
116 return NULL;
117 }
118 MaxExpansionTable *maxet = (MaxExpansionTable *)uprv_malloc(
119 sizeof(MaxExpansionTable));
120 /* test for NULL */
121 if (maxet == NULL) {
122 *status = U_MEMORY_ALLOCATION_ERROR;
123 uprv_free(t);
124 return NULL;
125 }
126 MaxJamoExpansionTable *maxjet = (MaxJamoExpansionTable *)uprv_malloc(
127 sizeof(MaxJamoExpansionTable));
128 /* test for NULL */
129 if (maxjet == NULL) {
130 *status = U_MEMORY_ALLOCATION_ERROR;
131 uprv_free(t);
132 uprv_free(maxet);
133 return NULL;
134 }
135 t->image = image;
136 t->options = opts;
137
138 t->UCA = UCA;
139 t->expansions = (ExpansionTable *)uprv_malloc(sizeof(ExpansionTable));
140 /* test for NULL */
141 if (t->expansions == NULL) {
142 *status = U_MEMORY_ALLOCATION_ERROR;
143 uprv_free(t);
144 uprv_free(maxet);
145 uprv_free(maxjet);
146 return NULL;
147 }
148 uprv_memset(t->expansions, 0, sizeof(ExpansionTable));
149 /*t->mapping = ucmpe32_open(UCOL_SPECIAL_FLAG | (initTag<<24), UCOL_SPECIAL_FLAG | (SURROGATE_TAG<<24), UCOL_SPECIAL_FLAG | (LEAD_SURROGATE_TAG<<24), status);*/
374ca955
A
150 /*t->mapping = utrie_open(NULL, NULL, 0x100000, UCOL_SPECIAL_FLAG | (initTag<<24), TRUE); // Do your own mallocs for the structure, array and have linear Latin 1*/
151
152 t->mapping = utrie_open(NULL, NULL, 0x100000,
153 UCOL_SPECIAL_FLAG | (initTag<<24),
154 UCOL_SPECIAL_FLAG | (supplementaryInitTag << 24),
155 TRUE); // Do your own mallocs for the structure, array and have linear Latin 1
b75a7d8f
A
156 t->prefixLookup = uhash_open(prefixLookupHash, prefixLookupComp, status);
157 uhash_setValueDeleter(t->prefixLookup, uhash_freeBlock);
158
159 t->contractions = uprv_cnttab_open(t->mapping, status);
160
161 /* copy UCA's maxexpansion and merge as we go along */
162 t->maxExpansions = maxet;
163 if (UCA != NULL) {
164 /* adding an extra initial value for easier manipulation */
165 maxet->size = (UCA->lastEndExpansionCE - UCA->endExpansionCE)
166 + 2;
167 maxet->position = maxet->size - 1;
168 maxet->endExpansionCE =
169 (uint32_t *)uprv_malloc(sizeof(uint32_t) * maxet->size);
170 /* test for NULL */
171 if (maxet->endExpansionCE == NULL) {
172 *status = U_MEMORY_ALLOCATION_ERROR;
173 return NULL;
174 }
175 maxet->expansionCESize =
176 (uint8_t *)uprv_malloc(sizeof(uint8_t) * maxet->size);
177 /* test for NULL */
178 if (maxet->expansionCESize == NULL) {
179 *status = U_MEMORY_ALLOCATION_ERROR;
180 uprv_free(maxet->endExpansionCE);
181 return NULL;
182 }
183 /* initialized value */
184 *(maxet->endExpansionCE) = 0;
185 *(maxet->expansionCESize) = 0;
186 uprv_memcpy(maxet->endExpansionCE + 1, UCA->endExpansionCE,
187 sizeof(uint32_t) * (maxet->size - 1));
188 uprv_memcpy(maxet->expansionCESize + 1, UCA->expansionCESize,
189 sizeof(uint8_t) * (maxet->size - 1));
190 }
191 else {
192 maxet->size = 0;
193 }
194 t->maxJamoExpansions = maxjet;
195 maxjet->endExpansionCE = NULL;
196 maxjet->isV = NULL;
197 maxjet->size = 0;
198 maxjet->position = 0;
199 maxjet->maxLSize = 1;
200 maxjet->maxVSize = 1;
201 maxjet->maxTSize = 1;
202
203 t->unsafeCP = (uint8_t *)uprv_malloc(UCOL_UNSAFECP_TABLE_SIZE);
204 /* test for NULL */
205 if (t->unsafeCP == NULL) {
206 *status = U_MEMORY_ALLOCATION_ERROR;
207 return NULL;
208 }
209 t->contrEndCP = (uint8_t *)uprv_malloc(UCOL_UNSAFECP_TABLE_SIZE);
210 /* test for NULL */
211 if (t->contrEndCP == NULL) {
212 *status = U_MEMORY_ALLOCATION_ERROR;
213 uprv_free(t->unsafeCP);
214 return NULL;
215 }
216 uprv_memset(t->unsafeCP, 0, UCOL_UNSAFECP_TABLE_SIZE);
217 uprv_memset(t->contrEndCP, 0, UCOL_UNSAFECP_TABLE_SIZE);
218return t;
219}
220
221U_CAPI tempUCATable* U_EXPORT2
222uprv_uca_cloneTempTable(tempUCATable *t, UErrorCode *status) {
223 if(U_FAILURE(*status)) {
224 return NULL;
225 }
226
227 tempUCATable *r = (tempUCATable *)uprv_malloc(sizeof(tempUCATable));
228 /* test for NULL */
229 if (r == NULL) {
230 *status = U_MEMORY_ALLOCATION_ERROR;
231 return NULL;
232 }
233 uprv_memset(r, 0, sizeof(tempUCATable));
234
235 /* mapping */
236 if(t->mapping != NULL) {
237 /*r->mapping = ucmpe32_clone(t->mapping, status);*/
238 r->mapping = utrie_clone(NULL, t->mapping, NULL, 0);
239 }
240
241 // a hashing clone function would be very nice. We have none currently...
242 // However, we should be good, as closing should not produce any prefixed elements.
243 r->prefixLookup = NULL; // prefixes are not used in closing
244
245 /* expansions */
246 if(t->expansions != NULL) {
247 r->expansions = (ExpansionTable *)uprv_malloc(sizeof(ExpansionTable));
248 /* test for NULL */
249 if (r->expansions == NULL) {
250 *status = U_MEMORY_ALLOCATION_ERROR;
251 return NULL;
252 }
253 r->expansions->position = t->expansions->position;
254 r->expansions->size = t->expansions->size;
255 if(t->expansions->CEs != NULL) {
256 r->expansions->CEs = (uint32_t *)uprv_malloc(sizeof(uint32_t)*t->expansions->size);
257 /* test for NULL */
258 if (r->expansions->CEs == NULL) {
259 *status = U_MEMORY_ALLOCATION_ERROR;
260 return NULL;
261 }
374ca955 262 uprv_memcpy(r->expansions->CEs, t->expansions->CEs, sizeof(uint32_t)*t->expansions->position);
b75a7d8f
A
263 } else {
264 r->expansions->CEs = NULL;
265 }
266 }
267
268 if(t->contractions != NULL) {
269 r->contractions = uprv_cnttab_clone(t->contractions, status);
270 r->contractions->mapping = r->mapping;
271 }
272
273 if(t->maxExpansions != NULL) {
274 r->maxExpansions = (MaxExpansionTable *)uprv_malloc(sizeof(MaxExpansionTable));
275 /* test for NULL */
276 if (r->maxExpansions == NULL) {
277 *status = U_MEMORY_ALLOCATION_ERROR;
278 return NULL;
279 }
280 r->maxExpansions->size = t->maxExpansions->size;
281 r->maxExpansions->position = t->maxExpansions->position;
282 if(t->maxExpansions->endExpansionCE != NULL) {
283 r->maxExpansions->endExpansionCE = (uint32_t *)uprv_malloc(sizeof(uint32_t)*t->maxExpansions->size);
284 /* test for NULL */
285 if (r->maxExpansions->endExpansionCE == NULL) {
286 *status = U_MEMORY_ALLOCATION_ERROR;
287 return NULL;
288 }
374ca955 289 uprv_memcpy(r->maxExpansions->endExpansionCE, t->maxExpansions->endExpansionCE, t->maxExpansions->position*sizeof(uint32_t));
b75a7d8f
A
290 } else {
291 r->maxExpansions->endExpansionCE = NULL;
292 }
293 if(t->maxExpansions->expansionCESize != NULL) {
294 r->maxExpansions->expansionCESize = (uint8_t *)uprv_malloc(sizeof(uint8_t)*t->maxExpansions->size);
295 /* test for NULL */
296 if (r->maxExpansions->expansionCESize == NULL) {
297 *status = U_MEMORY_ALLOCATION_ERROR;
298 return NULL;
299 }
374ca955 300 uprv_memcpy(r->maxExpansions->expansionCESize, t->maxExpansions->expansionCESize, t->maxExpansions->position*sizeof(uint8_t));
b75a7d8f
A
301 } else {
302 r->maxExpansions->expansionCESize = NULL;
303 }
304 }
305
306 if(t->maxJamoExpansions != NULL) {
307 r->maxJamoExpansions = (MaxJamoExpansionTable *)uprv_malloc(sizeof(MaxJamoExpansionTable));
308 /* test for NULL */
309 if (r->maxJamoExpansions == NULL) {
310 *status = U_MEMORY_ALLOCATION_ERROR;
311 return NULL;
312 }
313 r->maxJamoExpansions->size = t->maxJamoExpansions->size;
314 r->maxJamoExpansions->position = t->maxJamoExpansions->position;
315 r->maxJamoExpansions->maxLSize = t->maxJamoExpansions->maxLSize;
316 r->maxJamoExpansions->maxVSize = t->maxJamoExpansions->maxVSize;
317 r->maxJamoExpansions->maxTSize = t->maxJamoExpansions->maxTSize;
318 if(t->maxJamoExpansions->size != 0) {
319 r->maxJamoExpansions->endExpansionCE = (uint32_t *)uprv_malloc(sizeof(uint32_t)*t->maxJamoExpansions->size);
320 /* test for NULL */
321 if (r->maxJamoExpansions->endExpansionCE == NULL) {
322 *status = U_MEMORY_ALLOCATION_ERROR;
323 return NULL;
324 }
374ca955 325 uprv_memcpy(r->maxJamoExpansions->endExpansionCE, t->maxJamoExpansions->endExpansionCE, t->maxJamoExpansions->position*sizeof(uint32_t));
b75a7d8f
A
326 r->maxJamoExpansions->isV = (UBool *)uprv_malloc(sizeof(UBool)*t->maxJamoExpansions->size);
327 /* test for NULL */
328 if (r->maxJamoExpansions->isV == NULL) {
329 *status = U_MEMORY_ALLOCATION_ERROR;
330 return NULL;
331 }
374ca955 332 uprv_memcpy(r->maxJamoExpansions->isV, t->maxJamoExpansions->isV, t->maxJamoExpansions->position*sizeof(UBool));
b75a7d8f
A
333 } else {
334 r->maxJamoExpansions->endExpansionCE = NULL;
335 r->maxJamoExpansions->isV = NULL;
336 }
337 }
338
339 if(t->unsafeCP != NULL) {
340 r->unsafeCP = (uint8_t *)uprv_malloc(UCOL_UNSAFECP_TABLE_SIZE);
341 /* test for NULL */
342 if (r->unsafeCP == NULL) {
343 *status = U_MEMORY_ALLOCATION_ERROR;
344 return NULL;
345 }
346 uprv_memcpy(r->unsafeCP, t->unsafeCP, UCOL_UNSAFECP_TABLE_SIZE);
347 }
348
349 if(t->contrEndCP != NULL) {
350 r->contrEndCP = (uint8_t *)uprv_malloc(UCOL_UNSAFECP_TABLE_SIZE);
351 /* test for NULL */
352 if (r->contrEndCP == NULL) {
353 *status = U_MEMORY_ALLOCATION_ERROR;
354 return NULL;
355 }
356 uprv_memcpy(r->contrEndCP, t->contrEndCP, UCOL_UNSAFECP_TABLE_SIZE);
357 }
358
359 r->UCA = t->UCA;
360 r->image = t->image;
361 r->options = t->options;
362
363 return r;
364}
365
366
367U_CAPI void U_EXPORT2
368uprv_uca_closeTempTable(tempUCATable *t) {
369 if(t != NULL) {
370 uprv_free(t->expansions->CEs);
371 uprv_free(t->expansions);
372 if(t->contractions != NULL) {
373 uprv_cnttab_close(t->contractions);
374 }
375 /*ucmpe32_close(t->mapping);*/
376 utrie_close(t->mapping);
377
378 if(t->prefixLookup != NULL) {
379 uhash_close(t->prefixLookup);
380 }
381
382 uprv_free(t->maxExpansions->endExpansionCE);
383 uprv_free(t->maxExpansions->expansionCESize);
384 uprv_free(t->maxExpansions);
385
386 if (t->maxJamoExpansions->size > 0) {
387 uprv_free(t->maxJamoExpansions->endExpansionCE);
388 uprv_free(t->maxJamoExpansions->isV);
389 }
390 uprv_free(t->maxJamoExpansions);
391
392 uprv_free(t->unsafeCP);
393 uprv_free(t->contrEndCP);
394
395 uprv_free(t);
396 }
397}
398
399/**
400* Looks for the maximum length of all expansion sequences ending with the same
401* collation element. The size required for maxexpansion and maxsize is
402* returned if the arrays are too small.
403* @param endexpansion the last expansion collation element to be added
404* @param expansionsize size of the expansion
405* @param maxexpansion data structure to store the maximum expansion data.
406* @param status error status
407* @returns size of the maxexpansion and maxsize used.
408*/
374ca955 409static int uprv_uca_setMaxExpansion(uint32_t endexpansion,
b75a7d8f
A
410 uint8_t expansionsize,
411 MaxExpansionTable *maxexpansion,
412 UErrorCode *status)
413{
414 if (maxexpansion->size == 0) {
415 /* we'll always make the first element 0, for easier manipulation */
416 maxexpansion->endExpansionCE =
417 (uint32_t *)uprv_malloc(INIT_EXP_TABLE_SIZE * sizeof(int32_t));
418 /* test for NULL */
419 if (maxexpansion->endExpansionCE == NULL) {
420 *status = U_MEMORY_ALLOCATION_ERROR;
421 return 0;
422 }
423 *(maxexpansion->endExpansionCE) = 0;
424 maxexpansion->expansionCESize =
425 (uint8_t *)uprv_malloc(INIT_EXP_TABLE_SIZE * sizeof(uint8_t));
426 /* test for NULL */;
427 if (maxexpansion->expansionCESize == NULL) {
428 *status = U_MEMORY_ALLOCATION_ERROR;
429 return 0;
430 }
431 *(maxexpansion->expansionCESize) = 0;
432 maxexpansion->size = INIT_EXP_TABLE_SIZE;
433 maxexpansion->position = 0;
434 }
435
436 if (maxexpansion->position + 1 == maxexpansion->size) {
437 uint32_t *neweece = (uint32_t *)uprv_realloc(maxexpansion->endExpansionCE,
438 2 * maxexpansion->size * sizeof(uint32_t));
439 uint8_t *neweces = (uint8_t *)uprv_realloc(maxexpansion->expansionCESize,
440 2 * maxexpansion->size * sizeof(uint8_t));
441 if (neweece == NULL || neweces == NULL) {
442#ifdef UCOL_DEBUG
443 fprintf(stderr, "out of memory for maxExpansions\n");
444#endif
445 *status = U_MEMORY_ALLOCATION_ERROR;
446 return -1;
447 }
448 maxexpansion->endExpansionCE = neweece;
449 maxexpansion->expansionCESize = neweces;
450 maxexpansion->size *= 2;
451 }
452
453 uint32_t *pendexpansionce = maxexpansion->endExpansionCE;
454 uint8_t *pexpansionsize = maxexpansion->expansionCESize;
455 int pos = maxexpansion->position;
456
457 uint32_t *start = pendexpansionce;
458 uint32_t *limit = pendexpansionce + pos;
459
460 /* using binary search to determine if last expansion element is
461 already in the array */
462 uint32_t *mid;
463 int result = -1;
464 while (start < limit - 1) {
465 mid = start + ((limit - start) >> 1);
466 if (endexpansion <= *mid) {
467 limit = mid;
468 }
469 else {
470 start = mid;
471 }
472 }
374ca955 473
b75a7d8f
A
474 if (*start == endexpansion) {
475 result = start - pendexpansionce;
476 }
477 else
478 if (*limit == endexpansion) {
479 result = limit - pendexpansionce;
480 }
374ca955 481
b75a7d8f
A
482 if (result > -1) {
483 /* found the ce in expansion, we'll just modify the size if it is
484 smaller */
485 uint8_t *currentsize = pexpansionsize + result;
486 if (*currentsize < expansionsize) {
487 *currentsize = expansionsize;
488 }
489 }
490 else {
491 /* we'll need to squeeze the value into the array.
492 initial implementation. */
493 /* shifting the subarray down by 1 */
494 int shiftsize = (pendexpansionce + pos) - start;
495 uint32_t *shiftpos = start + 1;
496 uint8_t *sizeshiftpos = pexpansionsize + (shiftpos - pendexpansionce);
374ca955 497
b75a7d8f 498 /* okay need to rearrange the array into sorted order */
374ca955 499 if (shiftsize == 0 /*|| *(pendexpansionce + pos) < endexpansion*/) { /* the commented part is actually both redundant and dangerous */
b75a7d8f
A
500 *(pendexpansionce + pos + 1) = endexpansion;
501 *(pexpansionsize + pos + 1) = expansionsize;
502 }
503 else {
504 uprv_memmove(shiftpos + 1, shiftpos, shiftsize * sizeof(int32_t));
505 uprv_memmove(sizeshiftpos + 1, sizeshiftpos,
506 shiftsize * sizeof(uint8_t));
507 *shiftpos = endexpansion;
508 *sizeshiftpos = expansionsize;
509 }
510 maxexpansion->position ++;
511
512#ifdef UCOL_DEBUG
513 int temp;
514 UBool found = FALSE;
515 for (temp = 0; temp < maxexpansion->position; temp ++) {
516 if (pendexpansionce[temp] >= pendexpansionce[temp + 1]) {
517 fprintf(stderr, "expansions %d\n", temp);
518 }
519 if (pendexpansionce[temp] == endexpansion) {
520 found =TRUE;
521 if (pexpansionsize[temp] < expansionsize) {
522 fprintf(stderr, "expansions size %d\n", temp);
523 }
524 }
525 }
526 if (pendexpansionce[temp] == endexpansion) {
527 found =TRUE;
528 if (pexpansionsize[temp] < expansionsize) {
529 fprintf(stderr, "expansions size %d\n", temp);
530 }
531 }
532 if (!found)
533 fprintf(stderr, "expansion not found %d\n", temp);
534#endif
535 }
536
537 return maxexpansion->position;
538}
539
540/**
541* Sets the maximum length of all jamo expansion sequences ending with the same
542* collation element. The size required for maxexpansion and maxsize is
543* returned if the arrays are too small.
544* @param ch the jamo codepoint
545* @param endexpansion the last expansion collation element to be added
546* @param expansionsize size of the expansion
547* @param maxexpansion data structure to store the maximum expansion data.
548* @param status error status
549* @returns size of the maxexpansion and maxsize used.
550*/
374ca955 551static int uprv_uca_setMaxJamoExpansion(UChar ch,
b75a7d8f
A
552 uint32_t endexpansion,
553 uint8_t expansionsize,
554 MaxJamoExpansionTable *maxexpansion,
555 UErrorCode *status)
556{
557 UBool isV = TRUE;
558 if (((uint32_t)ch - 0x1100) <= (0x1112 - 0x1100)) {
559 /* determines L for Jamo, doesn't need to store this since it is never
560 at the end of a expansion */
561 if (maxexpansion->maxLSize < expansionsize) {
562 maxexpansion->maxLSize = expansionsize;
563 }
564 return maxexpansion->position;
565 }
566
567 if (((uint32_t)ch - 0x1161) <= (0x1175 - 0x1161)) {
568 /* determines V for Jamo */
569 if (maxexpansion->maxVSize < expansionsize) {
570 maxexpansion->maxVSize = expansionsize;
571 }
572 }
573
574 if (((uint32_t)ch - 0x11A8) <= (0x11C2 - 0x11A8)) {
575 isV = FALSE;
576 /* determines T for Jamo */
577 if (maxexpansion->maxTSize < expansionsize) {
578 maxexpansion->maxTSize = expansionsize;
579 }
580 }
581
582 if (maxexpansion->size == 0) {
583 /* we'll always make the first element 0, for easier manipulation */
584 maxexpansion->endExpansionCE =
585 (uint32_t *)uprv_malloc(INIT_EXP_TABLE_SIZE * sizeof(uint32_t));
586 /* test for NULL */;
587 if (maxexpansion->endExpansionCE == NULL) {
588 *status = U_MEMORY_ALLOCATION_ERROR;
589 return 0;
590 }
591 *(maxexpansion->endExpansionCE) = 0;
592 maxexpansion->isV =
593 (UBool *)uprv_malloc(INIT_EXP_TABLE_SIZE * sizeof(UBool));
594 /* test for NULL */;
595 if (maxexpansion->isV == NULL) {
596 *status = U_MEMORY_ALLOCATION_ERROR;
597 return 0;
598 }
599 *(maxexpansion->isV) = 0;
600 maxexpansion->size = INIT_EXP_TABLE_SIZE;
601 maxexpansion->position = 0;
602 }
603
604 if (maxexpansion->position + 1 == maxexpansion->size) {
605 uint32_t *neweece = (uint32_t *)uprv_realloc(maxexpansion->endExpansionCE,
606 2 * maxexpansion->size * sizeof(uint32_t));
607 UBool *newisV = (UBool *)uprv_realloc(maxexpansion->isV,
608 2 * maxexpansion->size * sizeof(UBool));
609 if (neweece == NULL || newisV == NULL) {
610#ifdef UCOL_DEBUG
611 fprintf(stderr, "out of memory for maxExpansions\n");
612#endif
613 *status = U_MEMORY_ALLOCATION_ERROR;
614 return -1;
615 }
616 maxexpansion->endExpansionCE = neweece;
617 maxexpansion->isV = newisV;
618 maxexpansion->size *= 2;
619 }
620
621 uint32_t *pendexpansionce = maxexpansion->endExpansionCE;
622 int pos = maxexpansion->position;
623
624 while (pos > 0) {
625 pos --;
626 if (*(pendexpansionce + pos) == endexpansion) {
627 return maxexpansion->position;
628 }
629 }
630
631 *(pendexpansionce + maxexpansion->position) = endexpansion;
632 *(maxexpansion->isV + maxexpansion->position) = isV;
633 maxexpansion->position ++;
374ca955 634
b75a7d8f
A
635 return maxexpansion->position;
636}
637
638
639static void ContrEndCPSet(uint8_t *table, UChar c) {
640 uint32_t hash;
641 uint8_t *htByte;
642
643 hash = c;
644 if (hash >= UCOL_UNSAFECP_TABLE_SIZE*8) {
645 hash = (hash & UCOL_UNSAFECP_TABLE_MASK) + 256;
646 }
647 htByte = &table[hash>>3];
648 *htByte |= (1 << (hash & 7));
649}
650
651
652static void unsafeCPSet(uint8_t *table, UChar c) {
653 uint32_t hash;
654 uint8_t *htByte;
655
656 hash = c;
657 if (hash >= UCOL_UNSAFECP_TABLE_SIZE*8) {
658 if (hash >= 0xd800 && hash <= 0xf8ff) {
659 /* Part of a surrogate, or in private use area. */
660 /* These don't go in the table */
661 return;
662 }
663 hash = (hash & UCOL_UNSAFECP_TABLE_MASK) + 256;
664 }
665 htByte = &table[hash>>3];
666 *htByte |= (1 << (hash & 7));
667}
668
669
670/* to the UnsafeCP hash table, add all chars with combining class != 0 */
671static void uprv_uca_unsafeCPAddCCNZ(tempUCATable *t, UErrorCode *status) {
672
673 UChar c;
674 uint16_t fcd; // Hi byte is lead combining class.
675 // lo byte is trailing combing class.
676 const uint16_t *fcdTrieData;
677
678 fcdTrieData = unorm_getFCDTrie(status);
679 if (U_FAILURE(*status)) {
680 return;
681 }
682
683 for (c=0; c<0xffff; c++) {
684 fcd = unorm_getFCD16(fcdTrieData, c);
685 if (fcd >= 0x100 || // if the leading combining class(c) > 0 ||
686 (UTF_IS_LEAD(c) && fcd != 0)) // c is a leading surrogate with some FCD data
687 unsafeCPSet(t->unsafeCP, c);
688 }
689
690 if(t->prefixLookup != NULL) {
691 int32_t i = -1;
692 const UHashElement *e = NULL;
693 UCAElements *element = NULL;
694 UChar NFCbuf[256];
695 uint32_t NFCbufLen = 0;
696 while((e = uhash_nextElement(t->prefixLookup, &i)) != NULL) {
697 element = (UCAElements *)e->value.pointer;
698 // codepoints here are in the NFD form. We need to add the
699 // first code point of the NFC form to unsafe, because
700 // strcoll needs to backup over them.
701 NFCbufLen = unorm_normalize(element->cPoints, element->cSize, UNORM_NFC, 0,
702 NFCbuf, 256, status);
703 unsafeCPSet(t->unsafeCP, NFCbuf[0]);
704 }
705 }
706}
707
374ca955 708static uint32_t uprv_uca_addPrefix(tempUCATable *t, uint32_t CE,
b75a7d8f
A
709 UCAElements *element, UErrorCode *status) {
710 // currently the longest prefix we're supporting in Japanese is two characters
711 // long. Although this table could quite easily mimic complete contraction stuff
712 // there is no good reason to make a general solution, as it would require some
713 // error prone messing.
714 CntTable *contractions = t->contractions;
715 UChar32 cp;
716 uint32_t cpsize = 0;
717 UChar *oldCP = element->cPoints;
718 uint32_t oldCPSize = element->cSize;
719
720
721 contractions->currentTag = SPEC_PROC_TAG;
722
723 // here, we will normalize & add prefix to the table.
724 uint32_t j = 0;
725#ifdef UCOL_DEBUG
726 for(j=0; j<element->cSize; j++) {
727 fprintf(stdout, "CP: %04X ", element->cPoints[j]);
728 }
729 fprintf(stdout, "El: %08X Pref: ", CE);
730 for(j=0; j<element->prefixSize; j++) {
731 fprintf(stdout, "%04X ", element->prefix[j]);
732 }
733 fprintf(stdout, "%08X ", element->mapCE);
734#endif
735
736 for (j = 1; j<element->prefixSize; j++) { /* First add NFD prefix chars to unsafe CP hash table */
737 // Unless it is a trail surrogate, which is handled algoritmically and
738 // shouldn't take up space in the table.
739 if(!(UTF_IS_TRAIL(element->prefix[j]))) {
740 unsafeCPSet(t->unsafeCP, element->prefix[j]);
741 }
742 }
743
744 UChar tempPrefix = 0;
745
746 for(j = 0; j < /*nfcSize*/element->prefixSize/2; j++) { // prefixes are going to be looked up backwards
747 // therefore, we will promptly reverse the prefix buffer...
748 tempPrefix = *(/*nfcBuffer*/element->prefix+element->prefixSize-j-1);
749 *(/*nfcBuffer*/element->prefix+element->prefixSize-j-1) = element->prefix[j];
750 element->prefix[j] = tempPrefix;
751 }
752
753#ifdef UCOL_DEBUG
754 fprintf(stdout, "Reversed: ");
755 for(j=0; j<element->prefixSize; j++) {
756 fprintf(stdout, "%04X ", element->prefix[j]);
757 }
758 fprintf(stdout, "%08X\n", element->mapCE);
759#endif
760
761 // the first codepoint is also unsafe, as it forms a 'contraction' with the prefix
762 if(!(UTF_IS_TRAIL(element->cPoints[0]))) {
763 unsafeCPSet(t->unsafeCP, element->cPoints[0]);
764 }
765
766 // Maybe we need this... To handle prefixes completely in the forward direction...
767 //if(element->cSize == 1) {
768 // if(!(UTF_IS_TRAIL(element->cPoints[0]))) {
769 // ContrEndCPSet(t->contrEndCP, element->cPoints[0]);
770 // }
771 //}
772
773 element->cPoints = element->prefix;
774 element->cSize = element->prefixSize;
775
776 // Add the last char of the contraction to the contraction-end hash table.
777 // unless it is a trail surrogate, which is handled algorithmically and
778 // shouldn't be in the table
779 if(!(UTF_IS_TRAIL(element->cPoints[element->cSize -1]))) {
780 ContrEndCPSet(t->contrEndCP, element->cPoints[element->cSize -1]);
781 }
782
783 // First we need to check if contractions starts with a surrogate
784 UTF_NEXT_CHAR(element->cPoints, cpsize, element->cSize, cp);
785
786 // If there are any Jamos in the contraction, we should turn on special
787 // processing for Jamos
788 if(UCOL_ISJAMO(element->prefix[0])) {
789 t->image->jamoSpecial = TRUE;
790 }
791 /* then we need to deal with it */
792 /* we could aready have something in table - or we might not */
793
794 if(!isPrefix(CE)) {
795 /* if it wasn't contraction, we wouldn't end up here*/
796 int32_t firstContractionOffset = 0;
797 int32_t contractionOffset = 0;
798 firstContractionOffset = uprv_cnttab_addContraction(contractions, UPRV_CNTTAB_NEWELEMENT, 0, CE, status);
799 uint32_t newCE = uprv_uca_processContraction(contractions, element, UCOL_NOT_FOUND, status);
800 contractionOffset = uprv_cnttab_addContraction(contractions, firstContractionOffset, *element->prefix, newCE, status);
801 contractionOffset = uprv_cnttab_addContraction(contractions, firstContractionOffset, 0xFFFF, CE, status);
802 CE = constructContractCE(SPEC_PROC_TAG, firstContractionOffset);
803 } else { /* we are adding to existing contraction */
804 /* there were already some elements in the table, so we need to add a new contraction */
805 /* Two things can happen here: either the codepoint is already in the table, or it is not */
806 int32_t position = uprv_cnttab_findCP(contractions, CE, *element->prefix, status);
807 if(position > 0) { /* if it is we just continue down the chain */
808 uint32_t eCE = uprv_cnttab_getCE(contractions, CE, position, status);
809 uint32_t newCE = uprv_uca_processContraction(contractions, element, eCE, status);
810 uprv_cnttab_setContraction(contractions, CE, position, *(element->prefix), newCE, status);
811 } else { /* if it isn't, we will have to create a new sequence */
812 uprv_uca_processContraction(contractions, element, UCOL_NOT_FOUND, status);
813 uprv_cnttab_insertContraction(contractions, CE, *(element->prefix), element->mapCE, status);
814 }
815 }
816
817 element->cPoints = oldCP;
818 element->cSize = oldCPSize;
819
820 return CE;
821}
822
823// Note regarding surrogate handling: We are interested only in the single
824// or leading surrogates in a contraction. If a surrogate is somewhere else
825// in the contraction, it is going to be handled as a pair of code units,
826// as it doesn't affect the performance AND handling surrogates specially
827// would complicate code way too much.
374ca955 828static uint32_t uprv_uca_addContraction(tempUCATable *t, uint32_t CE,
b75a7d8f
A
829 UCAElements *element, UErrorCode *status) {
830 CntTable *contractions = t->contractions;
831 UChar32 cp;
832 uint32_t cpsize = 0;
833
834 contractions->currentTag = CONTRACTION_TAG;
835
836 // First we need to check if contractions starts with a surrogate
837 UTF_NEXT_CHAR(element->cPoints, cpsize, element->cSize, cp);
838
839 if(cpsize<element->cSize) { // This is a real contraction, if there are other characters after the first
840 uint32_t j = 0;
841 for (j=1; j<element->cSize; j++) { /* First add contraction chars to unsafe CP hash table */
842 // Unless it is a trail surrogate, which is handled algoritmically and
843 // shouldn't take up space in the table.
844 if(!(UTF_IS_TRAIL(element->cPoints[j]))) {
845 unsafeCPSet(t->unsafeCP, element->cPoints[j]);
846 }
847 }
848 // Add the last char of the contraction to the contraction-end hash table.
849 // unless it is a trail surrogate, which is handled algorithmically and
850 // shouldn't be in the table
851 if(!(UTF_IS_TRAIL(element->cPoints[element->cSize -1]))) {
852 ContrEndCPSet(t->contrEndCP, element->cPoints[element->cSize -1]);
853 }
854
855 // If there are any Jamos in the contraction, we should turn on special
856 // processing for Jamos
857 if(UCOL_ISJAMO(element->cPoints[0])) {
858 t->image->jamoSpecial = TRUE;
859 }
860 /* then we need to deal with it */
861 /* we could aready have something in table - or we might not */
862 element->cPoints+=cpsize;
863 element->cSize-=cpsize;
864 if(!isContraction(CE)) {
865 /* if it wasn't contraction, we wouldn't end up here*/
866 int32_t firstContractionOffset = 0;
867 int32_t contractionOffset = 0;
868 firstContractionOffset = uprv_cnttab_addContraction(contractions, UPRV_CNTTAB_NEWELEMENT, 0, CE, status);
869 uint32_t newCE = uprv_uca_processContraction(contractions, element, UCOL_NOT_FOUND, status);
870 contractionOffset = uprv_cnttab_addContraction(contractions, firstContractionOffset, *element->cPoints, newCE, status);
871 contractionOffset = uprv_cnttab_addContraction(contractions, firstContractionOffset, 0xFFFF, CE, status);
872 CE = constructContractCE(CONTRACTION_TAG, firstContractionOffset);
873 } else { /* we are adding to existing contraction */
874 /* there were already some elements in the table, so we need to add a new contraction */
875 /* Two things can happen here: either the codepoint is already in the table, or it is not */
876 int32_t position = uprv_cnttab_findCP(contractions, CE, *element->cPoints, status);
877 if(position > 0) { /* if it is we just continue down the chain */
878 uint32_t eCE = uprv_cnttab_getCE(contractions, CE, position, status);
879 uint32_t newCE = uprv_uca_processContraction(contractions, element, eCE, status);
880 uprv_cnttab_setContraction(contractions, CE, position, *(element->cPoints), newCE, status);
881 } else { /* if it isn't, we will have to create a new sequence */
882 uint32_t newCE = uprv_uca_processContraction(contractions, element, UCOL_NOT_FOUND, status);
883 uprv_cnttab_insertContraction(contractions, CE, *(element->cPoints), newCE, status);
884 }
885 }
886 element->cPoints-=cpsize;
887 element->cSize+=cpsize;
888 /*ucmpe32_set(t->mapping, cp, CE);*/
889 utrie_set32(t->mapping, cp, CE);
890 } else if(!isContraction(CE)) { /* this is just a surrogate, and there is no contraction */
891 /*ucmpe32_set(t->mapping, cp, element->mapCE);*/
892 utrie_set32(t->mapping, cp, element->mapCE);
893 } else { /* fill out the first stage of the contraction with the surrogate CE */
894 uprv_cnttab_changeContraction(contractions, CE, 0, element->mapCE, status);
895 uprv_cnttab_changeContraction(contractions, CE, 0xFFFF, element->mapCE, status);
896 }
897 return CE;
898}
899
900
901static uint32_t uprv_uca_processContraction(CntTable *contractions, UCAElements *element, uint32_t existingCE, UErrorCode *status) {
902 int32_t firstContractionOffset = 0;
903 int32_t contractionOffset = 0;
904// uint32_t contractionElement = UCOL_NOT_FOUND;
905
906 if(U_FAILURE(*status)) {
907 return UCOL_NOT_FOUND;
908 }
909
910 /* end of recursion */
911 if(element->cSize == 1) {
912 if(isCntTableElement(existingCE) && ((UColCETags)getCETag(existingCE) == contractions->currentTag)) {
913 uprv_cnttab_changeContraction(contractions, existingCE, 0, element->mapCE, status);
914 uprv_cnttab_changeContraction(contractions, existingCE, 0xFFFF, element->mapCE, status);
915 return existingCE;
916 } else {
917 return element->mapCE; /*can't do just that. existingCe might be a contraction, meaning that we need to do another step */
918 }
919 }
920
921 /* this recursion currently feeds on the only element we have... We will have to copy it in order to accomodate */
922 /* for both backward and forward cycles */
923
924 /* we encountered either an empty space or a non-contraction element */
925 /* this means we are constructing a new contraction sequence */
926 element->cPoints++;
927 element->cSize--;
928 if(!isCntTableElement(existingCE)) {
929 /* if it wasn't contraction, we wouldn't end up here*/
930 firstContractionOffset = uprv_cnttab_addContraction(contractions, UPRV_CNTTAB_NEWELEMENT, 0, existingCE, status);
931 uint32_t newCE = uprv_uca_processContraction(contractions, element, UCOL_NOT_FOUND, status);
932 contractionOffset = uprv_cnttab_addContraction(contractions, firstContractionOffset, *element->cPoints, newCE, status);
933 contractionOffset = uprv_cnttab_addContraction(contractions, firstContractionOffset, 0xFFFF, existingCE, status);
934 existingCE = constructContractCE(contractions->currentTag, firstContractionOffset);
935 } else { /* we are adding to existing contraction */
936 /* there were already some elements in the table, so we need to add a new contraction */
937 /* Two things can happen here: either the codepoint is already in the table, or it is not */
938 int32_t position = uprv_cnttab_findCP(contractions, existingCE, *element->cPoints, status);
939 if(position > 0) { /* if it is we just continue down the chain */
940 uint32_t eCE = uprv_cnttab_getCE(contractions, existingCE, position, status);
941 uint32_t newCE = uprv_uca_processContraction(contractions, element, eCE, status);
942 uprv_cnttab_setContraction(contractions, existingCE, position, *(element->cPoints), newCE, status);
943 } else { /* if it isn't, we will have to create a new sequence */
944 uint32_t newCE = uprv_uca_processContraction(contractions, element, UCOL_NOT_FOUND, status);
945 uprv_cnttab_insertContraction(contractions, existingCE, *(element->cPoints), newCE, status);
946 }
947 }
948 element->cPoints--;
949 element->cSize++;
950 return existingCE;
951}
952
953static uint32_t uprv_uca_finalizeAddition(tempUCATable *t, UCAElements *element, UErrorCode *status) {
954 uint32_t CE = UCOL_NOT_FOUND;
955 // This should add a completely ignorable element to the
956 // unsafe table, so that backward iteration will skip
957 // over it when treating contractions.
958 uint32_t i = 0;
959 if(element->mapCE == 0) {
960 for(i = 0; i < element->cSize; i++) {
961 if(!UTF_IS_TRAIL(element->cPoints[i])) {
962 unsafeCPSet(t->unsafeCP, element->cPoints[i]);
963 }
964 }
965 }
966 if(element->cSize > 1) { /* we're adding a contraction */
967 uint32_t i = 0;
968 UChar32 cp;
969
970 UTF_NEXT_CHAR(element->cPoints, i, element->cSize, cp);
971 /*CE = ucmpe32_get(t->mapping, cp);*/
972 CE = utrie_get32(t->mapping, cp, NULL);
973
974 CE = uprv_uca_addContraction(t, CE, element, status);
975 } else { /* easy case, */
976 /*CE = ucmpe32_get(t->mapping, element->cPoints[0]);*/
977 CE = utrie_get32(t->mapping, element->cPoints[0], NULL);
978
979 if( CE != UCOL_NOT_FOUND) {
980 if(isCntTableElement(CE) /*isContraction(CE)*/) { /* adding a non contraction element (thai, expansion, single) to already existing contraction */
981 if(!isPrefix(element->mapCE)) { // we cannot reenter prefix elements - as we are going to create a dead loop
982 // Only expansions and regular CEs can go here... Contractions will never happen in this place
983 uprv_cnttab_setContraction(t->contractions, CE, 0, 0, element->mapCE, status);
984 /* This loop has to change the CE at the end of contraction REDO!*/
985 uprv_cnttab_changeLastCE(t->contractions, CE, element->mapCE, status);
986 }
987 } else {
988 /*ucmpe32_set(t->mapping, element->cPoints[0], element->mapCE);*/
989 utrie_set32(t->mapping, element->cPoints[0], element->mapCE);
990#ifdef UCOL_DEBUG
991 fprintf(stderr, "Warning - trying to overwrite existing data %08X for cp %04X with %08X\n", CE, element->cPoints[0], element->CEs[0]);
992 //*status = U_ILLEGAL_ARGUMENT_ERROR;
993#endif
994 }
995 } else {
996 /*ucmpe32_set(t->mapping, element->cPoints[0], element->mapCE);*/
997 utrie_set32(t->mapping, element->cPoints[0], element->mapCE);
998 }
999 }
1000 return CE;
1001}
1002
1003/* This adds a read element, while testing for existence */
1004U_CAPI uint32_t U_EXPORT2
1005uprv_uca_addAnElement(tempUCATable *t, UCAElements *element, UErrorCode *status) {
1006 ExpansionTable *expansions = t->expansions;
1007
1008 uint32_t i = 1;
1009 uint32_t expansion = 0;
1010 uint32_t CE;
1011
1012 if(U_FAILURE(*status)) {
1013 return 0xFFFF;
1014 }
374ca955
A
1015
1016 element->mapCE = 0; // clear mapCE so that we can catch expansions
1017
b75a7d8f
A
1018 if(element->noOfCEs == 1) {
1019 if(element->isThai == FALSE) {
374ca955 1020 element->mapCE = element->CEs[0];
b75a7d8f
A
1021 } else { /* add thai - totally bad here */
1022 expansion = (uint32_t)(UCOL_SPECIAL_FLAG | (THAI_TAG<<UCOL_TAG_SHIFT)
1023 | ((uprv_uca_addExpansion(expansions, element->CEs[0], status)+(headersize>>2))<<4)
1024 | 0x1);
1025 element->mapCE = expansion;
1026 }
1027 } else {
1028 /* ICU 2.1 long primaries */
1029 /* unfortunately, it looks like we have to look for a long primary here */
1030 /* since in canonical closure we are going to hit some long primaries from */
1031 /* the first phase, and they will come back as continuations/expansions */
1032 /* destroying the effect of the previous opitimization */
1033 /* A long primary is a three byte primary with starting secondaries and tertiaries */
1034 /* It can appear in long runs of only primary differences (like east Asian tailorings) */
1035 /* also, it should not be an expansion, as expansions would break with this */
1036 // This part came in from ucol_bld.cpp
1037 //if(tok->expansion == 0
1038 //&& noOfBytes[0] == 3 && noOfBytes[1] == 1 && noOfBytes[2] == 1
1039 //&& CEparts[1] == (UCOL_BYTE_COMMON << 24) && CEparts[2] == (UCOL_BYTE_COMMON << 24)) {
1040 /* we will construct a special CE that will go unchanged to the table */
1041 if(element->noOfCEs == 2 // a two CE expansion
1042 && isContinuation(element->CEs[1]) // which is a continuation
1043 && (element->CEs[1] & (~(0xFF << 24 | UCOL_CONTINUATION_MARKER))) == 0 // that has only primaries in continuation,
1044 && (((element->CEs[0]>>8) & 0xFF) == UCOL_BYTE_COMMON) // a common secondary
1045 && ((element->CEs[0] & 0xFF) == UCOL_BYTE_COMMON) // and a common tertiary
1046 ) {
1047#ifdef UCOL_DEBUG
1048 fprintf(stdout, "Long primary %04X\n", element->cPoints[0]);
1049#endif
1050 element->mapCE = UCOL_SPECIAL_FLAG | (LONG_PRIMARY_TAG<<24) // a long primary special
1051 | ((element->CEs[0]>>8) & 0xFFFF00) // first and second byte of primary
1052 | ((element->CEs[1]>>24) & 0xFF); // third byte of primary
1053 } else {
374ca955
A
1054 expansion = (uint32_t)(UCOL_SPECIAL_FLAG | (EXPANSION_TAG<<UCOL_TAG_SHIFT)
1055 | ((uprv_uca_addExpansion(expansions, element->CEs[0], status)+(headersize>>2))<<4)
1056 & 0xFFFFF0);
1057
b75a7d8f
A
1058 for(i = 1; i<element->noOfCEs; i++) {
1059 uprv_uca_addExpansion(expansions, element->CEs[i], status);
1060 }
1061 if(element->noOfCEs <= 0xF) {
1062 expansion |= element->noOfCEs;
1063 } else {
1064 uprv_uca_addExpansion(expansions, 0, status);
1065 }
1066 element->mapCE = expansion;
1067 uprv_uca_setMaxExpansion(element->CEs[element->noOfCEs - 1],
1068 (uint8_t)element->noOfCEs,
1069 t->maxExpansions,
1070 status);
1071 if(UCOL_ISJAMO(element->cPoints[0])) {
1072 t->image->jamoSpecial = TRUE;
1073 uprv_uca_setMaxJamoExpansion(element->cPoints[0],
1074 element->CEs[element->noOfCEs - 1],
1075 (uint8_t)element->noOfCEs,
1076 t->maxJamoExpansions,
1077 status);
1078 }
1079 }
1080 }
1081
374ca955
A
1082 // We treat digits differently - they are "uber special" and should be
1083 // processed differently if numeric collation is on.
1084 UChar32 uniChar = 0;
1085 //printElement(element);
1086 if ((element->cSize == 2) && U16_IS_LEAD(element->uchars[0])){
1087 uniChar = U16_GET_SUPPLEMENTARY(element->uchars[0], element->uchars[1]);
1088 } else if (element->cSize == 1){
1089 uniChar = element->uchars[0];
1090 }
1091
1092 // Here, we either have one normal CE OR mapCE is set. Therefore, we stuff only
1093 // one element to the expansion buffer. When we encounter a digit and we don't
1094 // do numeric collation, we will just pick the CE we have and break out of case
1095 // (see ucol.cpp ucol_prv_getSpecialCE && ucol_prv_getSpecialPrevCE). If we picked
1096 // a special, further processing will occur. If it's a simple CE, we'll return due
1097 // to how the loop is constructed.
1098 if (uniChar != 0 && u_isdigit(uniChar)){
1099 expansion = (uint32_t)(UCOL_SPECIAL_FLAG | (DIGIT_TAG<<UCOL_TAG_SHIFT) | 1); // prepare the element
1100 if(element->mapCE) { // if there is an expansion, we'll pick it here
1101 expansion |= ((uprv_uca_addExpansion(expansions, element->mapCE, status)+(headersize>>2))<<4);
1102 } else {
1103 expansion |= ((uprv_uca_addExpansion(expansions, element->CEs[0], status)+(headersize>>2))<<4);
1104 }
1105 element->mapCE = expansion;
1106
1107 // Need to go back to the beginning of the digit string if in the middle!
1108 if(uniChar <= 0xFFFF) { // supplementaries are always unsafe. API takes UChars
1109 unsafeCPSet(t->unsafeCP, (UChar)uniChar);
1110 }
1111 }
1112
b75a7d8f
A
1113 // here we want to add the prefix structure.
1114 // I will try to process it as a reverse contraction, if possible.
1115 // prefix buffer is already reversed.
1116
1117 if(element->prefixSize!=0) {
1118 // We keep the seen prefix starter elements in a hashtable
1119 // we need it to be able to distinguish between the simple
1120 // codepoints and prefix starters. Also, we need to use it
1121 // for canonical closure.
1122
1123 UCAElements *composed = (UCAElements *)uprv_malloc(sizeof(UCAElements));
1124 /* test for NULL */
1125 if (composed == NULL) {
1126 *status = U_MEMORY_ALLOCATION_ERROR;
1127 return 0;
1128 }
1129 uprv_memcpy(composed, element, sizeof(UCAElements));
1130 composed->cPoints = composed->uchars;
1131 composed->prefix = composed->prefixChars;
1132
1133 composed->prefixSize = unorm_normalize(element->prefix, element->prefixSize, UNORM_NFC, 0, composed->prefix, 128, status);
1134
1135
1136 if(t->prefixLookup != NULL) {
1137 UCAElements *uCE = (UCAElements *)uhash_get(t->prefixLookup, element);
1138 if(uCE != NULL) { // there is already a set of code points here
1139 element->mapCE = uprv_uca_addPrefix(t, uCE->mapCE, element, status);
1140 } else { // no code points, so this spot is clean
1141 element->mapCE = uprv_uca_addPrefix(t, UCOL_NOT_FOUND, element, status);
1142 uCE = (UCAElements *)uprv_malloc(sizeof(UCAElements));
1143 /* test for NULL */
1144 if (uCE == NULL) {
1145 *status = U_MEMORY_ALLOCATION_ERROR;
1146 return 0;
1147 }
1148 uprv_memcpy(uCE, element, sizeof(UCAElements));
1149 uCE->cPoints = uCE->uchars;
1150 uhash_put(t->prefixLookup, uCE, uCE, status);
1151 }
1152 if(composed->prefixSize != element->prefixSize || uprv_memcmp(composed->prefix, element->prefix, element->prefixSize)) {
1153 // do it!
1154 composed->mapCE = uprv_uca_addPrefix(t, element->mapCE, composed, status);
1155 }
1156 }
1157 uprv_free(composed);
1158 }
1159
1160 // We need to use the canonical iterator here
1161 // the way we do it is to generate the canonically equivalent strings
1162 // for the contraction and then add the sequences that pass FCD check
1163 if(element->cSize > 1 && !(element->cSize==2 && UTF16_IS_LEAD(element->cPoints[0]) && UTF16_IS_TRAIL(element->cPoints[1]))) { // this is a contraction, we should check whether a composed form should also be included
1164 UnicodeString source(element->cPoints, element->cSize);
1165 CanonicalIterator it(source, *status);
1166 source = it.next();
1167 while(!source.isBogus()) {
1168 if(Normalizer::quickCheck(source, UNORM_FCD, *status) != UNORM_NO) {
1169 element->cSize = source.extract(element->cPoints, 128, *status);
1170 uprv_uca_finalizeAddition(t, element, status);
1171 }
1172 source = it.next();
1173 }
1174 CE = element->mapCE;
1175 } else {
1176 CE = uprv_uca_finalizeAddition(t, element, status);
1177 }
1178
1179 return CE;
1180}
1181
1182
1183/*void uprv_uca_getMaxExpansionJamo(CompactEIntArray *mapping, */
374ca955 1184static void uprv_uca_getMaxExpansionJamo(UNewTrie *mapping,
b75a7d8f
A
1185 MaxExpansionTable *maxexpansion,
1186 MaxJamoExpansionTable *maxjamoexpansion,
1187 UBool jamospecial,
1188 UErrorCode *status)
1189{
1190 const uint32_t VBASE = 0x1161;
1191 const uint32_t TBASE = 0x11A8;
1192 const uint32_t VCOUNT = 21;
1193 const uint32_t TCOUNT = 28;
374ca955 1194
b75a7d8f
A
1195 uint32_t v = VBASE + VCOUNT - 1;
1196 uint32_t t = TBASE + TCOUNT - 1;
1197 uint32_t ce;
1198
1199 while (v >= VBASE) {
1200 /*ce = ucmpe32_get(mapping, v);*/
1201 ce = utrie_get32(mapping, v, NULL);
1202 if (ce < UCOL_SPECIAL_FLAG) {
1203 uprv_uca_setMaxExpansion(ce, 2, maxexpansion, status);
1204 }
1205 v --;
1206 }
1207
1208 while (t >= TBASE)
1209 {
1210 /*ce = ucmpe32_get(mapping, t);*/
1211 ce = utrie_get32(mapping, t, NULL);
1212 if (ce < UCOL_SPECIAL_FLAG) {
1213 uprv_uca_setMaxExpansion(ce, 3, maxexpansion, status);
1214 }
1215 t --;
1216 }
1217 /* According to the docs, 99% of the time, the Jamo will not be special */
1218 if (jamospecial) {
1219 /* gets the max expansion in all unicode characters */
1220 int count = maxjamoexpansion->position;
1221 uint8_t maxTSize = (uint8_t)(maxjamoexpansion->maxLSize +
1222 maxjamoexpansion->maxVSize +
1223 maxjamoexpansion->maxTSize);
1224 uint8_t maxVSize = (uint8_t)(maxjamoexpansion->maxLSize +
1225 maxjamoexpansion->maxVSize);
1226
1227 while (count > 0) {
1228 count --;
1229 if (*(maxjamoexpansion->isV + count) == TRUE) {
1230 uprv_uca_setMaxExpansion(
1231 *(maxjamoexpansion->endExpansionCE + count),
1232 maxVSize, maxexpansion, status);
1233 }
1234 else {
1235 uprv_uca_setMaxExpansion(
1236 *(maxjamoexpansion->endExpansionCE + count),
1237 maxTSize, maxexpansion, status);
1238 }
1239 }
1240 }
1241}
1242
1243U_CDECL_BEGIN
1244static inline uint32_t U_CALLCONV
1245getFoldedValue(UNewTrie *trie, UChar32 start, int32_t offset)
1246{
1247 uint32_t value;
1248 uint32_t tag;
1249 UChar32 limit;
1250 UBool inBlockZero;
1251
1252 limit=start+0x400;
1253 while(start<limit) {
1254 value=utrie_get32(trie, start, &inBlockZero);
1255 tag = getCETag(value);
1256 if(inBlockZero == TRUE) {
1257 start+=UTRIE_DATA_BLOCK_LENGTH;
1258 } else if(!(isSpecial(value) && (tag == IMPLICIT_TAG || tag == NOT_FOUND_TAG))) {
1259 /* These are values that are starting in either UCA (IMPLICIT_TAG) or in the
1260 * tailorings (NOT_FOUND_TAG). Presence of these tags means that there is
1261 * nothing in this position and that it should be skipped.
1262 */
1263#ifdef UCOL_DEBUG
1264 static int32_t count = 1;
1265 fprintf(stdout, "%i, Folded %08X, value %08X\n", count++, start, value);
1266#endif
1267 return (uint32_t)(UCOL_SPECIAL_FLAG | (SURROGATE_TAG<<24) | offset);
1268 } else {
1269 ++start;
1270 }
1271 }
1272 return 0;
1273}
1274U_CDECL_END
1275
1276#ifdef UCOL_DEBUG
1277// This is a debug function to print the contents of a trie.
1278// It is used in conjuction with the code around utrie_unserialize call
1279void enumRange(const void *context, UChar32 start, UChar32 limit, uint32_t value) {
1280 if(start<0x10000) {
1281 fprintf(stdout, "%08X, %08X, %08X\n", start, limit, value);
1282 } else {
1283 fprintf(stdout, "%08X=%04X %04X, %08X=%04X %04X, %08X\n", start, UTF16_LEAD(start), UTF16_TRAIL(start), limit, UTF16_LEAD(limit), UTF16_TRAIL(limit), value);
1284 }
1285}
1286
1287int32_t
1288myGetFoldingOffset(uint32_t data) {
1289 if(data > UCOL_NOT_FOUND && getCETag(data) == SURROGATE_TAG) {
1290 return (data&0xFFFFFF);
1291 } else {
1292 return 0;
1293 }
1294}
1295#endif
1296
1297U_CAPI UCATableHeader* U_EXPORT2
1298uprv_uca_assembleTable(tempUCATable *t, UErrorCode *status) {
1299 /*CompactEIntArray *mapping = t->mapping;*/
1300 UNewTrie *mapping = t->mapping;
1301 ExpansionTable *expansions = t->expansions;
1302 CntTable *contractions = t->contractions;
1303 MaxExpansionTable *maxexpansion = t->maxExpansions;
1304
1305 if(U_FAILURE(*status)) {
1306 return NULL;
1307 }
1308
1309 uint32_t beforeContractions = (uint32_t)((headersize+paddedsize(expansions->position*sizeof(uint32_t)))/sizeof(UChar));
1310
1311 int32_t contractionsSize = 0;
1312 contractionsSize = uprv_cnttab_constructTable(contractions, beforeContractions, status);
1313
1314 /* the following operation depends on the trie data. Therefore, we have to do it before */
1315 /* the trie is compacted */
1316 /* sets jamo expansions */
1317 uprv_uca_getMaxExpansionJamo(mapping, maxexpansion, t->maxJamoExpansions,
1318 t->image->jamoSpecial, status);
1319
1320 /*ucmpe32_compact(mapping);*/
1321 /*UMemoryStream *ms = uprv_mstrm_openNew(8192);*/
1322 /*int32_t mappingSize = ucmpe32_flattenMem(mapping, ms);*/
1323 /*const uint8_t *flattened = uprv_mstrm_getBuffer(ms, &mappingSize);*/
1324
1325 // After setting the jamo expansions, compact the trie and get the needed size
1326 int32_t mappingSize = utrie_serialize(mapping, NULL, 0, getFoldedValue /*getFoldedValue*/, FALSE, status);
1327
1328 uint32_t tableOffset = 0;
1329 uint8_t *dataStart;
1330
1331 /* TODO: LATIN1 array is now in the utrie - it should be removed from the calculation */
1332
1333 uint32_t toAllocate =(uint32_t)(headersize+
1334 paddedsize(expansions->position*sizeof(uint32_t))+
1335 paddedsize(mappingSize)+
1336 paddedsize(contractionsSize*(sizeof(UChar)+sizeof(uint32_t)))+
1337 //paddedsize(0x100*sizeof(uint32_t)) /* Latin1 is now included in the trie */
1338 /* maxexpansion array */
1339 + paddedsize(maxexpansion->position * sizeof(uint32_t)) +
1340 /* maxexpansion size array */
1341 paddedsize(maxexpansion->position * sizeof(uint8_t)) +
1342 paddedsize(UCOL_UNSAFECP_TABLE_SIZE) + /* Unsafe chars */
1343 paddedsize(UCOL_UNSAFECP_TABLE_SIZE)); /* Contraction Ending chars */
1344
1345
1346 dataStart = (uint8_t *)uprv_malloc(toAllocate);
1347 /* test for NULL */
1348 if (dataStart == NULL) {
1349 *status = U_MEMORY_ALLOCATION_ERROR;
1350 return NULL;
1351 }
1352
1353 UCATableHeader *myData = (UCATableHeader *)dataStart;
374ca955
A
1354 // Please, do reset all the fields!
1355 uprv_memset(dataStart, 0, toAllocate);
1356 // Make sure we know this is reset
1357 myData->magic = UCOL_HEADER_MAGIC;
1358 myData->isBigEndian = U_IS_BIG_ENDIAN;
1359 myData->charSetFamily = U_CHARSET_FAMILY;
1360 myData->formatVersion[0] = UCA_FORMAT_VERSION_0;
1361 myData->formatVersion[1] = UCA_FORMAT_VERSION_1;
1362 myData->formatVersion[2] = UCA_FORMAT_VERSION_2;
1363 myData->formatVersion[3] = UCA_FORMAT_VERSION_3;
1364 myData->jamoSpecial = t->image->jamoSpecial;
1365
1366 // Don't copy stuff from UCA header!
1367 //uprv_memcpy(myData, t->image, sizeof(UCATableHeader));
b75a7d8f
A
1368
1369 myData->contractionSize = contractionsSize;
1370
1371 tableOffset += (uint32_t)(paddedsize(sizeof(UCATableHeader)));
1372
1373 myData->options = tableOffset;
1374 uprv_memcpy(dataStart+tableOffset, t->options, sizeof(UColOptionSet));
1375 tableOffset += (uint32_t)(paddedsize(sizeof(UColOptionSet)));
1376
1377 /* copy expansions */
1378 /*myData->expansion = (uint32_t *)dataStart+tableOffset;*/
1379 myData->expansion = tableOffset;
1380 uprv_memcpy(dataStart+tableOffset, expansions->CEs, expansions->position*sizeof(uint32_t));
1381 tableOffset += (uint32_t)(paddedsize(expansions->position*sizeof(uint32_t)));
1382
1383 /* contractions block */
1384 if(contractionsSize != 0) {
1385 /* copy contraction index */
1386 /*myData->contractionIndex = (UChar *)(dataStart+tableOffset);*/
1387 myData->contractionIndex = tableOffset;
1388 uprv_memcpy(dataStart+tableOffset, contractions->codePoints, contractionsSize*sizeof(UChar));
1389 tableOffset += (uint32_t)(paddedsize(contractionsSize*sizeof(UChar)));
1390
1391 /* copy contraction collation elements */
1392 /*myData->contractionCEs = (uint32_t *)(dataStart+tableOffset);*/
1393 myData->contractionCEs = tableOffset;
1394 uprv_memcpy(dataStart+tableOffset, contractions->CEs, contractionsSize*sizeof(uint32_t));
1395 tableOffset += (uint32_t)(paddedsize(contractionsSize*sizeof(uint32_t)));
1396 } else {
1397 myData->contractionIndex = 0;
374ca955 1398 myData->contractionCEs = 0;
b75a7d8f
A
1399 }
1400
1401 /* copy mapping table */
1402 /*myData->mappingPosition = dataStart+tableOffset;*/
1403 /*myData->mappingPosition = tableOffset;*/
1404 /*uprv_memcpy(dataStart+tableOffset, flattened, mappingSize);*/
1405
1406 myData->mappingPosition = tableOffset;
1407 utrie_serialize(mapping, dataStart+tableOffset, toAllocate-tableOffset, getFoldedValue, FALSE, status);
1408#ifdef UCOL_DEBUG
1409 // This is debug code to dump the contents of the trie. It needs two functions defined above
1410 {
1411 UTrie UCAt = { 0 };
374ca955 1412 uint32_t trieWord;
b75a7d8f
A
1413 utrie_unserialize(&UCAt, dataStart+tableOffset, 9999999, status);
1414 UCAt.getFoldingOffset = myGetFoldingOffset;
1415 if(U_SUCCESS(*status)) {
1416 utrie_enum(&UCAt, NULL, enumRange, NULL);
1417 }
374ca955 1418 trieWord = UTRIE_GET32_FROM_LEAD(UCAt, 0xDC01)
b75a7d8f
A
1419 }
1420#endif
1421 tableOffset += paddedsize(mappingSize);
1422
1423
1424 int32_t i = 0;
1425
1426 /* copy max expansion table */
1427 myData->endExpansionCE = tableOffset;
1428 myData->endExpansionCECount = maxexpansion->position;
1429 /* not copying the first element which is a dummy */
1430 uprv_memcpy(dataStart + tableOffset, maxexpansion->endExpansionCE + 1,
1431 maxexpansion->position * sizeof(uint32_t));
1432 tableOffset += (uint32_t)(paddedsize(maxexpansion->position * sizeof(uint32_t)));
1433 myData->expansionCESize = tableOffset;
1434 uprv_memcpy(dataStart + tableOffset, maxexpansion->expansionCESize + 1,
1435 maxexpansion->position * sizeof(uint8_t));
1436 tableOffset += (uint32_t)(paddedsize(maxexpansion->position * sizeof(uint8_t)));
1437
1438 /* Unsafe chars table. Finish it off, then copy it. */
1439 uprv_uca_unsafeCPAddCCNZ(t, status);
1440 if (t->UCA != 0) { /* Or in unsafebits from UCA, making a combined table. */
1441 for (i=0; i<UCOL_UNSAFECP_TABLE_SIZE; i++) {
1442 t->unsafeCP[i] |= t->UCA->unsafeCP[i];
1443 }
1444 }
1445 myData->unsafeCP = tableOffset;
1446 uprv_memcpy(dataStart + tableOffset, t->unsafeCP, UCOL_UNSAFECP_TABLE_SIZE);
1447 tableOffset += paddedsize(UCOL_UNSAFECP_TABLE_SIZE);
1448
1449
1450 /* Finish building Contraction Ending chars hash table and then copy it out. */
1451 if (t->UCA != 0) { /* Or in unsafebits from UCA, making a combined table. */
1452 for (i=0; i<UCOL_UNSAFECP_TABLE_SIZE; i++) {
1453 t->contrEndCP[i] |= t->UCA->contrEndCP[i];
1454 }
1455 }
1456 myData->contrEndCP = tableOffset;
1457 uprv_memcpy(dataStart + tableOffset, t->contrEndCP, UCOL_UNSAFECP_TABLE_SIZE);
1458 tableOffset += paddedsize(UCOL_UNSAFECP_TABLE_SIZE);
1459
1460 if(tableOffset != toAllocate) {
1461#ifdef UCOL_DEBUG
1462 fprintf(stderr, "calculation screwup!!! Expected to write %i but wrote %i instead!!!\n", toAllocate, tableOffset);
1463#endif
1464 *status = U_INTERNAL_PROGRAM_ERROR;
1465 uprv_free(dataStart);
1466 return 0;
1467 }
1468
1469 myData->size = tableOffset;
1470 /* This should happen upon ressurection */
1471 /*const uint8_t *mapPosition = (uint8_t*)myData+myData->mappingPosition;*/
1472 /*uprv_mstrm_close(ms);*/
1473 return myData;
1474}
1475
1476
1477struct enumStruct {
1478 tempUCATable *t;
1479 UCollator *tempColl;
1480 UCollationElements* colEl;
1481 int32_t noOfClosures;
1482 UErrorCode *status;
1483};
1484U_CDECL_BEGIN
1485static UBool U_CALLCONV
1486_enumCategoryRangeClosureCategory(const void *context, UChar32 start, UChar32 limit, UCharCategory type) {
1487
1488 UErrorCode *status = ((enumStruct *)context)->status;
1489 tempUCATable *t = ((enumStruct *)context)->t;
1490 UCollator *tempColl = ((enumStruct *)context)->tempColl;
1491 UCollationElements* colEl = ((enumStruct *)context)->colEl;
1492 UCAElements el;
1493 UChar decomp[256] = { 0 };
1494 int32_t noOfDec = 0;
1495
1496 UChar32 u32 = 0;
1497 UChar comp[2];
1498 uint32_t len = 0;
1499
1500 if (type != U_UNASSIGNED && type != U_PRIVATE_USE_CHAR) { // if the range is assigned - we might ommit more categories later
1501 for(u32 = start; u32 < limit; u32++) {
1502 noOfDec = unorm_getDecomposition(u32, FALSE, decomp, 256);
1503 //if((noOfDec = unorm_normalize(comp, len, UNORM_NFD, 0, decomp, 256, status)) > 1
1504 //|| (noOfDec == 1 && *decomp != (UChar)u32))
1505 if(noOfDec > 0) // if we're positive, that means there is no decomposition
1506 {
1507 len = 0;
1508 UTF_APPEND_CHAR_UNSAFE(comp, len, u32);
1509 if(ucol_strcoll(tempColl, comp, len, decomp, noOfDec) != UCOL_EQUAL) {
1510#ifdef UCOL_DEBUG
1511 fprintf(stderr, "Closure: %08X -> ", u32);
1512 uint32_t i = 0;
1513 for(i = 0; i<noOfDec; i++) {
1514 fprintf(stderr, "%04X ", decomp[i]);
1515 }
1516 fprintf(stderr, "\n");
1517#endif
1518 ((enumStruct *)context)->noOfClosures++;
1519 el.cPoints = decomp;
1520 el.cSize = noOfDec;
1521 el.noOfCEs = 0;
1522 el.prefix = el.prefixChars;
1523 el.prefixSize = 0;
1524
1525 UCAElements *prefix=(UCAElements *)uhash_get(t->prefixLookup, &el);
1526 if(prefix == NULL) {
1527 el.cPoints = comp;
1528 el.cSize = len;
1529 el.prefix = el.prefixChars;
1530 el.prefixSize = 0;
1531 el.noOfCEs = 0;
1532 ucol_setText(colEl, decomp, noOfDec, status);
374ca955 1533 while((el.CEs[el.noOfCEs] = ucol_next(colEl, status)) != (uint32_t)UCOL_NULLORDER) {
b75a7d8f
A
1534 el.noOfCEs++;
1535 }
1536 } else {
1537 el.cPoints = comp;
1538 el.cSize = len;
1539 el.prefix = el.prefixChars;
1540 el.prefixSize = 0;
1541 el.noOfCEs = 1;
1542 el.CEs[0] = prefix->mapCE;
1543 // This character uses a prefix. We have to add it
1544 // to the unsafe table, as it decomposed form is already
1545 // in. In Japanese, this happens for \u309e & \u30fe
1546 // Since unsafeCPSet is static in ucol_elm, we are going
1547 // to wrap it up in the uprv_uca_unsafeCPAddCCNZ function
1548 }
1549 if(UCOL_ISTHAIPREVOWEL(el.cPoints[0])) {
1550 el.isThai = TRUE;
1551 } else {
1552 el.isThai = FALSE;
1553 }
1554
1555 uprv_uca_addAnElement(t, &el, status);
1556 }
1557 }
1558 }
1559 }
1560 return TRUE;
1561}
1562U_CDECL_END
1563
1564U_CAPI int32_t U_EXPORT2
1565uprv_uca_canonicalClosure(tempUCATable *t, UErrorCode *status)
1566{
1567 enumStruct context;
1568 context.noOfClosures = 0;
1569 if(U_SUCCESS(*status)) {
1570 UCollator *tempColl = NULL;
1571 tempUCATable *tempTable = uprv_uca_cloneTempTable(t, status);
1572
1573 UCATableHeader *tempData = uprv_uca_assembleTable(tempTable, status);
374ca955 1574 tempColl = ucol_initCollator(tempData, 0, t->UCA, status);
b75a7d8f
A
1575 uprv_uca_closeTempTable(tempTable);
1576
1577 if(U_SUCCESS(*status)) {
1578 tempColl->rb = NULL;
1579 tempColl->elements = NULL;
1580 tempColl->validLocale = NULL;
1581 tempColl->requestedLocale = NULL;
1582 tempColl->hasRealData = TRUE;
1583 tempColl->freeImageOnClose = TRUE;
1584 } else if(tempData != 0) {
1585 uprv_free(tempData);
1586 }
1587
1588 /* produce canonical closure */
1589 UCollationElements* colEl = ucol_openElements(tempColl, NULL, 0, status);
1590
1591 context.t = t;
1592 context.tempColl = tempColl;
1593 context.colEl = colEl;
1594 context.status = status;
1595 u_enumCharTypes(_enumCategoryRangeClosureCategory, &context);
1596
1597 ucol_closeElements(colEl);
1598 ucol_close(tempColl);
1599 }
1600 return context.noOfClosures;
1601}
1602
1603U_NAMESPACE_END
1604
374ca955
A
1605#endif /* #if !UCONFIG_NO_COLLATION */
1606
1607