icuSources/i18n/ucol_elm.cpp

   1 /*
   2 *******************************************************************************
   3 *
   4 *   Copyright (C) 2001-2006, International Business Machines
   5 *   Corporation and others.  All Rights Reserved.
   6 *
   7 *******************************************************************************
   8 *   file name:  ucaelems.cpp
   9 *   encoding:   US-ASCII
  10 *   tab size:   8 (not used)
  11 *   indentation:4
  12 *
  13 *   created 02/22/2001
  14 *   created by: Vladimir Weinstein
  15 *
  16 *   This program reads the Fractional UCA table and generates
  17 *   internal format for UCA table as well as inverse UCA table.
  18 *   It then writes binary files containing the data: ucadata.dat
  19 *   & invuca.dat
  20 *
  21 *   date        name       comments
  22 *   03/02/2001  synwee     added setMaxExpansion
  23 *   03/07/2001  synwee     merged UCA's maxexpansion and tailoring's
  24 */
  25
  26 #include "unicode/utypes.h"
  27
  28 #if !UCONFIG_NO_COLLATION
  29
  30 #include "unicode/uchar.h"
  31 #include "unicode/unistr.h"
  32 #include "unicode/ucoleitr.h"
  33 #include "unicode/normlzr.h"
  34 #include "ucol_elm.h"
  35 #include "unormimp.h"
  36 #include "unicode/caniter.h"
  37 #include "cmemory.h"
  38
  39 static uint32_t uprv_uca_processContraction(CntTable *contractions, UCAElements *element, uint32_t existingCE, UErrorCode *status);
  40
  41 U_CDECL_BEGIN
  42 static int32_t U_CALLCONV
  43 prefixLookupHash(const UHashTok e) {
  44   UCAElements *element = (UCAElements *)e.pointer;
  45   UChar buf[256];
  46   UHashTok key;
  47   key.pointer = buf;
  48   uprv_memcpy(buf, element->cPoints, element->cSize*sizeof(UChar));
  49   buf[element->cSize] = 0;
  50   //key.pointer = element->cPoints;
  51   //element->cPoints[element->cSize] = 0;
  52   return uhash_hashUChars(key);
  53 }
  54
  55 static int8_t U_CALLCONV
  56 prefixLookupComp(const UHashTok e1, const UHashTok e2) {
  57   UCAElements *element1 = (UCAElements *)e1.pointer;
  58   UCAElements *element2 = (UCAElements *)e2.pointer;
  59
  60   UChar buf1[256];
  61   UHashTok key1;
  62   key1.pointer = buf1;
  63   uprv_memcpy(buf1, element1->cPoints, element1->cSize*sizeof(UChar));
  64   buf1[element1->cSize] = 0;
  65
  66   UChar buf2[256];
  67   UHashTok key2;
  68   key2.pointer = buf2;
  69   uprv_memcpy(buf2, element2->cPoints, element2->cSize*sizeof(UChar));
  70   buf2[element2->cSize] = 0;
  71
  72   return uhash_compareUChars(key1, key2);
  73 }
  74 U_CDECL_END
  75
  76 static int32_t uprv_uca_addExpansion(ExpansionTable *expansions, uint32_t value, UErrorCode *status) {
  77     if(U_FAILURE(*status)) {
  78         return 0;
  79     }
  80     if(expansions->CEs == NULL) {
  81         expansions->CEs = (uint32_t *)uprv_malloc(INIT_EXP_TABLE_SIZE*sizeof(uint32_t));
  82         /* test for NULL */
  83         if (expansions->CEs == NULL) {
  84             *status = U_MEMORY_ALLOCATION_ERROR;
  85             return 0;
  86         }
  87         expansions->size = INIT_EXP_TABLE_SIZE;
  88         expansions->position = 0;
  89     }
  90
  91     if(expansions->position == expansions->size) {
  92         uint32_t *newData = (uint32_t *)uprv_realloc(expansions->CEs, 2*expansions->size*sizeof(uint32_t));
  93         if(newData == NULL) {
  94 #ifdef UCOL_DEBUG
  95             fprintf(stderr, "out of memory for expansions\n");
  96 #endif
  97             *status = U_MEMORY_ALLOCATION_ERROR;
  98             return -1;
  99         }
 100         expansions->CEs = newData;
 101         expansions->size *= 2;
 102     }
 103
 104     expansions->CEs[expansions->position] = value;
 105     return(expansions->position++);
 106 }
 107
 108 U_CAPI tempUCATable*  U_EXPORT2
 109 uprv_uca_initTempTable(UCATableHeader *image, UColOptionSet *opts, const UCollator *UCA, UColCETags initTag, UColCETags supplementaryInitTag, UErrorCode *status) {
 110   MaxJamoExpansionTable *maxjet;
 111   MaxExpansionTable *maxet;
 112   tempUCATable *t = (tempUCATable *)uprv_malloc(sizeof(tempUCATable));
 113   /* test for NULL */
 114   if (t == NULL) {
 115       *status = U_MEMORY_ALLOCATION_ERROR;
 116       return NULL;
 117   }
 118   uprv_memset(t, 0, sizeof(tempUCATable));
 119
 120   maxet  = (MaxExpansionTable *)uprv_malloc(sizeof(MaxExpansionTable));
 121   if (maxet == NULL) {
 122       goto allocation_failure;
 123   }
 124   uprv_memset(maxet, 0, sizeof(MaxExpansionTable));
 125   t->maxExpansions       = maxet;
 126
 127   maxjet = (MaxJamoExpansionTable *)uprv_malloc(sizeof(MaxJamoExpansionTable));
 128   if (maxjet == NULL) {
 129       goto allocation_failure;
 130   }
 131   uprv_memset(maxjet, 0, sizeof(MaxJamoExpansionTable));
 132   t->maxJamoExpansions = maxjet;
 133
 134   t->image = image;
 135   t->options = opts;
 136
 137   t->UCA = UCA;
 138   t->expansions = (ExpansionTable *)uprv_malloc(sizeof(ExpansionTable));
 139   /* test for NULL */
 140   if (t->expansions == NULL) {
 141       goto allocation_failure;
 142   }
 143   uprv_memset(t->expansions, 0, sizeof(ExpansionTable));
 144   /*t->mapping = ucmpe32_open(UCOL_SPECIAL_FLAG | (initTag<<24), UCOL_SPECIAL_FLAG | (SURROGATE_TAG<<24), UCOL_SPECIAL_FLAG | (LEAD_SURROGATE_TAG<<24), status);*/
 145   /*t->mapping = utrie_open(NULL, NULL, 0x100000, UCOL_SPECIAL_FLAG | (initTag<<24), TRUE); // Do your own mallocs for the structure, array and have linear Latin 1*/
 146
 147   t->mapping = utrie_open(NULL, NULL, 0x100000,
 148                           UCOL_SPECIAL_FLAG | (initTag<<24),
 149                           UCOL_SPECIAL_FLAG | (supplementaryInitTag << 24),
 150                           TRUE); // Do your own mallocs for the structure, array and have linear Latin 1
 151   t->prefixLookup = uhash_open(prefixLookupHash, prefixLookupComp, NULL, status);
 152   uhash_setValueDeleter(t->prefixLookup, uhash_freeBlock);
 153
 154   t->contractions = uprv_cnttab_open(t->mapping, status);
 155
 156   /* copy UCA's maxexpansion and merge as we go along */
 157   if (UCA != NULL) {
 158     /* adding an extra initial value for easier manipulation */
 159     maxet->size            = (UCA->lastEndExpansionCE - UCA->endExpansionCE)
 160                              + 2;
 161     maxet->position        = maxet->size - 1;
 162     maxet->endExpansionCE  =
 163                       (uint32_t *)uprv_malloc(sizeof(uint32_t) * maxet->size);
 164     /* test for NULL */
 165     if (maxet->endExpansionCE == NULL) {
 166         goto allocation_failure;
 167     }
 168     maxet->expansionCESize =
 169                         (uint8_t *)uprv_malloc(sizeof(uint8_t) * maxet->size);
 170     /* test for NULL */
 171     if (maxet->expansionCESize == NULL) {
 172         goto allocation_failure;
 173     }
 174     /* initialized value */
 175     *(maxet->endExpansionCE)  = 0;
 176     *(maxet->expansionCESize) = 0;
 177     uprv_memcpy(maxet->endExpansionCE + 1, UCA->endExpansionCE,
 178                 sizeof(uint32_t) * (maxet->size - 1));
 179     uprv_memcpy(maxet->expansionCESize + 1, UCA->expansionCESize,
 180                 sizeof(uint8_t) * (maxet->size - 1));
 181   }
 182   else {
 183     maxet->size     = 0;
 184   }
 185   maxjet->endExpansionCE = NULL;
 186   maxjet->isV = NULL;
 187   maxjet->size = 0;
 188   maxjet->position = 0;
 189   maxjet->maxLSize = 1;
 190   maxjet->maxVSize = 1;
 191   maxjet->maxTSize = 1;
 192
 193   t->unsafeCP = (uint8_t *)uprv_malloc(UCOL_UNSAFECP_TABLE_SIZE);
 194   /* test for NULL */
 195   if (t->unsafeCP == NULL) {
 196       goto allocation_failure;
 197   }
 198   t->contrEndCP = (uint8_t *)uprv_malloc(UCOL_UNSAFECP_TABLE_SIZE);
 199   /* test for NULL */
 200   if (t->contrEndCP == NULL) {
 201       goto allocation_failure;
 202   }
 203   uprv_memset(t->unsafeCP, 0, UCOL_UNSAFECP_TABLE_SIZE);
 204   uprv_memset(t->contrEndCP, 0, UCOL_UNSAFECP_TABLE_SIZE);
 205   return t;
 206
 207 allocation_failure:
 208   uprv_uca_closeTempTable(t);
 209   *status = U_MEMORY_ALLOCATION_ERROR;
 210   return NULL;
 211 }
 212
 213 U_CAPI tempUCATable* U_EXPORT2
 214 uprv_uca_cloneTempTable(tempUCATable *t, UErrorCode *status) {
 215   if(U_FAILURE(*status)) {
 216     return NULL;
 217   }
 218
 219   tempUCATable *r = (tempUCATable *)uprv_malloc(sizeof(tempUCATable));
 220   /* test for NULL */
 221   if (r == NULL) {
 222     *status = U_MEMORY_ALLOCATION_ERROR;
 223     return NULL;
 224   }
 225   uprv_memset(r, 0, sizeof(tempUCATable));
 226
 227   /* mapping */
 228   if(t->mapping != NULL) {
 229     /*r->mapping = ucmpe32_clone(t->mapping, status);*/
 230     r->mapping = utrie_clone(NULL, t->mapping, NULL, 0);
 231   }
 232
 233   // a hashing clone function would be very nice. We have none currently...
 234   // However, we should be good, as closing should not produce any prefixed elements.
 235   r->prefixLookup = NULL; // prefixes are not used in closing
 236
 237   /* expansions */
 238   if(t->expansions != NULL) {
 239     r->expansions = (ExpansionTable *)uprv_malloc(sizeof(ExpansionTable));
 240     /* test for NULL */
 241     if (r->expansions == NULL) {
 242         *status = U_MEMORY_ALLOCATION_ERROR;
 243         return NULL;
 244     }
 245     r->expansions->position = t->expansions->position;
 246     r->expansions->size = t->expansions->size;
 247     if(t->expansions->CEs != NULL) {
 248       r->expansions->CEs = (uint32_t *)uprv_malloc(sizeof(uint32_t)*t->expansions->size);
 249       /* test for NULL */
 250       if (r->expansions->CEs == NULL) {
 251           *status = U_MEMORY_ALLOCATION_ERROR;
 252           return NULL;
 253       }
 254       uprv_memcpy(r->expansions->CEs, t->expansions->CEs, sizeof(uint32_t)*t->expansions->position);
 255     } else {
 256       r->expansions->CEs = NULL;
 257     }
 258   }
 259
 260   if(t->contractions != NULL) {
 261     r->contractions = uprv_cnttab_clone(t->contractions, status);
 262     r->contractions->mapping = r->mapping;
 263   }
 264
 265   if(t->maxExpansions != NULL) {
 266     r->maxExpansions = (MaxExpansionTable *)uprv_malloc(sizeof(MaxExpansionTable));
 267     /* test for NULL */
 268     if (r->maxExpansions == NULL) {
 269         *status = U_MEMORY_ALLOCATION_ERROR;
 270         return NULL;
 271     }
 272     r->maxExpansions->size = t->maxExpansions->size;
 273     r->maxExpansions->position = t->maxExpansions->position;
 274     if(t->maxExpansions->endExpansionCE != NULL) {
 275       r->maxExpansions->endExpansionCE = (uint32_t *)uprv_malloc(sizeof(uint32_t)*t->maxExpansions->size);
 276       uprv_memset(r->maxExpansions->endExpansionCE, 0xDB, sizeof(uint32_t)*t->maxExpansions->size);
 277       /* test for NULL */
 278       if (r->maxExpansions->endExpansionCE == NULL) {
 279           *status = U_MEMORY_ALLOCATION_ERROR;
 280           return NULL;
 281       }
 282       uprv_memcpy(r->maxExpansions->endExpansionCE, t->maxExpansions->endExpansionCE, t->maxExpansions->position*sizeof(uint32_t));
 283     } else {
 284       r->maxExpansions->endExpansionCE = NULL;
 285     }
 286     if(t->maxExpansions->expansionCESize != NULL) {
 287       r->maxExpansions->expansionCESize = (uint8_t *)uprv_malloc(sizeof(uint8_t)*t->maxExpansions->size);
 288       uprv_memset(r->maxExpansions->expansionCESize, 0xDB, sizeof(uint8_t)*t->maxExpansions->size);
 289       /* test for NULL */
 290       if (r->maxExpansions->expansionCESize == NULL) {
 291           *status = U_MEMORY_ALLOCATION_ERROR;
 292           return NULL;
 293       }
 294       uprv_memcpy(r->maxExpansions->expansionCESize, t->maxExpansions->expansionCESize, t->maxExpansions->position*sizeof(uint8_t));
 295     } else {
 296       r->maxExpansions->expansionCESize = NULL;
 297     }
 298   }
 299
 300   if(t->maxJamoExpansions != NULL) {
 301     r->maxJamoExpansions = (MaxJamoExpansionTable *)uprv_malloc(sizeof(MaxJamoExpansionTable));
 302     /* test for NULL */
 303     if (r->maxJamoExpansions == NULL) {
 304         *status = U_MEMORY_ALLOCATION_ERROR;
 305         return NULL;
 306     }
 307     r->maxJamoExpansions->size = t->maxJamoExpansions->size;
 308     r->maxJamoExpansions->position = t->maxJamoExpansions->position;
 309     r->maxJamoExpansions->maxLSize = t->maxJamoExpansions->maxLSize;
 310     r->maxJamoExpansions->maxVSize = t->maxJamoExpansions->maxVSize;
 311     r->maxJamoExpansions->maxTSize = t->maxJamoExpansions->maxTSize;
 312     if(t->maxJamoExpansions->size != 0) {
 313       r->maxJamoExpansions->endExpansionCE = (uint32_t *)uprv_malloc(sizeof(uint32_t)*t->maxJamoExpansions->size);
 314       /* test for NULL */
 315       if (r->maxJamoExpansions->endExpansionCE == NULL) {
 316           *status = U_MEMORY_ALLOCATION_ERROR;
 317           return NULL;
 318       }
 319       uprv_memcpy(r->maxJamoExpansions->endExpansionCE, t->maxJamoExpansions->endExpansionCE, t->maxJamoExpansions->position*sizeof(uint32_t));
 320       r->maxJamoExpansions->isV = (UBool *)uprv_malloc(sizeof(UBool)*t->maxJamoExpansions->size);
 321       /* test for NULL */
 322       if (r->maxJamoExpansions->isV == NULL) {
 323           *status = U_MEMORY_ALLOCATION_ERROR;
 324           return NULL;
 325       }
 326       uprv_memcpy(r->maxJamoExpansions->isV, t->maxJamoExpansions->isV, t->maxJamoExpansions->position*sizeof(UBool));
 327     } else {
 328       r->maxJamoExpansions->endExpansionCE = NULL;
 329       r->maxJamoExpansions->isV = NULL;
 330     }
 331   }
 332
 333   if(t->unsafeCP != NULL) {
 334     r->unsafeCP = (uint8_t *)uprv_malloc(UCOL_UNSAFECP_TABLE_SIZE);
 335     /* test for NULL */
 336     if (r->unsafeCP == NULL) {
 337         *status = U_MEMORY_ALLOCATION_ERROR;
 338         return NULL;
 339     }
 340     uprv_memcpy(r->unsafeCP, t->unsafeCP, UCOL_UNSAFECP_TABLE_SIZE);
 341   }
 342
 343   if(t->contrEndCP != NULL) {
 344     r->contrEndCP = (uint8_t *)uprv_malloc(UCOL_UNSAFECP_TABLE_SIZE);
 345     /* test for NULL */
 346     if (r->contrEndCP == NULL) {
 347         *status = U_MEMORY_ALLOCATION_ERROR;
 348         return NULL;
 349     }
 350     uprv_memcpy(r->contrEndCP, t->contrEndCP, UCOL_UNSAFECP_TABLE_SIZE);
 351   }
 352
 353   r->UCA = t->UCA;
 354   r->image = t->image;
 355   r->options = t->options;
 356
 357   return r;
 358 }
 359
 360
 361 U_CAPI void  U_EXPORT2
 362 uprv_uca_closeTempTable(tempUCATable *t) {
 363   if(t != NULL) {
 364     if (t->expansions != NULL) {
 365       uprv_free(t->expansions->CEs);
 366       uprv_free(t->expansions);
 367     }
 368     if(t->contractions != NULL) {
 369       uprv_cnttab_close(t->contractions);
 370     }
 371     if (t->mapping != NULL) {
 372       utrie_close(t->mapping);
 373     }
 374
 375     if(t->prefixLookup != NULL) {
 376       uhash_close(t->prefixLookup);
 377     }
 378
 379     if (t->maxExpansions != NULL) {
 380       uprv_free(t->maxExpansions->endExpansionCE);
 381       uprv_free(t->maxExpansions->expansionCESize);
 382       uprv_free(t->maxExpansions);
 383     }
 384
 385     if (t->maxJamoExpansions->size > 0) {
 386       uprv_free(t->maxJamoExpansions->endExpansionCE);
 387       uprv_free(t->maxJamoExpansions->isV);
 388     }
 389     uprv_free(t->maxJamoExpansions);
 390
 391     uprv_free(t->unsafeCP);
 392     uprv_free(t->contrEndCP);
 393
 394     uprv_free(t);
 395   }
 396 }
 397
 398 /**
 399 * Looks for the maximum length of all expansion sequences ending with the same
 400 * collation element. The size required for maxexpansion and maxsize is
 401 * returned if the arrays are too small.
 402 * @param endexpansion the last expansion collation element to be added
 403 * @param expansionsize size of the expansion
 404 * @param maxexpansion data structure to store the maximum expansion data.
 405 * @param status error status
 406 * @returns size of the maxexpansion and maxsize used.
 407 */
 408 static int uprv_uca_setMaxExpansion(uint32_t           endexpansion,
 409                              uint8_t            expansionsize,
 410                              MaxExpansionTable *maxexpansion,
 411                              UErrorCode        *status)
 412 {
 413   if (maxexpansion->size == 0) {
 414     /* we'll always make the first element 0, for easier manipulation */
 415     maxexpansion->endExpansionCE =
 416                (uint32_t *)uprv_malloc(INIT_EXP_TABLE_SIZE * sizeof(int32_t));
 417     /* test for NULL */
 418     if (maxexpansion->endExpansionCE == NULL) {
 419         *status = U_MEMORY_ALLOCATION_ERROR;
 420         return 0;
 421     }
 422     *(maxexpansion->endExpansionCE) = 0;
 423     maxexpansion->expansionCESize =
 424                (uint8_t *)uprv_malloc(INIT_EXP_TABLE_SIZE * sizeof(uint8_t));
 425     /* test for NULL */;
 426     if (maxexpansion->expansionCESize == NULL) {
 427         *status = U_MEMORY_ALLOCATION_ERROR;
 428         return 0;
 429     }
 430     *(maxexpansion->expansionCESize) = 0;
 431     maxexpansion->size     = INIT_EXP_TABLE_SIZE;
 432     maxexpansion->position = 0;
 433   }
 434
 435   if (maxexpansion->position + 1 == maxexpansion->size) {
 436     uint32_t *neweece = (uint32_t *)uprv_realloc(maxexpansion->endExpansionCE,
 437                                    2 * maxexpansion->size * sizeof(uint32_t));
 438     uint8_t  *neweces = (uint8_t *)uprv_realloc(maxexpansion->expansionCESize,
 439                                     2 * maxexpansion->size * sizeof(uint8_t));
 440     if (neweece == NULL || neweces == NULL) {
 441 #ifdef UCOL_DEBUG
 442       fprintf(stderr, "out of memory for maxExpansions\n");
 443 #endif
 444       *status = U_MEMORY_ALLOCATION_ERROR;
 445       return -1;
 446     }
 447     maxexpansion->endExpansionCE  = neweece;
 448     maxexpansion->expansionCESize = neweces;
 449     maxexpansion->size *= 2;
 450   }
 451
 452   uint32_t *pendexpansionce = maxexpansion->endExpansionCE;
 453   uint8_t  *pexpansionsize  = maxexpansion->expansionCESize;
 454   int      pos              = maxexpansion->position;
 455
 456   uint32_t *start = pendexpansionce;
 457   uint32_t *limit = pendexpansionce + pos;
 458
 459   /* using binary search to determine if last expansion element is
 460      already in the array */
 461   uint32_t *mid;
 462   int       result = -1;
 463   while (start < limit - 1) {
 464     mid = start + ((limit - start) >> 1);
 465     if (endexpansion <= *mid) {
 466       limit = mid;
 467     }
 468     else {
 469       start = mid;
 470     }
 471   }
 472
 473   if (*start == endexpansion) {
 474     result = start - pendexpansionce;
 475   }
 476   else
 477     if (*limit == endexpansion) {
 478       result = limit - pendexpansionce;
 479     }
 480
 481   if (result > -1) {
 482     /* found the ce in expansion, we'll just modify the size if it is
 483        smaller */
 484     uint8_t *currentsize = pexpansionsize + result;
 485     if (*currentsize < expansionsize) {
 486       *currentsize = expansionsize;
 487     }
 488   }
 489   else {
 490     /* we'll need to squeeze the value into the array.
 491        initial implementation. */
 492     /* shifting the subarray down by 1 */
 493     int      shiftsize     = (pendexpansionce + pos) - start;
 494     uint32_t *shiftpos     = start + 1;
 495     uint8_t  *sizeshiftpos = pexpansionsize + (shiftpos - pendexpansionce);
 496
 497     /* okay need to rearrange the array into sorted order */
 498     if (shiftsize == 0 /*|| *(pendexpansionce + pos) < endexpansion*/) { /* the commented part is actually both redundant and dangerous */
 499       *(pendexpansionce + pos + 1) = endexpansion;
 500       *(pexpansionsize + pos + 1)  = expansionsize;
 501     }
 502     else {
 503       uprv_memmove(shiftpos + 1, shiftpos, shiftsize * sizeof(int32_t));
 504       uprv_memmove(sizeshiftpos + 1, sizeshiftpos,
 505                                                 shiftsize * sizeof(uint8_t));
 506       *shiftpos     = endexpansion;
 507       *sizeshiftpos = expansionsize;
 508     }
 509     maxexpansion->position ++;
 510
 511 #ifdef UCOL_DEBUG
 512     int   temp;
 513     UBool found = FALSE;
 514     for (temp = 0; temp < maxexpansion->position; temp ++) {
 515       if (pendexpansionce[temp] >= pendexpansionce[temp + 1]) {
 516         fprintf(stderr, "expansions %d\n", temp);
 517       }
 518       if (pendexpansionce[temp] == endexpansion) {
 519         found =TRUE;
 520         if (pexpansionsize[temp] < expansionsize) {
 521           fprintf(stderr, "expansions size %d\n", temp);
 522         }
 523       }
 524     }
 525     if (pendexpansionce[temp] == endexpansion) {
 526         found =TRUE;
 527         if (pexpansionsize[temp] < expansionsize) {
 528           fprintf(stderr, "expansions size %d\n", temp);
 529         }
 530       }
 531     if (!found)
 532       fprintf(stderr, "expansion not found %d\n", temp);
 533 #endif
 534   }
 535
 536   return maxexpansion->position;
 537 }
 538
 539 /**
 540 * Sets the maximum length of all jamo expansion sequences ending with the same
 541 * collation element. The size required for maxexpansion and maxsize is
 542 * returned if the arrays are too small.
 543 * @param ch the jamo codepoint
 544 * @param endexpansion the last expansion collation element to be added
 545 * @param expansionsize size of the expansion
 546 * @param maxexpansion data structure to store the maximum expansion data.
 547 * @param status error status
 548 * @returns size of the maxexpansion and maxsize used.
 549 */
 550 static int uprv_uca_setMaxJamoExpansion(UChar                  ch,
 551                                  uint32_t               endexpansion,
 552                                  uint8_t                expansionsize,
 553                                  MaxJamoExpansionTable *maxexpansion,
 554                                  UErrorCode            *status)
 555 {
 556   UBool isV = TRUE;
 557   if (((uint32_t)ch - 0x1100) <= (0x1112 - 0x1100)) {
 558       /* determines L for Jamo, doesn't need to store this since it is never
 559       at the end of a expansion */
 560       if (maxexpansion->maxLSize < expansionsize) {
 561           maxexpansion->maxLSize = expansionsize;
 562       }
 563       return maxexpansion->position;
 564   }
 565
 566   if (((uint32_t)ch - 0x1161) <= (0x1175 - 0x1161)) {
 567       /* determines V for Jamo */
 568       if (maxexpansion->maxVSize < expansionsize) {
 569           maxexpansion->maxVSize = expansionsize;
 570       }
 571   }
 572
 573   if (((uint32_t)ch - 0x11A8) <= (0x11C2 - 0x11A8)) {
 574       isV = FALSE;
 575       /* determines T for Jamo */
 576       if (maxexpansion->maxTSize < expansionsize) {
 577           maxexpansion->maxTSize = expansionsize;
 578       }
 579   }
 580
 581   if (maxexpansion->size == 0) {
 582     /* we'll always make the first element 0, for easier manipulation */
 583     maxexpansion->endExpansionCE =
 584                (uint32_t *)uprv_malloc(INIT_EXP_TABLE_SIZE * sizeof(uint32_t));
 585     /* test for NULL */;
 586     if (maxexpansion->endExpansionCE == NULL) {
 587         *status = U_MEMORY_ALLOCATION_ERROR;
 588         return 0;
 589     }
 590     *(maxexpansion->endExpansionCE) = 0;
 591     maxexpansion->isV =
 592                  (UBool *)uprv_malloc(INIT_EXP_TABLE_SIZE * sizeof(UBool));
 593     /* test for NULL */;
 594     if (maxexpansion->isV == NULL) {
 595         *status = U_MEMORY_ALLOCATION_ERROR;
 596         uprv_free(maxexpansion->endExpansionCE);
 597         maxexpansion->endExpansionCE = NULL;
 598         return 0;
 599     }
 600     *(maxexpansion->isV) = 0;
 601     maxexpansion->size     = INIT_EXP_TABLE_SIZE;
 602     maxexpansion->position = 0;
 603   }
 604
 605   if (maxexpansion->position + 1 == maxexpansion->size) {
 606     maxexpansion->size *= 2;
 607     maxexpansion->endExpansionCE = (uint32_t *)uprv_realloc(maxexpansion->endExpansionCE,
 608                                    maxexpansion->size * sizeof(uint32_t));
 609     if (maxexpansion->endExpansionCE == NULL) {
 610 #ifdef UCOL_DEBUG
 611       fprintf(stderr, "out of memory for maxExpansions\n");
 612 #endif
 613       *status = U_MEMORY_ALLOCATION_ERROR;
 614       return 0;
 615     }
 616     maxexpansion->isV  = (UBool *)uprv_realloc(maxexpansion->isV,
 617                                    maxexpansion->size * sizeof(UBool));
 618     if (maxexpansion->isV == NULL) {
 619 #ifdef UCOL_DEBUG
 620       fprintf(stderr, "out of memory for maxExpansions\n");
 621 #endif
 622       *status = U_MEMORY_ALLOCATION_ERROR;
 623       uprv_free(maxexpansion->endExpansionCE);
 624       maxexpansion->endExpansionCE = NULL;
 625       return 0;
 626     }
 627   }
 628
 629   uint32_t *pendexpansionce = maxexpansion->endExpansionCE;
 630   int       pos             = maxexpansion->position;
 631
 632   while (pos > 0) {
 633       pos --;
 634       if (*(pendexpansionce + pos) == endexpansion) {
 635           return maxexpansion->position;
 636       }
 637   }
 638
 639   *(pendexpansionce + maxexpansion->position) = endexpansion;
 640   *(maxexpansion->isV + maxexpansion->position) = isV;
 641   maxexpansion->position ++;
 642
 643   return maxexpansion->position;
 644 }
 645
 646
 647 static void ContrEndCPSet(uint8_t *table, UChar c) {
 648     uint32_t    hash;
 649     uint8_t     *htByte;
 650
 651     hash = c;
 652     if (hash >= UCOL_UNSAFECP_TABLE_SIZE*8) {
 653         hash = (hash & UCOL_UNSAFECP_TABLE_MASK) + 256;
 654     }
 655     htByte = &table[hash>>3];
 656     *htByte |= (1 << (hash & 7));
 657 }
 658
 659
 660 static void unsafeCPSet(uint8_t *table, UChar c) {
 661     uint32_t    hash;
 662     uint8_t     *htByte;
 663
 664     hash = c;
 665     if (hash >= UCOL_UNSAFECP_TABLE_SIZE*8) {
 666         if (hash >= 0xd800 && hash <= 0xf8ff) {
 667             /*  Part of a surrogate, or in private use area.            */
 668             /*   These don't go in the table                            */
 669             return;
 670         }
 671         hash = (hash & UCOL_UNSAFECP_TABLE_MASK) + 256;
 672     }
 673     htByte = &table[hash>>3];
 674     *htByte |= (1 << (hash & 7));
 675 }
 676
 677
 678 /*  to the UnsafeCP hash table, add all chars with combining class != 0     */
 679 static void uprv_uca_unsafeCPAddCCNZ(tempUCATable *t, UErrorCode *status) {
 680
 681     UChar              c;
 682     uint16_t           fcd;     // Hi byte is lead combining class.
 683                                 // lo byte is trailing combing class.
 684     const uint16_t    *fcdTrieData;
 685
 686     fcdTrieData = unorm_getFCDTrie(status);
 687     if (U_FAILURE(*status)) {
 688         return;
 689     }
 690
 691     for (c=0; c<0xffff; c++) {
 692         fcd = unorm_getFCD16(fcdTrieData, c);
 693         if (fcd >= 0x100 ||               // if the leading combining class(c) > 0 ||
 694             (UTF_IS_LEAD(c) && fcd != 0)) //    c is a leading surrogate with some FCD data
 695                 unsafeCPSet(t->unsafeCP, c);
 696     }
 697
 698     if(t->prefixLookup != NULL) {
 699       int32_t i = -1;
 700       const UHashElement *e = NULL;
 701       UCAElements *element = NULL;
 702       UChar NFCbuf[256];
 703       uint32_t NFCbufLen = 0;
 704       while((e = uhash_nextElement(t->prefixLookup, &i)) != NULL) {
 705         element = (UCAElements *)e->value.pointer;
 706         // codepoints here are in the NFD form. We need to add the
 707         // first code point of the NFC form to unsafe, because
 708         // strcoll needs to backup over them.
 709         NFCbufLen = unorm_normalize(element->cPoints, element->cSize, UNORM_NFC, 0,
 710           NFCbuf, 256, status);
 711         unsafeCPSet(t->unsafeCP, NFCbuf[0]);
 712       }
 713     }
 714 }
 715
 716 static uint32_t uprv_uca_addPrefix(tempUCATable *t, uint32_t CE,
 717                                  UCAElements *element, UErrorCode *status) {
 718   // currently the longest prefix we're supporting in Japanese is two characters
 719   // long. Although this table could quite easily mimic complete contraction stuff
 720   // there is no good reason to make a general solution, as it would require some
 721   // error prone messing.
 722     CntTable *contractions = t->contractions;
 723     UChar32 cp;
 724     uint32_t cpsize = 0;
 725     UChar *oldCP = element->cPoints;
 726     uint32_t oldCPSize = element->cSize;
 727
 728
 729     contractions->currentTag = SPEC_PROC_TAG;
 730
 731     // here, we will normalize & add prefix to the table.
 732     uint32_t j = 0;
 733 #ifdef UCOL_DEBUG
 734     for(j=0; j<element->cSize; j++) {
 735       fprintf(stdout, "CP: %04X ", element->cPoints[j]);
 736     }
 737     fprintf(stdout, "El: %08X Pref: ", CE);
 738     for(j=0; j<element->prefixSize; j++) {
 739       fprintf(stdout, "%04X ", element->prefix[j]);
 740     }
 741     fprintf(stdout, "%08X ", element->mapCE);
 742 #endif
 743
 744     for (j = 1; j<element->prefixSize; j++) {   /* First add NFD prefix chars to unsafe CP hash table */
 745       // Unless it is a trail surrogate, which is handled algoritmically and
 746       // shouldn't take up space in the table.
 747       if(!(UTF_IS_TRAIL(element->prefix[j]))) {
 748         unsafeCPSet(t->unsafeCP, element->prefix[j]);
 749       }
 750     }
 751
 752     UChar tempPrefix = 0;
 753
 754     for(j = 0; j < /*nfcSize*/element->prefixSize/2; j++) { // prefixes are going to be looked up backwards
 755       // therefore, we will promptly reverse the prefix buffer...
 756       tempPrefix = *(/*nfcBuffer*/element->prefix+element->prefixSize-j-1);
 757       *(/*nfcBuffer*/element->prefix+element->prefixSize-j-1) = element->prefix[j];
 758       element->prefix[j] = tempPrefix;
 759     }
 760
 761 #ifdef UCOL_DEBUG
 762     fprintf(stdout, "Reversed: ");
 763     for(j=0; j<element->prefixSize; j++) {
 764       fprintf(stdout, "%04X ", element->prefix[j]);
 765     }
 766     fprintf(stdout, "%08X\n", element->mapCE);
 767 #endif
 768
 769     // the first codepoint is also unsafe, as it forms a 'contraction' with the prefix
 770     if(!(UTF_IS_TRAIL(element->cPoints[0]))) {
 771       unsafeCPSet(t->unsafeCP, element->cPoints[0]);
 772     }
 773
 774     // Maybe we need this... To handle prefixes completely in the forward direction...
 775     //if(element->cSize == 1) {
 776     //  if(!(UTF_IS_TRAIL(element->cPoints[0]))) {
 777     //    ContrEndCPSet(t->contrEndCP, element->cPoints[0]);
 778     //  }
 779     //}
 780
 781     element->cPoints = element->prefix;
 782     element->cSize = element->prefixSize;
 783
 784     // Add the last char of the contraction to the contraction-end hash table.
 785     // unless it is a trail surrogate, which is handled algorithmically and
 786     // shouldn't be in the table
 787     if(!(UTF_IS_TRAIL(element->cPoints[element->cSize -1]))) {
 788       ContrEndCPSet(t->contrEndCP, element->cPoints[element->cSize -1]);
 789     }
 790
 791     // First we need to check if contractions starts with a surrogate
 792     UTF_NEXT_CHAR(element->cPoints, cpsize, element->cSize, cp);
 793
 794     // If there are any Jamos in the contraction, we should turn on special
 795     // processing for Jamos
 796     if(UCOL_ISJAMO(element->prefix[0])) {
 797       t->image->jamoSpecial = TRUE;
 798     }
 799     /* then we need to deal with it */
 800     /* we could aready have something in table - or we might not */
 801
 802     if(!isPrefix(CE)) {
 803       /* if it wasn't contraction, we wouldn't end up here*/
 804       int32_t firstContractionOffset = 0;
 805       int32_t contractionOffset = 0;
 806       firstContractionOffset = uprv_cnttab_addContraction(contractions, UPRV_CNTTAB_NEWELEMENT, 0, CE, status);
 807       uint32_t newCE = uprv_uca_processContraction(contractions, element, UCOL_NOT_FOUND, status);
 808       contractionOffset = uprv_cnttab_addContraction(contractions, firstContractionOffset, *element->prefix, newCE, status);
 809       contractionOffset = uprv_cnttab_addContraction(contractions, firstContractionOffset, 0xFFFF, CE, status);
 810       CE =  constructContractCE(SPEC_PROC_TAG, firstContractionOffset);
 811     } else { /* we are adding to existing contraction */
 812       /* there were already some elements in the table, so we need to add a new contraction */
 813       /* Two things can happen here: either the codepoint is already in the table, or it is not */
 814       int32_t position = uprv_cnttab_findCP(contractions, CE, *element->prefix, status);
 815       if(position > 0) {       /* if it is we just continue down the chain */
 816         uint32_t eCE = uprv_cnttab_getCE(contractions, CE, position, status);
 817         uint32_t newCE = uprv_uca_processContraction(contractions, element, eCE, status);
 818         uprv_cnttab_setContraction(contractions, CE, position, *(element->prefix), newCE, status);
 819       } else {                  /* if it isn't, we will have to create a new sequence */
 820         uprv_uca_processContraction(contractions, element, UCOL_NOT_FOUND, status);
 821         uprv_cnttab_insertContraction(contractions, CE, *(element->prefix), element->mapCE, status);
 822       }
 823     }
 824
 825     element->cPoints = oldCP;
 826     element->cSize = oldCPSize;
 827
 828     return CE;
 829 }
 830
 831 // Note regarding surrogate handling: We are interested only in the single
 832 // or leading surrogates in a contraction. If a surrogate is somewhere else
 833 // in the contraction, it is going to be handled as a pair of code units,
 834 // as it doesn't affect the performance AND handling surrogates specially
 835 // would complicate code way too much.
 836 static uint32_t uprv_uca_addContraction(tempUCATable *t, uint32_t CE,
 837                                  UCAElements *element, UErrorCode *status) {
 838     CntTable *contractions = t->contractions;
 839     UChar32 cp;
 840     uint32_t cpsize = 0;
 841
 842     contractions->currentTag = CONTRACTION_TAG;
 843
 844     // First we need to check if contractions starts with a surrogate
 845     UTF_NEXT_CHAR(element->cPoints, cpsize, element->cSize, cp);
 846
 847     if(cpsize<element->cSize) { // This is a real contraction, if there are other characters after the first
 848       uint32_t j = 0;
 849       for (j=1; j<element->cSize; j++) {   /* First add contraction chars to unsafe CP hash table */
 850         // Unless it is a trail surrogate, which is handled algoritmically and
 851         // shouldn't take up space in the table.
 852         if(!(UTF_IS_TRAIL(element->cPoints[j]))) {
 853           unsafeCPSet(t->unsafeCP, element->cPoints[j]);
 854         }
 855       }
 856       // Add the last char of the contraction to the contraction-end hash table.
 857       // unless it is a trail surrogate, which is handled algorithmically and
 858       // shouldn't be in the table
 859       if(!(UTF_IS_TRAIL(element->cPoints[element->cSize -1]))) {
 860         ContrEndCPSet(t->contrEndCP, element->cPoints[element->cSize -1]);
 861       }
 862
 863       // If there are any Jamos in the contraction, we should turn on special
 864       // processing for Jamos
 865       if(UCOL_ISJAMO(element->cPoints[0])) {
 866         t->image->jamoSpecial = TRUE;
 867       }
 868       /* then we need to deal with it */
 869       /* we could aready have something in table - or we might not */
 870       element->cPoints+=cpsize;
 871       element->cSize-=cpsize;
 872       if(!isContraction(CE)) {
 873         /* if it wasn't contraction, we wouldn't end up here*/
 874         int32_t firstContractionOffset = 0;
 875         int32_t contractionOffset = 0;
 876         firstContractionOffset = uprv_cnttab_addContraction(contractions, UPRV_CNTTAB_NEWELEMENT, 0, CE, status);
 877         uint32_t newCE = uprv_uca_processContraction(contractions, element, UCOL_NOT_FOUND, status);
 878         contractionOffset = uprv_cnttab_addContraction(contractions, firstContractionOffset, *element->cPoints, newCE, status);
 879         contractionOffset = uprv_cnttab_addContraction(contractions, firstContractionOffset, 0xFFFF, CE, status);
 880         CE =  constructContractCE(CONTRACTION_TAG, firstContractionOffset);
 881       } else { /* we are adding to existing contraction */
 882         /* there were already some elements in the table, so we need to add a new contraction */
 883         /* Two things can happen here: either the codepoint is already in the table, or it is not */
 884         int32_t position = uprv_cnttab_findCP(contractions, CE, *element->cPoints, status);
 885         if(position > 0) {       /* if it is we just continue down the chain */
 886           uint32_t eCE = uprv_cnttab_getCE(contractions, CE, position, status);
 887           uint32_t newCE = uprv_uca_processContraction(contractions, element, eCE, status);
 888           uprv_cnttab_setContraction(contractions, CE, position, *(element->cPoints), newCE, status);
 889         } else {                  /* if it isn't, we will have to create a new sequence */
 890           uint32_t newCE = uprv_uca_processContraction(contractions, element, UCOL_NOT_FOUND, status);
 891           uprv_cnttab_insertContraction(contractions, CE, *(element->cPoints), newCE, status);
 892         }
 893       }
 894       element->cPoints-=cpsize;
 895       element->cSize+=cpsize;
 896       /*ucmpe32_set(t->mapping, cp, CE);*/
 897       utrie_set32(t->mapping, cp, CE);
 898     } else if(!isContraction(CE)) { /* this is just a surrogate, and there is no contraction */
 899       /*ucmpe32_set(t->mapping, cp, element->mapCE);*/
 900       utrie_set32(t->mapping, cp, element->mapCE);
 901     } else { /* fill out the first stage of the contraction with the surrogate CE */
 902       uprv_cnttab_changeContraction(contractions, CE, 0, element->mapCE, status);
 903       uprv_cnttab_changeContraction(contractions, CE, 0xFFFF, element->mapCE, status);
 904     }
 905     return CE;
 906 }
 907
 908
 909 static uint32_t uprv_uca_processContraction(CntTable *contractions, UCAElements *element, uint32_t existingCE, UErrorCode *status) {
 910     int32_t firstContractionOffset = 0;
 911     int32_t contractionOffset = 0;
 912 //    uint32_t contractionElement = UCOL_NOT_FOUND;
 913
 914     if(U_FAILURE(*status)) {
 915         return UCOL_NOT_FOUND;
 916     }
 917
 918     /* end of recursion */
 919     if(element->cSize == 1) {
 920       if(isCntTableElement(existingCE) && ((UColCETags)getCETag(existingCE) == contractions->currentTag)) {
 921         uprv_cnttab_changeContraction(contractions, existingCE, 0, element->mapCE, status);
 922         uprv_cnttab_changeContraction(contractions, existingCE, 0xFFFF, element->mapCE, status);
 923         return existingCE;
 924       } else {
 925         return element->mapCE; /*can't do just that. existingCe might be a contraction, meaning that we need to do another step */
 926       }
 927     }
 928
 929     /* this recursion currently feeds on the only element we have... We will have to copy it in order to accomodate */
 930     /* for both backward and forward cycles */
 931
 932     /* we encountered either an empty space or a non-contraction element */
 933     /* this means we are constructing a new contraction sequence */
 934     element->cPoints++;
 935     element->cSize--;
 936     if(!isCntTableElement(existingCE)) {
 937       /* if it wasn't contraction, we wouldn't end up here*/
 938       firstContractionOffset = uprv_cnttab_addContraction(contractions, UPRV_CNTTAB_NEWELEMENT, 0, existingCE, status);
 939       uint32_t newCE = uprv_uca_processContraction(contractions, element, UCOL_NOT_FOUND, status);
 940       contractionOffset = uprv_cnttab_addContraction(contractions, firstContractionOffset, *element->cPoints, newCE, status);
 941       contractionOffset = uprv_cnttab_addContraction(contractions, firstContractionOffset, 0xFFFF, existingCE, status);
 942       existingCE =  constructContractCE(contractions->currentTag, firstContractionOffset);
 943     } else { /* we are adding to existing contraction */
 944       /* there were already some elements in the table, so we need to add a new contraction */
 945       /* Two things can happen here: either the codepoint is already in the table, or it is not */
 946       int32_t position = uprv_cnttab_findCP(contractions, existingCE, *element->cPoints, status);
 947       if(position > 0) {       /* if it is we just continue down the chain */
 948         uint32_t eCE = uprv_cnttab_getCE(contractions, existingCE, position, status);
 949         uint32_t newCE = uprv_uca_processContraction(contractions, element, eCE, status);
 950         uprv_cnttab_setContraction(contractions, existingCE, position, *(element->cPoints), newCE, status);
 951       } else {                  /* if it isn't, we will have to create a new sequence */
 952         uint32_t newCE = uprv_uca_processContraction(contractions, element, UCOL_NOT_FOUND, status);
 953         uprv_cnttab_insertContraction(contractions, existingCE, *(element->cPoints), newCE, status);
 954       }
 955     }
 956     element->cPoints--;
 957     element->cSize++;
 958     return existingCE;
 959 }
 960
 961 static uint32_t uprv_uca_finalizeAddition(tempUCATable *t, UCAElements *element, UErrorCode *status) {
 962   uint32_t CE = UCOL_NOT_FOUND;
 963   // This should add a completely ignorable element to the
 964   // unsafe table, so that backward iteration will skip
 965   // over it when treating contractions.
 966   uint32_t i = 0;
 967   if(element->mapCE == 0) {
 968     for(i = 0; i < element->cSize; i++) {
 969       if(!UTF_IS_TRAIL(element->cPoints[i])) {
 970         unsafeCPSet(t->unsafeCP, element->cPoints[i]);
 971       }
 972     }
 973   }
 974   if(element->cSize > 1) { /* we're adding a contraction */
 975     uint32_t i = 0;
 976     UChar32 cp;
 977
 978     UTF_NEXT_CHAR(element->cPoints, i, element->cSize, cp);
 979     /*CE = ucmpe32_get(t->mapping, cp);*/
 980     CE = utrie_get32(t->mapping, cp, NULL);
 981
 982     CE = uprv_uca_addContraction(t, CE, element, status);
 983   } else { /* easy case, */
 984     /*CE = ucmpe32_get(t->mapping, element->cPoints[0]);*/
 985     CE = utrie_get32(t->mapping, element->cPoints[0], NULL);
 986
 987     if( CE != UCOL_NOT_FOUND) {
 988       if(isCntTableElement(CE) /*isContraction(CE)*/) { /* adding a non contraction element (thai, expansion, single) to already existing contraction */
 989         if(!isPrefix(element->mapCE)) { // we cannot reenter prefix elements - as we are going to create a dead loop
 990           // Only expansions and regular CEs can go here... Contractions will never happen in this place
 991             uprv_cnttab_setContraction(t->contractions, CE, 0, 0, element->mapCE, status);
 992             /* This loop has to change the CE at the end of contraction REDO!*/
 993             uprv_cnttab_changeLastCE(t->contractions, CE, element->mapCE, status);
 994         }
 995       } else {
 996         /*ucmpe32_set(t->mapping, element->cPoints[0], element->mapCE);*/
 997         utrie_set32(t->mapping, element->cPoints[0], element->mapCE);
 998 #ifdef UCOL_DEBUG
 999         fprintf(stderr, "Warning - trying to overwrite existing data %08X for cp %04X with %08X\n", CE, element->cPoints[0], element->CEs[0]);
1000         //*status = U_ILLEGAL_ARGUMENT_ERROR;
1001 #endif
1002       }
1003     } else {
1004       /*ucmpe32_set(t->mapping, element->cPoints[0], element->mapCE);*/
1005       utrie_set32(t->mapping, element->cPoints[0], element->mapCE);
1006     }
1007   }
1008   return CE;
1009 }
1010
1011 /* This adds a read element, while testing for existence */
1012 U_CAPI uint32_t  U_EXPORT2
1013 uprv_uca_addAnElement(tempUCATable *t, UCAElements *element, UErrorCode *status) {
1014   ExpansionTable *expansions = t->expansions;
1015
1016   uint32_t i = 1;
1017   uint32_t expansion = 0;
1018   uint32_t CE;
1019
1020   if(U_FAILURE(*status)) {
1021       return 0xFFFF;
1022   }
1023
1024   element->mapCE = 0; // clear mapCE so that we can catch expansions
1025
1026   if(element->noOfCEs == 1) {
1027     element->mapCE = element->CEs[0];
1028   } else {
1029     /* ICU 2.1 long primaries */
1030     /* unfortunately, it looks like we have to look for a long primary here */
1031     /* since in canonical closure we are going to hit some long primaries from */
1032     /* the first phase, and they will come back as continuations/expansions */
1033     /* destroying the effect of the previous opitimization */
1034     /* A long primary is a three byte primary with starting secondaries and tertiaries */
1035     /* It can appear in long runs of only primary differences (like east Asian tailorings) */
1036     /* also, it should not be an expansion, as expansions would break with this */
1037     // This part came in from ucol_bld.cpp
1038     //if(tok->expansion == 0
1039       //&& noOfBytes[0] == 3 && noOfBytes[1] == 1 && noOfBytes[2] == 1
1040       //&& CEparts[1] == (UCOL_BYTE_COMMON << 24) && CEparts[2] == (UCOL_BYTE_COMMON << 24)) {
1041       /* we will construct a special CE that will go unchanged to the table */
1042     if(element->noOfCEs == 2 // a two CE expansion
1043       && isContinuation(element->CEs[1]) // which  is a continuation
1044       && (element->CEs[1] & (~(0xFF << 24 | UCOL_CONTINUATION_MARKER))) == 0 // that has only primaries in continuation,
1045       && (((element->CEs[0]>>8) & 0xFF) == UCOL_BYTE_COMMON) // a common secondary
1046       && ((element->CEs[0] & 0xFF) == UCOL_BYTE_COMMON) // and a common tertiary
1047       ) {
1048 #ifdef UCOL_DEBUG
1049       fprintf(stdout, "Long primary %04X\n", element->cPoints[0]);
1050 #endif
1051       element->mapCE = UCOL_SPECIAL_FLAG | (LONG_PRIMARY_TAG<<24) // a long primary special
1052         | ((element->CEs[0]>>8) & 0xFFFF00) // first and second byte of primary
1053         | ((element->CEs[1]>>24) & 0xFF);   // third byte of primary
1054     } else {
1055       expansion = (uint32_t)(UCOL_SPECIAL_FLAG | (EXPANSION_TAG<<UCOL_TAG_SHIFT)
1056         | ((uprv_uca_addExpansion(expansions, element->CEs[0], status)+(headersize>>2))<<4)
1057         & 0xFFFFF0);
1058
1059       for(i = 1; i<element->noOfCEs; i++) {
1060         uprv_uca_addExpansion(expansions, element->CEs[i], status);
1061       }
1062       if(element->noOfCEs <= 0xF) {
1063         expansion |= element->noOfCEs;
1064       } else {
1065         uprv_uca_addExpansion(expansions, 0, status);
1066       }
1067       element->mapCE = expansion;
1068       uprv_uca_setMaxExpansion(element->CEs[element->noOfCEs - 1],
1069                                (uint8_t)element->noOfCEs,
1070                                t->maxExpansions,
1071                                status);
1072       if(UCOL_ISJAMO(element->cPoints[0])) {
1073         t->image->jamoSpecial = TRUE;
1074         uprv_uca_setMaxJamoExpansion(element->cPoints[0],
1075                                  element->CEs[element->noOfCEs - 1],
1076                                  (uint8_t)element->noOfCEs,
1077                                  t->maxJamoExpansions,
1078                                  status);
1079         if (U_FAILURE(*status)) {
1080             return 0;
1081         }
1082       }
1083     }
1084   }
1085
1086   // We treat digits differently - they are "uber special" and should be
1087   // processed differently if numeric collation is on.
1088   UChar32 uniChar = 0;
1089   //printElement(element);
1090   if ((element->cSize == 2) && U16_IS_LEAD(element->cPoints[0])){
1091       uniChar = U16_GET_SUPPLEMENTARY(element->cPoints[0], element->cPoints[1]);
1092   } else if (element->cSize == 1){
1093       uniChar = element->cPoints[0];
1094   }
1095
1096   // Here, we either have one normal CE OR mapCE is set. Therefore, we stuff only
1097   // one element to the expansion buffer. When we encounter a digit and we don't
1098   // do numeric collation, we will just pick the CE we have and break out of case
1099   // (see ucol.cpp ucol_prv_getSpecialCE && ucol_prv_getSpecialPrevCE). If we picked
1100   // a special, further processing will occur. If it's a simple CE, we'll return due
1101   // to how the loop is constructed.
1102   if (uniChar != 0 && u_isdigit(uniChar)){
1103       expansion = (uint32_t)(UCOL_SPECIAL_FLAG | (DIGIT_TAG<<UCOL_TAG_SHIFT) | 1); // prepare the element
1104       if(element->mapCE) { // if there is an expansion, we'll pick it here
1105         expansion |= ((uprv_uca_addExpansion(expansions, element->mapCE, status)+(headersize>>2))<<4);
1106       } else {
1107         expansion |= ((uprv_uca_addExpansion(expansions, element->CEs[0], status)+(headersize>>2))<<4);
1108       }
1109       element->mapCE = expansion;
1110
1111       // Need to go back to the beginning of the digit string if in the middle!
1112       if(uniChar <= 0xFFFF) { // supplementaries are always unsafe. API takes UChars
1113         unsafeCPSet(t->unsafeCP, (UChar)uniChar);
1114       }
1115   }
1116
1117   // here we want to add the prefix structure.
1118   // I will try to process it as a reverse contraction, if possible.
1119   // prefix buffer is already reversed.
1120
1121   if(element->prefixSize!=0) {
1122     // We keep the seen prefix starter elements in a hashtable
1123     // we need it to be able to distinguish between the simple
1124     // codepoints and prefix starters. Also, we need to use it
1125     // for canonical closure.
1126
1127     UCAElements *composed = (UCAElements *)uprv_malloc(sizeof(UCAElements));
1128     /* test for NULL */
1129     if (composed == NULL) {
1130         *status = U_MEMORY_ALLOCATION_ERROR;
1131         return 0;
1132     }
1133     uprv_memcpy(composed, element, sizeof(UCAElements));
1134     composed->cPoints = composed->uchars;
1135     composed->prefix = composed->prefixChars;
1136
1137     composed->prefixSize = unorm_normalize(element->prefix, element->prefixSize, UNORM_NFC, 0, composed->prefix, 128, status);
1138
1139
1140     if(t->prefixLookup != NULL) {
1141       UCAElements *uCE = (UCAElements *)uhash_get(t->prefixLookup, element);
1142       if(uCE != NULL) { // there is already a set of code points here
1143         element->mapCE = uprv_uca_addPrefix(t, uCE->mapCE, element, status);
1144       } else { // no code points, so this spot is clean
1145         element->mapCE = uprv_uca_addPrefix(t, UCOL_NOT_FOUND, element, status);
1146         uCE = (UCAElements *)uprv_malloc(sizeof(UCAElements));
1147         /* test for NULL */
1148         if (uCE == NULL) {
1149             *status = U_MEMORY_ALLOCATION_ERROR;
1150             return 0;
1151         }
1152         uprv_memcpy(uCE, element, sizeof(UCAElements));
1153         uCE->cPoints = uCE->uchars;
1154         uhash_put(t->prefixLookup, uCE, uCE, status);
1155       }
1156       if(composed->prefixSize != element->prefixSize || uprv_memcmp(composed->prefix, element->prefix, element->prefixSize)) {
1157         // do it!
1158         composed->mapCE = uprv_uca_addPrefix(t, element->mapCE, composed, status);
1159       }
1160     }
1161     uprv_free(composed);
1162   }
1163
1164   // We need to use the canonical iterator here
1165   // the way we do it is to generate the canonically equivalent strings
1166   // for the contraction and then add the sequences that pass FCD check
1167   if(element->cSize > 1 && !(element->cSize==2 && UTF16_IS_LEAD(element->cPoints[0]) && UTF16_IS_TRAIL(element->cPoints[1]))) { // this is a contraction, we should check whether a composed form should also be included
1168     UnicodeString source(element->cPoints, element->cSize);
1169     CanonicalIterator it(source, *status);
1170     source = it.next();
1171     while(!source.isBogus()) {
1172       if(Normalizer::quickCheck(source, UNORM_FCD, *status) != UNORM_NO) {
1173         element->cSize = source.extract(element->cPoints, 128, *status);
1174         uprv_uca_finalizeAddition(t, element, status);
1175       }
1176       source = it.next();
1177     }
1178     CE = element->mapCE;
1179   } else {
1180       CE = uprv_uca_finalizeAddition(t, element, status);
1181   }
1182
1183   return CE;
1184 }
1185
1186
1187 /*void uprv_uca_getMaxExpansionJamo(CompactEIntArray       *mapping, */
1188 static void uprv_uca_getMaxExpansionJamo(UNewTrie       *mapping,
1189                                   MaxExpansionTable     *maxexpansion,
1190                                   MaxJamoExpansionTable *maxjamoexpansion,
1191                                   UBool                  jamospecial,
1192                                   UErrorCode            *status)
1193 {
1194   const uint32_t VBASE  = 0x1161;
1195   const uint32_t TBASE  = 0x11A8;
1196   const uint32_t VCOUNT = 21;
1197   const uint32_t TCOUNT = 28;
1198
1199   uint32_t v = VBASE + VCOUNT - 1;
1200   uint32_t t = TBASE + TCOUNT - 1;
1201   uint32_t ce;
1202
1203   while (v >= VBASE) {
1204       /*ce = ucmpe32_get(mapping, v);*/
1205       ce = utrie_get32(mapping, v, NULL);
1206       if (ce < UCOL_SPECIAL_FLAG) {
1207           uprv_uca_setMaxExpansion(ce, 2, maxexpansion, status);
1208       }
1209       v --;
1210   }
1211
1212   while (t >= TBASE)
1213   {
1214       /*ce = ucmpe32_get(mapping, t);*/
1215       ce = utrie_get32(mapping, t, NULL);
1216       if (ce < UCOL_SPECIAL_FLAG) {
1217           uprv_uca_setMaxExpansion(ce, 3, maxexpansion, status);
1218       }
1219       t --;
1220   }
1221   /*  According to the docs, 99% of the time, the Jamo will not be special */
1222   if (jamospecial) {
1223       /* gets the max expansion in all unicode characters */
1224       int     count    = maxjamoexpansion->position;
1225       uint8_t maxTSize = (uint8_t)(maxjamoexpansion->maxLSize +
1226                                    maxjamoexpansion->maxVSize +
1227                                    maxjamoexpansion->maxTSize);
1228       uint8_t maxVSize = (uint8_t)(maxjamoexpansion->maxLSize +
1229                                    maxjamoexpansion->maxVSize);
1230
1231       while (count > 0) {
1232           count --;
1233           if (*(maxjamoexpansion->isV + count) == TRUE) {
1234                 uprv_uca_setMaxExpansion(
1235                                    *(maxjamoexpansion->endExpansionCE + count),
1236                                    maxVSize, maxexpansion, status);
1237           }
1238           else {
1239                 uprv_uca_setMaxExpansion(
1240                                    *(maxjamoexpansion->endExpansionCE + count),
1241                                    maxTSize, maxexpansion, status);
1242           }
1243       }
1244   }
1245 }
1246
1247 U_CDECL_BEGIN
1248 static inline uint32_t U_CALLCONV
1249 getFoldedValue(UNewTrie *trie, UChar32 start, int32_t offset)
1250 {
1251   uint32_t value;
1252   uint32_t tag;
1253   UChar32 limit;
1254   UBool inBlockZero;
1255
1256   limit=start+0x400;
1257   while(start<limit) {
1258       value=utrie_get32(trie, start, &inBlockZero);
1259       tag = getCETag(value);
1260       if(inBlockZero == TRUE) {
1261           start+=UTRIE_DATA_BLOCK_LENGTH;
1262       } else if(!(isSpecial(value) && (tag == IMPLICIT_TAG || tag == NOT_FOUND_TAG))) {
1263         /* These are values that are starting in either UCA (IMPLICIT_TAG) or in the
1264          * tailorings (NOT_FOUND_TAG). Presence of these tags means that there is
1265          * nothing in this position and that it should be skipped.
1266          */
1267 #ifdef UCOL_DEBUG
1268         static int32_t count = 1;
1269         fprintf(stdout, "%i, Folded %08X, value %08X\n", count++, start, value);
1270 #endif
1271           return (uint32_t)(UCOL_SPECIAL_FLAG | (SURROGATE_TAG<<24) | offset);
1272       } else {
1273           ++start;
1274       }
1275   }
1276   return 0;
1277 }
1278 U_CDECL_END
1279
1280 #ifdef UCOL_DEBUG
1281 // This is a debug function to print the contents of a trie.
1282 // It is used in conjuction with the code around utrie_unserialize call
1283 void enumRange(const void *context, UChar32 start, UChar32 limit, uint32_t value) {
1284   if(start<0x10000) {
1285     fprintf(stdout, "%08X, %08X, %08X\n", start, limit, value);
1286   } else {
1287     fprintf(stdout, "%08X=%04X %04X, %08X=%04X %04X, %08X\n", start, UTF16_LEAD(start), UTF16_TRAIL(start), limit, UTF16_LEAD(limit), UTF16_TRAIL(limit), value);
1288   }
1289 }
1290
1291 int32_t
1292 myGetFoldingOffset(uint32_t data) {
1293   if(data > UCOL_NOT_FOUND && getCETag(data) == SURROGATE_TAG) {
1294     return (data&0xFFFFFF);
1295   } else {
1296     return 0;
1297   }
1298 }
1299 #endif
1300
1301 U_CAPI UCATableHeader* U_EXPORT2
1302 uprv_uca_assembleTable(tempUCATable *t, UErrorCode *status) {
1303     /*CompactEIntArray *mapping = t->mapping;*/
1304     UNewTrie *mapping = t->mapping;
1305     ExpansionTable *expansions = t->expansions;
1306     CntTable *contractions = t->contractions;
1307     MaxExpansionTable *maxexpansion = t->maxExpansions;
1308
1309     if(U_FAILURE(*status)) {
1310         return NULL;
1311     }
1312
1313     uint32_t beforeContractions = (uint32_t)((headersize+paddedsize(expansions->position*sizeof(uint32_t)))/sizeof(UChar));
1314
1315     int32_t contractionsSize = 0;
1316     contractionsSize = uprv_cnttab_constructTable(contractions, beforeContractions, status);
1317
1318     /* the following operation depends on the trie data. Therefore, we have to do it before */
1319     /* the trie is compacted */
1320     /* sets jamo expansions */
1321     uprv_uca_getMaxExpansionJamo(mapping, maxexpansion, t->maxJamoExpansions,
1322                                  t->image->jamoSpecial, status);
1323
1324     /*ucmpe32_compact(mapping);*/
1325     /*UMemoryStream *ms = uprv_mstrm_openNew(8192);*/
1326     /*int32_t mappingSize = ucmpe32_flattenMem(mapping, ms);*/
1327     /*const uint8_t *flattened = uprv_mstrm_getBuffer(ms, &mappingSize);*/
1328
1329     // After setting the jamo expansions, compact the trie and get the needed size
1330     int32_t mappingSize = utrie_serialize(mapping, NULL, 0, getFoldedValue /*getFoldedValue*/, FALSE, status);
1331
1332     uint32_t tableOffset = 0;
1333     uint8_t *dataStart;
1334
1335     /* TODO: LATIN1 array is now in the utrie - it should be removed from the calculation */
1336
1337     uint32_t toAllocate =(uint32_t)(headersize+
1338                                     paddedsize(expansions->position*sizeof(uint32_t))+
1339                                     paddedsize(mappingSize)+
1340                                     paddedsize(contractionsSize*(sizeof(UChar)+sizeof(uint32_t)))+
1341                                     //paddedsize(0x100*sizeof(uint32_t))  /* Latin1 is now included in the trie */
1342                                      /* maxexpansion array */
1343                                      + paddedsize(maxexpansion->position * sizeof(uint32_t)) +
1344                                      /* maxexpansion size array */
1345                                      paddedsize(maxexpansion->position * sizeof(uint8_t)) +
1346                                      paddedsize(UCOL_UNSAFECP_TABLE_SIZE) +   /*  Unsafe chars             */
1347                                      paddedsize(UCOL_UNSAFECP_TABLE_SIZE));    /*  Contraction Ending chars */
1348
1349
1350     dataStart = (uint8_t *)uprv_malloc(toAllocate);
1351     /* test for NULL */
1352     if (dataStart == NULL) {
1353         *status = U_MEMORY_ALLOCATION_ERROR;
1354         return NULL;
1355     }
1356
1357     UCATableHeader *myData = (UCATableHeader *)dataStart;
1358     // Please, do reset all the fields!
1359     uprv_memset(dataStart, 0, toAllocate);
1360     // Make sure we know this is reset
1361     myData->magic = UCOL_HEADER_MAGIC;
1362     myData->isBigEndian = U_IS_BIG_ENDIAN;
1363     myData->charSetFamily = U_CHARSET_FAMILY;
1364     myData->formatVersion[0] = UCA_FORMAT_VERSION_0;
1365     myData->formatVersion[1] = UCA_FORMAT_VERSION_1;
1366     myData->formatVersion[2] = UCA_FORMAT_VERSION_2;
1367     myData->formatVersion[3] = UCA_FORMAT_VERSION_3;
1368     myData->jamoSpecial = t->image->jamoSpecial;
1369
1370     // Don't copy stuff from UCA header!
1371     //uprv_memcpy(myData, t->image, sizeof(UCATableHeader));
1372
1373     myData->contractionSize = contractionsSize;
1374
1375     tableOffset += (uint32_t)(paddedsize(sizeof(UCATableHeader)));
1376
1377     myData->options = tableOffset;
1378     uprv_memcpy(dataStart+tableOffset, t->options, sizeof(UColOptionSet));
1379     tableOffset += (uint32_t)(paddedsize(sizeof(UColOptionSet)));
1380
1381     /* copy expansions */
1382     /*myData->expansion = (uint32_t *)dataStart+tableOffset;*/
1383     myData->expansion = tableOffset;
1384     uprv_memcpy(dataStart+tableOffset, expansions->CEs, expansions->position*sizeof(uint32_t));
1385     tableOffset += (uint32_t)(paddedsize(expansions->position*sizeof(uint32_t)));
1386
1387     /* contractions block */
1388     if(contractionsSize != 0) {
1389       /* copy contraction index */
1390       /*myData->contractionIndex = (UChar *)(dataStart+tableOffset);*/
1391       myData->contractionIndex = tableOffset;
1392       uprv_memcpy(dataStart+tableOffset, contractions->codePoints, contractionsSize*sizeof(UChar));
1393       tableOffset += (uint32_t)(paddedsize(contractionsSize*sizeof(UChar)));
1394
1395       /* copy contraction collation elements */
1396       /*myData->contractionCEs = (uint32_t *)(dataStart+tableOffset);*/
1397       myData->contractionCEs = tableOffset;
1398       uprv_memcpy(dataStart+tableOffset, contractions->CEs, contractionsSize*sizeof(uint32_t));
1399       tableOffset += (uint32_t)(paddedsize(contractionsSize*sizeof(uint32_t)));
1400     } else {
1401       myData->contractionIndex = 0;
1402       myData->contractionCEs = 0;
1403     }
1404
1405     /* copy mapping table */
1406     /*myData->mappingPosition = dataStart+tableOffset;*/
1407     /*myData->mappingPosition = tableOffset;*/
1408     /*uprv_memcpy(dataStart+tableOffset, flattened, mappingSize);*/
1409
1410     myData->mappingPosition = tableOffset;
1411     utrie_serialize(mapping, dataStart+tableOffset, toAllocate-tableOffset, getFoldedValue, FALSE, status);
1412 #ifdef UCOL_DEBUG
1413     // This is debug code to dump the contents of the trie. It needs two functions defined above
1414     {
1415       UTrie UCAt = { 0 };
1416       uint32_t trieWord;
1417       utrie_unserialize(&UCAt, dataStart+tableOffset, 9999999, status);
1418       UCAt.getFoldingOffset = myGetFoldingOffset;
1419       if(U_SUCCESS(*status)) {
1420         utrie_enum(&UCAt, NULL, enumRange, NULL);
1421       }
1422       trieWord = UTRIE_GET32_FROM_LEAD(UCAt, 0xDC01)
1423     }
1424 #endif
1425     tableOffset += paddedsize(mappingSize);
1426
1427
1428     int32_t i = 0;
1429
1430     /* copy max expansion table */
1431     myData->endExpansionCE      = tableOffset;
1432     myData->endExpansionCECount = maxexpansion->position - 1;
1433     /* not copying the first element which is a dummy */
1434     uprv_memcpy(dataStart + tableOffset, maxexpansion->endExpansionCE + 1,
1435                 (maxexpansion->position - 1) * sizeof(uint32_t));
1436     tableOffset += (uint32_t)(paddedsize((maxexpansion->position)* sizeof(uint32_t)));
1437     myData->expansionCESize = tableOffset;
1438     uprv_memcpy(dataStart + tableOffset, maxexpansion->expansionCESize + 1,
1439                 (maxexpansion->position - 1) * sizeof(uint8_t));
1440     tableOffset += (uint32_t)(paddedsize((maxexpansion->position)* sizeof(uint8_t)));
1441
1442     /* Unsafe chars table.  Finish it off, then copy it. */
1443     uprv_uca_unsafeCPAddCCNZ(t, status);
1444     if (t->UCA != 0) {              /* Or in unsafebits from UCA, making a combined table.    */
1445        for (i=0; i<UCOL_UNSAFECP_TABLE_SIZE; i++) {
1446            t->unsafeCP[i] |= t->UCA->unsafeCP[i];
1447        }
1448     }
1449     myData->unsafeCP = tableOffset;
1450     uprv_memcpy(dataStart + tableOffset, t->unsafeCP, UCOL_UNSAFECP_TABLE_SIZE);
1451     tableOffset += paddedsize(UCOL_UNSAFECP_TABLE_SIZE);
1452
1453
1454     /* Finish building Contraction Ending chars hash table and then copy it out.  */
1455     if (t->UCA != 0) {              /* Or in unsafebits from UCA, making a combined table.    */
1456         for (i=0; i<UCOL_UNSAFECP_TABLE_SIZE; i++) {
1457             t->contrEndCP[i] |= t->UCA->contrEndCP[i];
1458         }
1459     }
1460     myData->contrEndCP = tableOffset;
1461     uprv_memcpy(dataStart + tableOffset, t->contrEndCP, UCOL_UNSAFECP_TABLE_SIZE);
1462     tableOffset += paddedsize(UCOL_UNSAFECP_TABLE_SIZE);
1463
1464     if(tableOffset != toAllocate) {
1465 #ifdef UCOL_DEBUG
1466         fprintf(stderr, "calculation screwup!!! Expected to write %i but wrote %i instead!!!\n", toAllocate, tableOffset);
1467 #endif
1468         *status = U_INTERNAL_PROGRAM_ERROR;
1469         uprv_free(dataStart);
1470         return 0;
1471     }
1472
1473     myData->size = tableOffset;
1474     /* This should happen upon ressurection */
1475     /*const uint8_t *mapPosition = (uint8_t*)myData+myData->mappingPosition;*/
1476     /*uprv_mstrm_close(ms);*/
1477     return myData;
1478 }
1479
1480
1481 struct enumStruct {
1482   tempUCATable *t;
1483   UCollator *tempColl;
1484   UCollationElements* colEl;
1485   int32_t noOfClosures;
1486   UErrorCode *status;
1487 };
1488 U_CDECL_BEGIN
1489 static UBool U_CALLCONV
1490 _enumCategoryRangeClosureCategory(const void *context, UChar32 start, UChar32 limit, UCharCategory type) {
1491
1492   if (type != U_UNASSIGNED && type != U_PRIVATE_USE_CHAR) { // if the range is assigned - we might ommit more categories later
1493     UErrorCode *status = ((enumStruct *)context)->status;
1494     tempUCATable *t = ((enumStruct *)context)->t;
1495     UCollator *tempColl = ((enumStruct *)context)->tempColl;
1496     UCollationElements* colEl = ((enumStruct *)context)->colEl;
1497     UCAElements el;
1498     UChar decomp[256] = { 0 };
1499     int32_t noOfDec = 0;
1500
1501     UChar32 u32 = 0;
1502     UChar comp[2];
1503     uint32_t len = 0;
1504
1505     for(u32 = start; u32 < limit; u32++) {
1506       noOfDec = unorm_getDecomposition(u32, FALSE, decomp, 256);
1507       //if((noOfDec = unorm_normalize(comp, len, UNORM_NFD, 0, decomp, 256, status)) > 1
1508         //|| (noOfDec == 1 && *decomp != (UChar)u32))
1509       if(noOfDec > 0) // if we're positive, that means there is no decomposition
1510       {
1511         len = 0;
1512         UTF_APPEND_CHAR_UNSAFE(comp, len, u32);
1513         if(ucol_strcoll(tempColl, comp, len, decomp, noOfDec) != UCOL_EQUAL) {
1514 #ifdef UCOL_DEBUG
1515           fprintf(stderr, "Closure: %08X -> ", u32);
1516           uint32_t i = 0;
1517           for(i = 0; i<noOfDec; i++) {
1518             fprintf(stderr, "%04X ", decomp[i]);
1519           }
1520           fprintf(stderr, "\n");
1521 #endif
1522           ((enumStruct *)context)->noOfClosures++;
1523           el.cPoints = decomp;
1524           el.cSize = noOfDec;
1525           el.noOfCEs = 0;
1526           el.prefix = el.prefixChars;
1527           el.prefixSize = 0;
1528
1529           UCAElements *prefix=(UCAElements *)uhash_get(t->prefixLookup, &el);
1530           el.cPoints = comp;
1531           el.cSize = len;
1532           el.prefix = el.prefixChars;
1533           el.prefixSize = 0;
1534           if(prefix == NULL) {
1535             el.noOfCEs = 0;
1536             ucol_setText(colEl, decomp, noOfDec, status);
1537             while((el.CEs[el.noOfCEs] = ucol_next(colEl, status)) != (uint32_t)UCOL_NULLORDER) {
1538               el.noOfCEs++;
1539             }
1540           } else {
1541             el.noOfCEs = 1;
1542             el.CEs[0] = prefix->mapCE;
1543             // This character uses a prefix. We have to add it
1544             // to the unsafe table, as it decomposed form is already
1545             // in. In Japanese, this happens for \u309e & \u30fe
1546             // Since unsafeCPSet is static in ucol_elm, we are going
1547             // to wrap it up in the uprv_uca_unsafeCPAddCCNZ function
1548           }
1549           uprv_uca_addAnElement(t, &el, status);
1550         }
1551       }
1552     }
1553   }
1554   return TRUE;
1555 }
1556 U_CDECL_END
1557
1558 U_CAPI int32_t U_EXPORT2
1559 uprv_uca_canonicalClosure(tempUCATable *t, UErrorCode *status)
1560 {
1561   enumStruct context;
1562   context.noOfClosures = 0;
1563   if(U_SUCCESS(*status)) {
1564     UCollator *tempColl = NULL;
1565     tempUCATable *tempTable = uprv_uca_cloneTempTable(t, status);
1566
1567     UCATableHeader *tempData = uprv_uca_assembleTable(tempTable, status);
1568     tempColl = ucol_initCollator(tempData, 0, t->UCA, status);
1569     uprv_uca_closeTempTable(tempTable);
1570
1571     if(U_SUCCESS(*status)) {
1572       tempColl->rb = NULL;
1573       tempColl->elements = NULL;
1574       tempColl->validLocale = NULL;
1575       tempColl->requestedLocale = NULL;
1576       tempColl->hasRealData = TRUE;
1577       tempColl->freeImageOnClose = TRUE;
1578     } else if(tempData != 0) {
1579       uprv_free(tempData);
1580     }
1581
1582     /* produce canonical closure */
1583     UCollationElements* colEl = ucol_openElements(tempColl, NULL, 0, status);
1584
1585     context.t = t;
1586     context.tempColl = tempColl;
1587     context.colEl = colEl;
1588     context.status = status;
1589     u_enumCharTypes(_enumCategoryRangeClosureCategory, &context);
1590
1591     ucol_closeElements(colEl);
1592     ucol_close(tempColl);
1593   }
1594   return context.noOfClosures;
1595 }
1596
1597 #endif /* #if !UCONFIG_NO_COLLATION */
1598
1599