icuSources/i18n/ucol_elm.cpp

   1 /*
   2 *******************************************************************************
   3 *
   4 *   Copyright (C) 2001-2004, International Business Machines
   5 *   Corporation and others.  All Rights Reserved.
   6 *
   7 *******************************************************************************
   8 *   file name:  ucaelems.cpp
   9 *   encoding:   US-ASCII
  10 *   tab size:   8 (not used)
  11 *   indentation:4
  12 *
  13 *   created 02/22/2001
  14 *   created by: Vladimir Weinstein
  15 *
  16 *   This program reads the Fractional UCA table and generates
  17 *   internal format for UCA table as well as inverse UCA table.
  18 *   It then writes binary files containing the data: ucadata.dat
  19 *   & invuca.dat
  20 *
  21 *   date        name       comments
  22 *   03/02/2001  synwee     added setMaxExpansion
  23 *   03/07/2001  synwee     merged UCA's maxexpansion and tailoring's
  24 */
  25
  26 #include "unicode/utypes.h"
  27
  28 #if !UCONFIG_NO_COLLATION
  29
  30 #include "unicode/uchar.h"
  31 #include "unicode/unistr.h"
  32 #include "unicode/ucoleitr.h"
  33 #include "unicode/normlzr.h"
  34 #include "ucol_elm.h"
  35 #include "unormimp.h"
  36 #include "unicode/caniter.h"
  37 #include "cmemory.h"
  38
  39 U_NAMESPACE_BEGIN
  40
  41 static uint32_t uprv_uca_processContraction(CntTable *contractions, UCAElements *element, uint32_t existingCE, UErrorCode *status);
  42
  43 U_CDECL_BEGIN
  44 static int32_t U_EXPORT2 U_CALLCONV
  45 prefixLookupHash(const UHashTok e) {
  46   UCAElements *element = (UCAElements *)e.pointer;
  47   UChar buf[256];
  48   UHashTok key;
  49   key.pointer = buf;
  50   uprv_memcpy(buf, element->cPoints, element->cSize*sizeof(UChar));
  51   buf[element->cSize] = 0;
  52   //key.pointer = element->cPoints;
  53   //element->cPoints[element->cSize] = 0;
  54   return uhash_hashUChars(key);
  55 }
  56
  57 static int8_t U_EXPORT2 U_CALLCONV
  58 prefixLookupComp(const UHashTok e1, const UHashTok e2) {
  59   UCAElements *element1 = (UCAElements *)e1.pointer;
  60   UCAElements *element2 = (UCAElements *)e2.pointer;
  61
  62   UChar buf1[256];
  63   UHashTok key1;
  64   key1.pointer = buf1;
  65   uprv_memcpy(buf1, element1->cPoints, element1->cSize*sizeof(UChar));
  66   buf1[element1->cSize] = 0;
  67
  68   UChar buf2[256];
  69   UHashTok key2;
  70   key2.pointer = buf2;
  71   uprv_memcpy(buf2, element2->cPoints, element2->cSize*sizeof(UChar));
  72   buf2[element2->cSize] = 0;
  73
  74   return uhash_compareUChars(key1, key2);
  75 }
  76 U_CDECL_END
  77
  78 static int32_t uprv_uca_addExpansion(ExpansionTable *expansions, uint32_t value, UErrorCode *status) {
  79     if(U_FAILURE(*status)) {
  80         return 0;
  81     }
  82     if(expansions->CEs == NULL) {
  83         expansions->CEs = (uint32_t *)uprv_malloc(INIT_EXP_TABLE_SIZE*sizeof(uint32_t));
  84         /* test for NULL */
  85         if (expansions->CEs == NULL) {
  86             *status = U_MEMORY_ALLOCATION_ERROR;
  87             return 0;
  88         }
  89         expansions->size = INIT_EXP_TABLE_SIZE;
  90         expansions->position = 0;
  91     }
  92
  93     if(expansions->position == expansions->size) {
  94         uint32_t *newData = (uint32_t *)uprv_realloc(expansions->CEs, 2*expansions->size*sizeof(uint32_t));
  95         if(newData == NULL) {
  96 #ifdef UCOL_DEBUG
  97             fprintf(stderr, "out of memory for expansions\n");
  98 #endif
  99             *status = U_MEMORY_ALLOCATION_ERROR;
 100             return -1;
 101         }
 102         expansions->CEs = newData;
 103         expansions->size *= 2;
 104     }
 105
 106     expansions->CEs[expansions->position] = value;
 107     return(expansions->position++);
 108 }
 109
 110 U_CAPI tempUCATable*  U_EXPORT2
 111 uprv_uca_initTempTable(UCATableHeader *image, UColOptionSet *opts, const UCollator *UCA, UColCETags initTag, UColCETags supplementaryInitTag, UErrorCode *status) {
 112   tempUCATable *t = (tempUCATable *)uprv_malloc(sizeof(tempUCATable));
 113   /* test for NULL */
 114   if (t == NULL) {
 115       *status = U_MEMORY_ALLOCATION_ERROR;
 116       return NULL;
 117   }
 118   MaxExpansionTable *maxet  = (MaxExpansionTable *)uprv_malloc(
 119                                                    sizeof(MaxExpansionTable));
 120   /* test for NULL */
 121   if (maxet == NULL) {
 122       *status = U_MEMORY_ALLOCATION_ERROR;
 123       uprv_free(t);
 124       return NULL;
 125   }
 126   MaxJamoExpansionTable *maxjet = (MaxJamoExpansionTable *)uprv_malloc(
 127                                                sizeof(MaxJamoExpansionTable));
 128   /* test for NULL */
 129   if (maxjet == NULL) {
 130       *status = U_MEMORY_ALLOCATION_ERROR;
 131       uprv_free(t);
 132       uprv_free(maxet);
 133       return NULL;
 134   }
 135   t->image = image;
 136   t->options = opts;
 137
 138   t->UCA = UCA;
 139   t->expansions = (ExpansionTable *)uprv_malloc(sizeof(ExpansionTable));
 140   /* test for NULL */
 141   if (t->expansions == NULL) {
 142       *status = U_MEMORY_ALLOCATION_ERROR;
 143       uprv_free(t);
 144       uprv_free(maxet);
 145       uprv_free(maxjet);
 146       return NULL;
 147   }
 148   uprv_memset(t->expansions, 0, sizeof(ExpansionTable));
 149   /*t->mapping = ucmpe32_open(UCOL_SPECIAL_FLAG | (initTag<<24), UCOL_SPECIAL_FLAG | (SURROGATE_TAG<<24), UCOL_SPECIAL_FLAG | (LEAD_SURROGATE_TAG<<24), status);*/
 150   /*t->mapping = utrie_open(NULL, NULL, 0x100000, UCOL_SPECIAL_FLAG | (initTag<<24), TRUE); // Do your own mallocs for the structure, array and have linear Latin 1*/
 151
 152   t->mapping = utrie_open(NULL, NULL, 0x100000,
 153                           UCOL_SPECIAL_FLAG | (initTag<<24),
 154                           UCOL_SPECIAL_FLAG | (supplementaryInitTag << 24),
 155                           TRUE); // Do your own mallocs for the structure, array and have linear Latin 1
 156   t->prefixLookup = uhash_open(prefixLookupHash, prefixLookupComp, status);
 157   uhash_setValueDeleter(t->prefixLookup, uhash_freeBlock);
 158
 159   t->contractions = uprv_cnttab_open(t->mapping, status);
 160
 161   /* copy UCA's maxexpansion and merge as we go along */
 162   t->maxExpansions       = maxet;
 163   if (UCA != NULL) {
 164     /* adding an extra initial value for easier manipulation */
 165     maxet->size            = (UCA->lastEndExpansionCE - UCA->endExpansionCE)
 166                              + 2;
 167     maxet->position        = maxet->size - 1;
 168     maxet->endExpansionCE  =
 169                       (uint32_t *)uprv_malloc(sizeof(uint32_t) * maxet->size);
 170     /* test for NULL */
 171     if (maxet->endExpansionCE == NULL) {
 172         *status = U_MEMORY_ALLOCATION_ERROR;
 173         return NULL;
 174     }
 175     maxet->expansionCESize =
 176                         (uint8_t *)uprv_malloc(sizeof(uint8_t) * maxet->size);
 177     /* test for NULL */
 178     if (maxet->expansionCESize == NULL) {
 179         *status = U_MEMORY_ALLOCATION_ERROR;
 180         uprv_free(maxet->endExpansionCE);
 181         return NULL;
 182     }
 183     /* initialized value */
 184     *(maxet->endExpansionCE)  = 0;
 185     *(maxet->expansionCESize) = 0;
 186     uprv_memcpy(maxet->endExpansionCE + 1, UCA->endExpansionCE,
 187                 sizeof(uint32_t) * (maxet->size - 1));
 188     uprv_memcpy(maxet->expansionCESize + 1, UCA->expansionCESize,
 189                 sizeof(uint8_t) * (maxet->size - 1));
 190   }
 191   else {
 192     maxet->size     = 0;
 193   }
 194   t->maxJamoExpansions = maxjet;
 195   maxjet->endExpansionCE = NULL;
 196   maxjet->isV = NULL;
 197   maxjet->size = 0;
 198   maxjet->position = 0;
 199   maxjet->maxLSize = 1;
 200   maxjet->maxVSize = 1;
 201   maxjet->maxTSize = 1;
 202
 203   t->unsafeCP = (uint8_t *)uprv_malloc(UCOL_UNSAFECP_TABLE_SIZE);
 204   /* test for NULL */
 205   if (t->unsafeCP == NULL) {
 206       *status = U_MEMORY_ALLOCATION_ERROR;
 207       return NULL;
 208   }
 209   t->contrEndCP = (uint8_t *)uprv_malloc(UCOL_UNSAFECP_TABLE_SIZE);
 210   /* test for NULL */
 211   if (t->contrEndCP == NULL) {
 212       *status = U_MEMORY_ALLOCATION_ERROR;
 213       uprv_free(t->unsafeCP);
 214       return NULL;
 215   }
 216   uprv_memset(t->unsafeCP, 0, UCOL_UNSAFECP_TABLE_SIZE);
 217   uprv_memset(t->contrEndCP, 0, UCOL_UNSAFECP_TABLE_SIZE);
 218 return t;
 219 }
 220
 221 U_CAPI tempUCATable* U_EXPORT2
 222 uprv_uca_cloneTempTable(tempUCATable *t, UErrorCode *status) {
 223   if(U_FAILURE(*status)) {
 224     return NULL;
 225   }
 226
 227   tempUCATable *r = (tempUCATable *)uprv_malloc(sizeof(tempUCATable));
 228   /* test for NULL */
 229   if (r == NULL) {
 230     *status = U_MEMORY_ALLOCATION_ERROR;
 231     return NULL;
 232   }
 233   uprv_memset(r, 0, sizeof(tempUCATable));
 234
 235   /* mapping */
 236   if(t->mapping != NULL) {
 237     /*r->mapping = ucmpe32_clone(t->mapping, status);*/
 238     r->mapping = utrie_clone(NULL, t->mapping, NULL, 0);
 239   }
 240
 241   // a hashing clone function would be very nice. We have none currently...
 242   // However, we should be good, as closing should not produce any prefixed elements.
 243   r->prefixLookup = NULL; // prefixes are not used in closing
 244
 245   /* expansions */
 246   if(t->expansions != NULL) {
 247     r->expansions = (ExpansionTable *)uprv_malloc(sizeof(ExpansionTable));
 248     /* test for NULL */
 249     if (r->expansions == NULL) {
 250         *status = U_MEMORY_ALLOCATION_ERROR;
 251         return NULL;
 252     }
 253     r->expansions->position = t->expansions->position;
 254     r->expansions->size = t->expansions->size;
 255     if(t->expansions->CEs != NULL) {
 256       r->expansions->CEs = (uint32_t *)uprv_malloc(sizeof(uint32_t)*t->expansions->size);
 257       /* test for NULL */
 258       if (r->expansions->CEs == NULL) {
 259           *status = U_MEMORY_ALLOCATION_ERROR;
 260           return NULL;
 261       }
 262       uprv_memcpy(r->expansions->CEs, t->expansions->CEs, sizeof(uint32_t)*t->expansions->position);
 263     } else {
 264       r->expansions->CEs = NULL;
 265     }
 266   }
 267
 268   if(t->contractions != NULL) {
 269     r->contractions = uprv_cnttab_clone(t->contractions, status);
 270     r->contractions->mapping = r->mapping;
 271   }
 272
 273   if(t->maxExpansions != NULL) {
 274     r->maxExpansions = (MaxExpansionTable *)uprv_malloc(sizeof(MaxExpansionTable));
 275     /* test for NULL */
 276     if (r->maxExpansions == NULL) {
 277         *status = U_MEMORY_ALLOCATION_ERROR;
 278         return NULL;
 279     }
 280     r->maxExpansions->size = t->maxExpansions->size;
 281     r->maxExpansions->position = t->maxExpansions->position;
 282     if(t->maxExpansions->endExpansionCE != NULL) {
 283       r->maxExpansions->endExpansionCE = (uint32_t *)uprv_malloc(sizeof(uint32_t)*t->maxExpansions->size);
 284       /* test for NULL */
 285       if (r->maxExpansions->endExpansionCE == NULL) {
 286           *status = U_MEMORY_ALLOCATION_ERROR;
 287           return NULL;
 288       }
 289       uprv_memcpy(r->maxExpansions->endExpansionCE, t->maxExpansions->endExpansionCE, t->maxExpansions->position*sizeof(uint32_t));
 290     } else {
 291       r->maxExpansions->endExpansionCE = NULL;
 292     }
 293     if(t->maxExpansions->expansionCESize != NULL) {
 294       r->maxExpansions->expansionCESize = (uint8_t *)uprv_malloc(sizeof(uint8_t)*t->maxExpansions->size);
 295       /* test for NULL */
 296       if (r->maxExpansions->expansionCESize == NULL) {
 297           *status = U_MEMORY_ALLOCATION_ERROR;
 298           return NULL;
 299       }
 300       uprv_memcpy(r->maxExpansions->expansionCESize, t->maxExpansions->expansionCESize, t->maxExpansions->position*sizeof(uint8_t));
 301     } else {
 302       r->maxExpansions->expansionCESize = NULL;
 303     }
 304   }
 305
 306   if(t->maxJamoExpansions != NULL) {
 307     r->maxJamoExpansions = (MaxJamoExpansionTable *)uprv_malloc(sizeof(MaxJamoExpansionTable));
 308     /* test for NULL */
 309     if (r->maxJamoExpansions == NULL) {
 310         *status = U_MEMORY_ALLOCATION_ERROR;
 311         return NULL;
 312     }
 313     r->maxJamoExpansions->size = t->maxJamoExpansions->size;
 314     r->maxJamoExpansions->position = t->maxJamoExpansions->position;
 315     r->maxJamoExpansions->maxLSize = t->maxJamoExpansions->maxLSize;
 316     r->maxJamoExpansions->maxVSize = t->maxJamoExpansions->maxVSize;
 317     r->maxJamoExpansions->maxTSize = t->maxJamoExpansions->maxTSize;
 318     if(t->maxJamoExpansions->size != 0) {
 319       r->maxJamoExpansions->endExpansionCE = (uint32_t *)uprv_malloc(sizeof(uint32_t)*t->maxJamoExpansions->size);
 320       /* test for NULL */
 321       if (r->maxJamoExpansions->endExpansionCE == NULL) {
 322           *status = U_MEMORY_ALLOCATION_ERROR;
 323           return NULL;
 324       }
 325       uprv_memcpy(r->maxJamoExpansions->endExpansionCE, t->maxJamoExpansions->endExpansionCE, t->maxJamoExpansions->position*sizeof(uint32_t));
 326       r->maxJamoExpansions->isV = (UBool *)uprv_malloc(sizeof(UBool)*t->maxJamoExpansions->size);
 327       /* test for NULL */
 328       if (r->maxJamoExpansions->isV == NULL) {
 329           *status = U_MEMORY_ALLOCATION_ERROR;
 330           return NULL;
 331       }
 332       uprv_memcpy(r->maxJamoExpansions->isV, t->maxJamoExpansions->isV, t->maxJamoExpansions->position*sizeof(UBool));
 333     } else {
 334       r->maxJamoExpansions->endExpansionCE = NULL;
 335       r->maxJamoExpansions->isV = NULL;
 336     }
 337   }
 338
 339   if(t->unsafeCP != NULL) {
 340     r->unsafeCP = (uint8_t *)uprv_malloc(UCOL_UNSAFECP_TABLE_SIZE);
 341     /* test for NULL */
 342     if (r->unsafeCP == NULL) {
 343         *status = U_MEMORY_ALLOCATION_ERROR;
 344         return NULL;
 345     }
 346     uprv_memcpy(r->unsafeCP, t->unsafeCP, UCOL_UNSAFECP_TABLE_SIZE);
 347   }
 348
 349   if(t->contrEndCP != NULL) {
 350     r->contrEndCP = (uint8_t *)uprv_malloc(UCOL_UNSAFECP_TABLE_SIZE);
 351     /* test for NULL */
 352     if (r->contrEndCP == NULL) {
 353         *status = U_MEMORY_ALLOCATION_ERROR;
 354         return NULL;
 355     }
 356     uprv_memcpy(r->contrEndCP, t->contrEndCP, UCOL_UNSAFECP_TABLE_SIZE);
 357   }
 358
 359   r->UCA = t->UCA;
 360   r->image = t->image;
 361   r->options = t->options;
 362
 363   return r;
 364 }
 365
 366
 367 U_CAPI void  U_EXPORT2
 368 uprv_uca_closeTempTable(tempUCATable *t) {
 369   if(t != NULL) {
 370     uprv_free(t->expansions->CEs);
 371     uprv_free(t->expansions);
 372     if(t->contractions != NULL) {
 373       uprv_cnttab_close(t->contractions);
 374     }
 375     /*ucmpe32_close(t->mapping);*/
 376     utrie_close(t->mapping);
 377
 378     if(t->prefixLookup != NULL) {
 379       uhash_close(t->prefixLookup);
 380     }
 381
 382     uprv_free(t->maxExpansions->endExpansionCE);
 383     uprv_free(t->maxExpansions->expansionCESize);
 384     uprv_free(t->maxExpansions);
 385
 386     if (t->maxJamoExpansions->size > 0) {
 387       uprv_free(t->maxJamoExpansions->endExpansionCE);
 388       uprv_free(t->maxJamoExpansions->isV);
 389     }
 390     uprv_free(t->maxJamoExpansions);
 391
 392     uprv_free(t->unsafeCP);
 393     uprv_free(t->contrEndCP);
 394
 395     uprv_free(t);
 396   }
 397 }
 398
 399 /**
 400 * Looks for the maximum length of all expansion sequences ending with the same
 401 * collation element. The size required for maxexpansion and maxsize is
 402 * returned if the arrays are too small.
 403 * @param endexpansion the last expansion collation element to be added
 404 * @param expansionsize size of the expansion
 405 * @param maxexpansion data structure to store the maximum expansion data.
 406 * @param status error status
 407 * @returns size of the maxexpansion and maxsize used.
 408 */
 409 static int uprv_uca_setMaxExpansion(uint32_t           endexpansion,
 410                              uint8_t            expansionsize,
 411                              MaxExpansionTable *maxexpansion,
 412                              UErrorCode        *status)
 413 {
 414   if (maxexpansion->size == 0) {
 415     /* we'll always make the first element 0, for easier manipulation */
 416     maxexpansion->endExpansionCE =
 417                (uint32_t *)uprv_malloc(INIT_EXP_TABLE_SIZE * sizeof(int32_t));
 418     /* test for NULL */
 419     if (maxexpansion->endExpansionCE == NULL) {
 420         *status = U_MEMORY_ALLOCATION_ERROR;
 421         return 0;
 422     }
 423     *(maxexpansion->endExpansionCE) = 0;
 424     maxexpansion->expansionCESize =
 425                (uint8_t *)uprv_malloc(INIT_EXP_TABLE_SIZE * sizeof(uint8_t));
 426     /* test for NULL */;
 427     if (maxexpansion->expansionCESize == NULL) {
 428         *status = U_MEMORY_ALLOCATION_ERROR;
 429         return 0;
 430     }
 431     *(maxexpansion->expansionCESize) = 0;
 432     maxexpansion->size     = INIT_EXP_TABLE_SIZE;
 433     maxexpansion->position = 0;
 434   }
 435
 436   if (maxexpansion->position + 1 == maxexpansion->size) {
 437     uint32_t *neweece = (uint32_t *)uprv_realloc(maxexpansion->endExpansionCE,
 438                                    2 * maxexpansion->size * sizeof(uint32_t));
 439     uint8_t  *neweces = (uint8_t *)uprv_realloc(maxexpansion->expansionCESize,
 440                                     2 * maxexpansion->size * sizeof(uint8_t));
 441     if (neweece == NULL || neweces == NULL) {
 442 #ifdef UCOL_DEBUG
 443       fprintf(stderr, "out of memory for maxExpansions\n");
 444 #endif
 445       *status = U_MEMORY_ALLOCATION_ERROR;
 446       return -1;
 447     }
 448     maxexpansion->endExpansionCE  = neweece;
 449     maxexpansion->expansionCESize = neweces;
 450     maxexpansion->size *= 2;
 451   }
 452
 453   uint32_t *pendexpansionce = maxexpansion->endExpansionCE;
 454   uint8_t  *pexpansionsize  = maxexpansion->expansionCESize;
 455   int      pos              = maxexpansion->position;
 456
 457   uint32_t *start = pendexpansionce;
 458   uint32_t *limit = pendexpansionce + pos;
 459
 460   /* using binary search to determine if last expansion element is
 461      already in the array */
 462   uint32_t *mid;
 463   int       result = -1;
 464   while (start < limit - 1) {
 465     mid = start + ((limit - start) >> 1);
 466     if (endexpansion <= *mid) {
 467       limit = mid;
 468     }
 469     else {
 470       start = mid;
 471     }
 472   }
 473
 474   if (*start == endexpansion) {
 475     result = start - pendexpansionce;
 476   }
 477   else
 478     if (*limit == endexpansion) {
 479       result = limit - pendexpansionce;
 480     }
 481
 482   if (result > -1) {
 483     /* found the ce in expansion, we'll just modify the size if it is
 484        smaller */
 485     uint8_t *currentsize = pexpansionsize + result;
 486     if (*currentsize < expansionsize) {
 487       *currentsize = expansionsize;
 488     }
 489   }
 490   else {
 491     /* we'll need to squeeze the value into the array.
 492        initial implementation. */
 493     /* shifting the subarray down by 1 */
 494     int      shiftsize     = (pendexpansionce + pos) - start;
 495     uint32_t *shiftpos     = start + 1;
 496     uint8_t  *sizeshiftpos = pexpansionsize + (shiftpos - pendexpansionce);
 497
 498     /* okay need to rearrange the array into sorted order */
 499     if (shiftsize == 0 /*|| *(pendexpansionce + pos) < endexpansion*/) { /* the commented part is actually both redundant and dangerous */
 500       *(pendexpansionce + pos + 1) = endexpansion;
 501       *(pexpansionsize + pos + 1)  = expansionsize;
 502     }
 503     else {
 504       uprv_memmove(shiftpos + 1, shiftpos, shiftsize * sizeof(int32_t));
 505       uprv_memmove(sizeshiftpos + 1, sizeshiftpos,
 506                                                 shiftsize * sizeof(uint8_t));
 507       *shiftpos     = endexpansion;
 508       *sizeshiftpos = expansionsize;
 509     }
 510     maxexpansion->position ++;
 511
 512 #ifdef UCOL_DEBUG
 513     int   temp;
 514     UBool found = FALSE;
 515     for (temp = 0; temp < maxexpansion->position; temp ++) {
 516       if (pendexpansionce[temp] >= pendexpansionce[temp + 1]) {
 517         fprintf(stderr, "expansions %d\n", temp);
 518       }
 519       if (pendexpansionce[temp] == endexpansion) {
 520         found =TRUE;
 521         if (pexpansionsize[temp] < expansionsize) {
 522           fprintf(stderr, "expansions size %d\n", temp);
 523         }
 524       }
 525     }
 526     if (pendexpansionce[temp] == endexpansion) {
 527         found =TRUE;
 528         if (pexpansionsize[temp] < expansionsize) {
 529           fprintf(stderr, "expansions size %d\n", temp);
 530         }
 531       }
 532     if (!found)
 533       fprintf(stderr, "expansion not found %d\n", temp);
 534 #endif
 535   }
 536
 537   return maxexpansion->position;
 538 }
 539
 540 /**
 541 * Sets the maximum length of all jamo expansion sequences ending with the same
 542 * collation element. The size required for maxexpansion and maxsize is
 543 * returned if the arrays are too small.
 544 * @param ch the jamo codepoint
 545 * @param endexpansion the last expansion collation element to be added
 546 * @param expansionsize size of the expansion
 547 * @param maxexpansion data structure to store the maximum expansion data.
 548 * @param status error status
 549 * @returns size of the maxexpansion and maxsize used.
 550 */
 551 static int uprv_uca_setMaxJamoExpansion(UChar                  ch,
 552                                  uint32_t               endexpansion,
 553                                  uint8_t                expansionsize,
 554                                  MaxJamoExpansionTable *maxexpansion,
 555                                  UErrorCode            *status)
 556 {
 557   UBool isV = TRUE;
 558   if (((uint32_t)ch - 0x1100) <= (0x1112 - 0x1100)) {
 559       /* determines L for Jamo, doesn't need to store this since it is never
 560       at the end of a expansion */
 561       if (maxexpansion->maxLSize < expansionsize) {
 562           maxexpansion->maxLSize = expansionsize;
 563       }
 564       return maxexpansion->position;
 565   }
 566
 567   if (((uint32_t)ch - 0x1161) <= (0x1175 - 0x1161)) {
 568       /* determines V for Jamo */
 569       if (maxexpansion->maxVSize < expansionsize) {
 570           maxexpansion->maxVSize = expansionsize;
 571       }
 572   }
 573
 574   if (((uint32_t)ch - 0x11A8) <= (0x11C2 - 0x11A8)) {
 575       isV = FALSE;
 576       /* determines T for Jamo */
 577       if (maxexpansion->maxTSize < expansionsize) {
 578           maxexpansion->maxTSize = expansionsize;
 579       }
 580   }
 581
 582   if (maxexpansion->size == 0) {
 583     /* we'll always make the first element 0, for easier manipulation */
 584     maxexpansion->endExpansionCE =
 585                (uint32_t *)uprv_malloc(INIT_EXP_TABLE_SIZE * sizeof(uint32_t));
 586     /* test for NULL */;
 587     if (maxexpansion->endExpansionCE == NULL) {
 588         *status = U_MEMORY_ALLOCATION_ERROR;
 589         return 0;
 590     }
 591     *(maxexpansion->endExpansionCE) = 0;
 592     maxexpansion->isV =
 593                  (UBool *)uprv_malloc(INIT_EXP_TABLE_SIZE * sizeof(UBool));
 594     /* test for NULL */;
 595     if (maxexpansion->isV == NULL) {
 596         *status = U_MEMORY_ALLOCATION_ERROR;
 597         return 0;
 598     }
 599     *(maxexpansion->isV) = 0;
 600     maxexpansion->size     = INIT_EXP_TABLE_SIZE;
 601     maxexpansion->position = 0;
 602   }
 603
 604   if (maxexpansion->position + 1 == maxexpansion->size) {
 605     uint32_t *neweece = (uint32_t *)uprv_realloc(maxexpansion->endExpansionCE,
 606                                    2 * maxexpansion->size * sizeof(uint32_t));
 607     UBool    *newisV  = (UBool *)uprv_realloc(maxexpansion->isV,
 608                                    2 * maxexpansion->size * sizeof(UBool));
 609     if (neweece == NULL || newisV == NULL) {
 610 #ifdef UCOL_DEBUG
 611       fprintf(stderr, "out of memory for maxExpansions\n");
 612 #endif
 613       *status = U_MEMORY_ALLOCATION_ERROR;
 614       return -1;
 615     }
 616     maxexpansion->endExpansionCE  = neweece;
 617     maxexpansion->isV             = newisV;
 618     maxexpansion->size *= 2;
 619   }
 620
 621   uint32_t *pendexpansionce = maxexpansion->endExpansionCE;
 622   int       pos             = maxexpansion->position;
 623
 624   while (pos > 0) {
 625       pos --;
 626       if (*(pendexpansionce + pos) == endexpansion) {
 627           return maxexpansion->position;
 628       }
 629   }
 630
 631   *(pendexpansionce + maxexpansion->position) = endexpansion;
 632   *(maxexpansion->isV + maxexpansion->position) = isV;
 633   maxexpansion->position ++;
 634
 635   return maxexpansion->position;
 636 }
 637
 638
 639 static void ContrEndCPSet(uint8_t *table, UChar c) {
 640     uint32_t    hash;
 641     uint8_t     *htByte;
 642
 643     hash = c;
 644     if (hash >= UCOL_UNSAFECP_TABLE_SIZE*8) {
 645         hash = (hash & UCOL_UNSAFECP_TABLE_MASK) + 256;
 646     }
 647     htByte = &table[hash>>3];
 648     *htByte |= (1 << (hash & 7));
 649 }
 650
 651
 652 static void unsafeCPSet(uint8_t *table, UChar c) {
 653     uint32_t    hash;
 654     uint8_t     *htByte;
 655
 656     hash = c;
 657     if (hash >= UCOL_UNSAFECP_TABLE_SIZE*8) {
 658         if (hash >= 0xd800 && hash <= 0xf8ff) {
 659             /*  Part of a surrogate, or in private use area.            */
 660             /*   These don't go in the table                            */
 661             return;
 662         }
 663         hash = (hash & UCOL_UNSAFECP_TABLE_MASK) + 256;
 664     }
 665     htByte = &table[hash>>3];
 666     *htByte |= (1 << (hash & 7));
 667 }
 668
 669
 670 /*  to the UnsafeCP hash table, add all chars with combining class != 0     */
 671 static void uprv_uca_unsafeCPAddCCNZ(tempUCATable *t, UErrorCode *status) {
 672
 673     UChar              c;
 674     uint16_t           fcd;     // Hi byte is lead combining class.
 675                                 // lo byte is trailing combing class.
 676     const uint16_t    *fcdTrieData;
 677
 678     fcdTrieData = unorm_getFCDTrie(status);
 679     if (U_FAILURE(*status)) {
 680         return;
 681     }
 682
 683     for (c=0; c<0xffff; c++) {
 684         fcd = unorm_getFCD16(fcdTrieData, c);
 685         if (fcd >= 0x100 ||               // if the leading combining class(c) > 0 ||
 686             (UTF_IS_LEAD(c) && fcd != 0)) //    c is a leading surrogate with some FCD data
 687                 unsafeCPSet(t->unsafeCP, c);
 688     }
 689
 690     if(t->prefixLookup != NULL) {
 691       int32_t i = -1;
 692       const UHashElement *e = NULL;
 693       UCAElements *element = NULL;
 694       UChar NFCbuf[256];
 695       uint32_t NFCbufLen = 0;
 696       while((e = uhash_nextElement(t->prefixLookup, &i)) != NULL) {
 697         element = (UCAElements *)e->value.pointer;
 698         // codepoints here are in the NFD form. We need to add the
 699         // first code point of the NFC form to unsafe, because
 700         // strcoll needs to backup over them.
 701         NFCbufLen = unorm_normalize(element->cPoints, element->cSize, UNORM_NFC, 0,
 702           NFCbuf, 256, status);
 703         unsafeCPSet(t->unsafeCP, NFCbuf[0]);
 704       }
 705     }
 706 }
 707
 708 static uint32_t uprv_uca_addPrefix(tempUCATable *t, uint32_t CE,
 709                                  UCAElements *element, UErrorCode *status) {
 710   // currently the longest prefix we're supporting in Japanese is two characters
 711   // long. Although this table could quite easily mimic complete contraction stuff
 712   // there is no good reason to make a general solution, as it would require some
 713   // error prone messing.
 714     CntTable *contractions = t->contractions;
 715     UChar32 cp;
 716     uint32_t cpsize = 0;
 717     UChar *oldCP = element->cPoints;
 718     uint32_t oldCPSize = element->cSize;
 719
 720
 721     contractions->currentTag = SPEC_PROC_TAG;
 722
 723     // here, we will normalize & add prefix to the table.
 724     uint32_t j = 0;
 725 #ifdef UCOL_DEBUG
 726     for(j=0; j<element->cSize; j++) {
 727       fprintf(stdout, "CP: %04X ", element->cPoints[j]);
 728     }
 729     fprintf(stdout, "El: %08X Pref: ", CE);
 730     for(j=0; j<element->prefixSize; j++) {
 731       fprintf(stdout, "%04X ", element->prefix[j]);
 732     }
 733     fprintf(stdout, "%08X ", element->mapCE);
 734 #endif
 735
 736     for (j = 1; j<element->prefixSize; j++) {   /* First add NFD prefix chars to unsafe CP hash table */
 737       // Unless it is a trail surrogate, which is handled algoritmically and
 738       // shouldn't take up space in the table.
 739       if(!(UTF_IS_TRAIL(element->prefix[j]))) {
 740         unsafeCPSet(t->unsafeCP, element->prefix[j]);
 741       }
 742     }
 743
 744     UChar tempPrefix = 0;
 745
 746     for(j = 0; j < /*nfcSize*/element->prefixSize/2; j++) { // prefixes are going to be looked up backwards
 747       // therefore, we will promptly reverse the prefix buffer...
 748       tempPrefix = *(/*nfcBuffer*/element->prefix+element->prefixSize-j-1);
 749       *(/*nfcBuffer*/element->prefix+element->prefixSize-j-1) = element->prefix[j];
 750       element->prefix[j] = tempPrefix;
 751     }
 752
 753 #ifdef UCOL_DEBUG
 754     fprintf(stdout, "Reversed: ");
 755     for(j=0; j<element->prefixSize; j++) {
 756       fprintf(stdout, "%04X ", element->prefix[j]);
 757     }
 758     fprintf(stdout, "%08X\n", element->mapCE);
 759 #endif
 760
 761     // the first codepoint is also unsafe, as it forms a 'contraction' with the prefix
 762     if(!(UTF_IS_TRAIL(element->cPoints[0]))) {
 763       unsafeCPSet(t->unsafeCP, element->cPoints[0]);
 764     }
 765
 766     // Maybe we need this... To handle prefixes completely in the forward direction...
 767     //if(element->cSize == 1) {
 768     //  if(!(UTF_IS_TRAIL(element->cPoints[0]))) {
 769     //    ContrEndCPSet(t->contrEndCP, element->cPoints[0]);
 770     //  }
 771     //}
 772
 773     element->cPoints = element->prefix;
 774     element->cSize = element->prefixSize;
 775
 776     // Add the last char of the contraction to the contraction-end hash table.
 777     // unless it is a trail surrogate, which is handled algorithmically and
 778     // shouldn't be in the table
 779     if(!(UTF_IS_TRAIL(element->cPoints[element->cSize -1]))) {
 780       ContrEndCPSet(t->contrEndCP, element->cPoints[element->cSize -1]);
 781     }
 782
 783     // First we need to check if contractions starts with a surrogate
 784     UTF_NEXT_CHAR(element->cPoints, cpsize, element->cSize, cp);
 785
 786     // If there are any Jamos in the contraction, we should turn on special
 787     // processing for Jamos
 788     if(UCOL_ISJAMO(element->prefix[0])) {
 789       t->image->jamoSpecial = TRUE;
 790     }
 791     /* then we need to deal with it */
 792     /* we could aready have something in table - or we might not */
 793
 794     if(!isPrefix(CE)) {
 795       /* if it wasn't contraction, we wouldn't end up here*/
 796       int32_t firstContractionOffset = 0;
 797       int32_t contractionOffset = 0;
 798       firstContractionOffset = uprv_cnttab_addContraction(contractions, UPRV_CNTTAB_NEWELEMENT, 0, CE, status);
 799       uint32_t newCE = uprv_uca_processContraction(contractions, element, UCOL_NOT_FOUND, status);
 800       contractionOffset = uprv_cnttab_addContraction(contractions, firstContractionOffset, *element->prefix, newCE, status);
 801       contractionOffset = uprv_cnttab_addContraction(contractions, firstContractionOffset, 0xFFFF, CE, status);
 802       CE =  constructContractCE(SPEC_PROC_TAG, firstContractionOffset);
 803     } else { /* we are adding to existing contraction */
 804       /* there were already some elements in the table, so we need to add a new contraction */
 805       /* Two things can happen here: either the codepoint is already in the table, or it is not */
 806       int32_t position = uprv_cnttab_findCP(contractions, CE, *element->prefix, status);
 807       if(position > 0) {       /* if it is we just continue down the chain */
 808         uint32_t eCE = uprv_cnttab_getCE(contractions, CE, position, status);
 809         uint32_t newCE = uprv_uca_processContraction(contractions, element, eCE, status);
 810         uprv_cnttab_setContraction(contractions, CE, position, *(element->prefix), newCE, status);
 811       } else {                  /* if it isn't, we will have to create a new sequence */
 812         uprv_uca_processContraction(contractions, element, UCOL_NOT_FOUND, status);
 813         uprv_cnttab_insertContraction(contractions, CE, *(element->prefix), element->mapCE, status);
 814       }
 815     }
 816
 817     element->cPoints = oldCP;
 818     element->cSize = oldCPSize;
 819
 820     return CE;
 821 }
 822
 823 // Note regarding surrogate handling: We are interested only in the single
 824 // or leading surrogates in a contraction. If a surrogate is somewhere else
 825 // in the contraction, it is going to be handled as a pair of code units,
 826 // as it doesn't affect the performance AND handling surrogates specially
 827 // would complicate code way too much.
 828 static uint32_t uprv_uca_addContraction(tempUCATable *t, uint32_t CE,
 829                                  UCAElements *element, UErrorCode *status) {
 830     CntTable *contractions = t->contractions;
 831     UChar32 cp;
 832     uint32_t cpsize = 0;
 833
 834     contractions->currentTag = CONTRACTION_TAG;
 835
 836     // First we need to check if contractions starts with a surrogate
 837     UTF_NEXT_CHAR(element->cPoints, cpsize, element->cSize, cp);
 838
 839     if(cpsize<element->cSize) { // This is a real contraction, if there are other characters after the first
 840       uint32_t j = 0;
 841       for (j=1; j<element->cSize; j++) {   /* First add contraction chars to unsafe CP hash table */
 842         // Unless it is a trail surrogate, which is handled algoritmically and
 843         // shouldn't take up space in the table.
 844         if(!(UTF_IS_TRAIL(element->cPoints[j]))) {
 845           unsafeCPSet(t->unsafeCP, element->cPoints[j]);
 846         }
 847       }
 848       // Add the last char of the contraction to the contraction-end hash table.
 849       // unless it is a trail surrogate, which is handled algorithmically and
 850       // shouldn't be in the table
 851       if(!(UTF_IS_TRAIL(element->cPoints[element->cSize -1]))) {
 852         ContrEndCPSet(t->contrEndCP, element->cPoints[element->cSize -1]);
 853       }
 854
 855       // If there are any Jamos in the contraction, we should turn on special
 856       // processing for Jamos
 857       if(UCOL_ISJAMO(element->cPoints[0])) {
 858         t->image->jamoSpecial = TRUE;
 859       }
 860       /* then we need to deal with it */
 861       /* we could aready have something in table - or we might not */
 862       element->cPoints+=cpsize;
 863       element->cSize-=cpsize;
 864       if(!isContraction(CE)) {
 865         /* if it wasn't contraction, we wouldn't end up here*/
 866         int32_t firstContractionOffset = 0;
 867         int32_t contractionOffset = 0;
 868         firstContractionOffset = uprv_cnttab_addContraction(contractions, UPRV_CNTTAB_NEWELEMENT, 0, CE, status);
 869         uint32_t newCE = uprv_uca_processContraction(contractions, element, UCOL_NOT_FOUND, status);
 870         contractionOffset = uprv_cnttab_addContraction(contractions, firstContractionOffset, *element->cPoints, newCE, status);
 871         contractionOffset = uprv_cnttab_addContraction(contractions, firstContractionOffset, 0xFFFF, CE, status);
 872         CE =  constructContractCE(CONTRACTION_TAG, firstContractionOffset);
 873       } else { /* we are adding to existing contraction */
 874         /* there were already some elements in the table, so we need to add a new contraction */
 875         /* Two things can happen here: either the codepoint is already in the table, or it is not */
 876         int32_t position = uprv_cnttab_findCP(contractions, CE, *element->cPoints, status);
 877         if(position > 0) {       /* if it is we just continue down the chain */
 878           uint32_t eCE = uprv_cnttab_getCE(contractions, CE, position, status);
 879           uint32_t newCE = uprv_uca_processContraction(contractions, element, eCE, status);
 880           uprv_cnttab_setContraction(contractions, CE, position, *(element->cPoints), newCE, status);
 881         } else {                  /* if it isn't, we will have to create a new sequence */
 882           uint32_t newCE = uprv_uca_processContraction(contractions, element, UCOL_NOT_FOUND, status);
 883           uprv_cnttab_insertContraction(contractions, CE, *(element->cPoints), newCE, status);
 884         }
 885       }
 886       element->cPoints-=cpsize;
 887       element->cSize+=cpsize;
 888       /*ucmpe32_set(t->mapping, cp, CE);*/
 889       utrie_set32(t->mapping, cp, CE);
 890     } else if(!isContraction(CE)) { /* this is just a surrogate, and there is no contraction */
 891       /*ucmpe32_set(t->mapping, cp, element->mapCE);*/
 892       utrie_set32(t->mapping, cp, element->mapCE);
 893     } else { /* fill out the first stage of the contraction with the surrogate CE */
 894       uprv_cnttab_changeContraction(contractions, CE, 0, element->mapCE, status);
 895       uprv_cnttab_changeContraction(contractions, CE, 0xFFFF, element->mapCE, status);
 896     }
 897     return CE;
 898 }
 899
 900
 901 static uint32_t uprv_uca_processContraction(CntTable *contractions, UCAElements *element, uint32_t existingCE, UErrorCode *status) {
 902     int32_t firstContractionOffset = 0;
 903     int32_t contractionOffset = 0;
 904 //    uint32_t contractionElement = UCOL_NOT_FOUND;
 905
 906     if(U_FAILURE(*status)) {
 907         return UCOL_NOT_FOUND;
 908     }
 909
 910     /* end of recursion */
 911     if(element->cSize == 1) {
 912       if(isCntTableElement(existingCE) && ((UColCETags)getCETag(existingCE) == contractions->currentTag)) {
 913         uprv_cnttab_changeContraction(contractions, existingCE, 0, element->mapCE, status);
 914         uprv_cnttab_changeContraction(contractions, existingCE, 0xFFFF, element->mapCE, status);
 915         return existingCE;
 916       } else {
 917         return element->mapCE; /*can't do just that. existingCe might be a contraction, meaning that we need to do another step */
 918       }
 919     }
 920
 921     /* this recursion currently feeds on the only element we have... We will have to copy it in order to accomodate */
 922     /* for both backward and forward cycles */
 923
 924     /* we encountered either an empty space or a non-contraction element */
 925     /* this means we are constructing a new contraction sequence */
 926     element->cPoints++;
 927     element->cSize--;
 928     if(!isCntTableElement(existingCE)) {
 929       /* if it wasn't contraction, we wouldn't end up here*/
 930       firstContractionOffset = uprv_cnttab_addContraction(contractions, UPRV_CNTTAB_NEWELEMENT, 0, existingCE, status);
 931       uint32_t newCE = uprv_uca_processContraction(contractions, element, UCOL_NOT_FOUND, status);
 932       contractionOffset = uprv_cnttab_addContraction(contractions, firstContractionOffset, *element->cPoints, newCE, status);
 933       contractionOffset = uprv_cnttab_addContraction(contractions, firstContractionOffset, 0xFFFF, existingCE, status);
 934       existingCE =  constructContractCE(contractions->currentTag, firstContractionOffset);
 935     } else { /* we are adding to existing contraction */
 936       /* there were already some elements in the table, so we need to add a new contraction */
 937       /* Two things can happen here: either the codepoint is already in the table, or it is not */
 938       int32_t position = uprv_cnttab_findCP(contractions, existingCE, *element->cPoints, status);
 939       if(position > 0) {       /* if it is we just continue down the chain */
 940         uint32_t eCE = uprv_cnttab_getCE(contractions, existingCE, position, status);
 941         uint32_t newCE = uprv_uca_processContraction(contractions, element, eCE, status);
 942         uprv_cnttab_setContraction(contractions, existingCE, position, *(element->cPoints), newCE, status);
 943       } else {                  /* if it isn't, we will have to create a new sequence */
 944         uint32_t newCE = uprv_uca_processContraction(contractions, element, UCOL_NOT_FOUND, status);
 945         uprv_cnttab_insertContraction(contractions, existingCE, *(element->cPoints), newCE, status);
 946       }
 947     }
 948     element->cPoints--;
 949     element->cSize++;
 950     return existingCE;
 951 }
 952
 953 static uint32_t uprv_uca_finalizeAddition(tempUCATable *t, UCAElements *element, UErrorCode *status) {
 954   uint32_t CE = UCOL_NOT_FOUND;
 955   // This should add a completely ignorable element to the
 956   // unsafe table, so that backward iteration will skip
 957   // over it when treating contractions.
 958   uint32_t i = 0;
 959   if(element->mapCE == 0) {
 960     for(i = 0; i < element->cSize; i++) {
 961       if(!UTF_IS_TRAIL(element->cPoints[i])) {
 962         unsafeCPSet(t->unsafeCP, element->cPoints[i]);
 963       }
 964     }
 965   }
 966   if(element->cSize > 1) { /* we're adding a contraction */
 967     uint32_t i = 0;
 968     UChar32 cp;
 969
 970     UTF_NEXT_CHAR(element->cPoints, i, element->cSize, cp);
 971     /*CE = ucmpe32_get(t->mapping, cp);*/
 972     CE = utrie_get32(t->mapping, cp, NULL);
 973
 974     CE = uprv_uca_addContraction(t, CE, element, status);
 975   } else { /* easy case, */
 976     /*CE = ucmpe32_get(t->mapping, element->cPoints[0]);*/
 977     CE = utrie_get32(t->mapping, element->cPoints[0], NULL);
 978
 979     if( CE != UCOL_NOT_FOUND) {
 980       if(isCntTableElement(CE) /*isContraction(CE)*/) { /* adding a non contraction element (thai, expansion, single) to already existing contraction */
 981         if(!isPrefix(element->mapCE)) { // we cannot reenter prefix elements - as we are going to create a dead loop
 982           // Only expansions and regular CEs can go here... Contractions will never happen in this place
 983             uprv_cnttab_setContraction(t->contractions, CE, 0, 0, element->mapCE, status);
 984             /* This loop has to change the CE at the end of contraction REDO!*/
 985             uprv_cnttab_changeLastCE(t->contractions, CE, element->mapCE, status);
 986         }
 987       } else {
 988         /*ucmpe32_set(t->mapping, element->cPoints[0], element->mapCE);*/
 989         utrie_set32(t->mapping, element->cPoints[0], element->mapCE);
 990 #ifdef UCOL_DEBUG
 991         fprintf(stderr, "Warning - trying to overwrite existing data %08X for cp %04X with %08X\n", CE, element->cPoints[0], element->CEs[0]);
 992         //*status = U_ILLEGAL_ARGUMENT_ERROR;
 993 #endif
 994       }
 995     } else {
 996       /*ucmpe32_set(t->mapping, element->cPoints[0], element->mapCE);*/
 997       utrie_set32(t->mapping, element->cPoints[0], element->mapCE);
 998     }
 999   }
1000   return CE;
1001 }
1002
1003 /* This adds a read element, while testing for existence */
1004 U_CAPI uint32_t  U_EXPORT2
1005 uprv_uca_addAnElement(tempUCATable *t, UCAElements *element, UErrorCode *status) {
1006   ExpansionTable *expansions = t->expansions;
1007
1008   uint32_t i = 1;
1009   uint32_t expansion = 0;
1010   uint32_t CE;
1011
1012   if(U_FAILURE(*status)) {
1013       return 0xFFFF;
1014   }
1015
1016   element->mapCE = 0; // clear mapCE so that we can catch expansions
1017
1018   if(element->noOfCEs == 1) {
1019     if(element->isThai == FALSE) {
1020           element->mapCE = element->CEs[0];
1021     } else { /* add thai - totally bad here */
1022       expansion = (uint32_t)(UCOL_SPECIAL_FLAG | (THAI_TAG<<UCOL_TAG_SHIFT)
1023         | ((uprv_uca_addExpansion(expansions, element->CEs[0], status)+(headersize>>2))<<4)
1024         | 0x1);
1025       element->mapCE = expansion;
1026     }
1027   } else {
1028     /* ICU 2.1 long primaries */
1029     /* unfortunately, it looks like we have to look for a long primary here */
1030     /* since in canonical closure we are going to hit some long primaries from */
1031     /* the first phase, and they will come back as continuations/expansions */
1032     /* destroying the effect of the previous opitimization */
1033     /* A long primary is a three byte primary with starting secondaries and tertiaries */
1034     /* It can appear in long runs of only primary differences (like east Asian tailorings) */
1035     /* also, it should not be an expansion, as expansions would break with this */
1036     // This part came in from ucol_bld.cpp
1037     //if(tok->expansion == 0
1038       //&& noOfBytes[0] == 3 && noOfBytes[1] == 1 && noOfBytes[2] == 1
1039       //&& CEparts[1] == (UCOL_BYTE_COMMON << 24) && CEparts[2] == (UCOL_BYTE_COMMON << 24)) {
1040       /* we will construct a special CE that will go unchanged to the table */
1041     if(element->noOfCEs == 2 // a two CE expansion
1042       && isContinuation(element->CEs[1]) // which  is a continuation
1043       && (element->CEs[1] & (~(0xFF << 24 | UCOL_CONTINUATION_MARKER))) == 0 // that has only primaries in continuation,
1044       && (((element->CEs[0]>>8) & 0xFF) == UCOL_BYTE_COMMON) // a common secondary
1045       && ((element->CEs[0] & 0xFF) == UCOL_BYTE_COMMON) // and a common tertiary
1046       ) {
1047 #ifdef UCOL_DEBUG
1048       fprintf(stdout, "Long primary %04X\n", element->cPoints[0]);
1049 #endif
1050       element->mapCE = UCOL_SPECIAL_FLAG | (LONG_PRIMARY_TAG<<24) // a long primary special
1051         | ((element->CEs[0]>>8) & 0xFFFF00) // first and second byte of primary
1052         | ((element->CEs[1]>>24) & 0xFF);   // third byte of primary
1053     } else {
1054       expansion = (uint32_t)(UCOL_SPECIAL_FLAG | (EXPANSION_TAG<<UCOL_TAG_SHIFT)
1055         | ((uprv_uca_addExpansion(expansions, element->CEs[0], status)+(headersize>>2))<<4)
1056         & 0xFFFFF0);
1057
1058       for(i = 1; i<element->noOfCEs; i++) {
1059         uprv_uca_addExpansion(expansions, element->CEs[i], status);
1060       }
1061       if(element->noOfCEs <= 0xF) {
1062         expansion |= element->noOfCEs;
1063       } else {
1064         uprv_uca_addExpansion(expansions, 0, status);
1065       }
1066       element->mapCE = expansion;
1067       uprv_uca_setMaxExpansion(element->CEs[element->noOfCEs - 1],
1068                                (uint8_t)element->noOfCEs,
1069                                t->maxExpansions,
1070                                status);
1071       if(UCOL_ISJAMO(element->cPoints[0])) {
1072         t->image->jamoSpecial = TRUE;
1073         uprv_uca_setMaxJamoExpansion(element->cPoints[0],
1074                                  element->CEs[element->noOfCEs - 1],
1075                                  (uint8_t)element->noOfCEs,
1076                                  t->maxJamoExpansions,
1077                                  status);
1078       }
1079     }
1080   }
1081
1082   // We treat digits differently - they are "uber special" and should be
1083   // processed differently if numeric collation is on.
1084   UChar32 uniChar = 0;
1085   //printElement(element);
1086   if ((element->cSize == 2) && U16_IS_LEAD(element->uchars[0])){
1087       uniChar = U16_GET_SUPPLEMENTARY(element->uchars[0], element->uchars[1]);
1088   } else if (element->cSize == 1){
1089       uniChar = element->uchars[0];
1090   }
1091
1092   // Here, we either have one normal CE OR mapCE is set. Therefore, we stuff only
1093   // one element to the expansion buffer. When we encounter a digit and we don't
1094   // do numeric collation, we will just pick the CE we have and break out of case
1095   // (see ucol.cpp ucol_prv_getSpecialCE && ucol_prv_getSpecialPrevCE). If we picked
1096   // a special, further processing will occur. If it's a simple CE, we'll return due
1097   // to how the loop is constructed.
1098   if (uniChar != 0 && u_isdigit(uniChar)){
1099       expansion = (uint32_t)(UCOL_SPECIAL_FLAG | (DIGIT_TAG<<UCOL_TAG_SHIFT) | 1); // prepare the element
1100       if(element->mapCE) { // if there is an expansion, we'll pick it here
1101         expansion |= ((uprv_uca_addExpansion(expansions, element->mapCE, status)+(headersize>>2))<<4);
1102       } else {
1103         expansion |= ((uprv_uca_addExpansion(expansions, element->CEs[0], status)+(headersize>>2))<<4);
1104       }
1105       element->mapCE = expansion;
1106
1107       // Need to go back to the beginning of the digit string if in the middle!
1108       if(uniChar <= 0xFFFF) { // supplementaries are always unsafe. API takes UChars
1109         unsafeCPSet(t->unsafeCP, (UChar)uniChar);
1110       }
1111   }
1112
1113   // here we want to add the prefix structure.
1114   // I will try to process it as a reverse contraction, if possible.
1115   // prefix buffer is already reversed.
1116
1117   if(element->prefixSize!=0) {
1118     // We keep the seen prefix starter elements in a hashtable
1119     // we need it to be able to distinguish between the simple
1120     // codepoints and prefix starters. Also, we need to use it
1121     // for canonical closure.
1122
1123     UCAElements *composed = (UCAElements *)uprv_malloc(sizeof(UCAElements));
1124     /* test for NULL */
1125     if (composed == NULL) {
1126         *status = U_MEMORY_ALLOCATION_ERROR;
1127         return 0;
1128     }
1129     uprv_memcpy(composed, element, sizeof(UCAElements));
1130     composed->cPoints = composed->uchars;
1131     composed->prefix = composed->prefixChars;
1132
1133     composed->prefixSize = unorm_normalize(element->prefix, element->prefixSize, UNORM_NFC, 0, composed->prefix, 128, status);
1134
1135
1136     if(t->prefixLookup != NULL) {
1137       UCAElements *uCE = (UCAElements *)uhash_get(t->prefixLookup, element);
1138       if(uCE != NULL) { // there is already a set of code points here
1139         element->mapCE = uprv_uca_addPrefix(t, uCE->mapCE, element, status);
1140       } else { // no code points, so this spot is clean
1141         element->mapCE = uprv_uca_addPrefix(t, UCOL_NOT_FOUND, element, status);
1142         uCE = (UCAElements *)uprv_malloc(sizeof(UCAElements));
1143         /* test for NULL */
1144         if (uCE == NULL) {
1145             *status = U_MEMORY_ALLOCATION_ERROR;
1146             return 0;
1147         }
1148         uprv_memcpy(uCE, element, sizeof(UCAElements));
1149         uCE->cPoints = uCE->uchars;
1150         uhash_put(t->prefixLookup, uCE, uCE, status);
1151       }
1152       if(composed->prefixSize != element->prefixSize || uprv_memcmp(composed->prefix, element->prefix, element->prefixSize)) {
1153         // do it!
1154         composed->mapCE = uprv_uca_addPrefix(t, element->mapCE, composed, status);
1155       }
1156     }
1157     uprv_free(composed);
1158   }
1159
1160   // We need to use the canonical iterator here
1161   // the way we do it is to generate the canonically equivalent strings
1162   // for the contraction and then add the sequences that pass FCD check
1163   if(element->cSize > 1 && !(element->cSize==2 && UTF16_IS_LEAD(element->cPoints[0]) && UTF16_IS_TRAIL(element->cPoints[1]))) { // this is a contraction, we should check whether a composed form should also be included
1164     UnicodeString source(element->cPoints, element->cSize);
1165     CanonicalIterator it(source, *status);
1166     source = it.next();
1167     while(!source.isBogus()) {
1168       if(Normalizer::quickCheck(source, UNORM_FCD, *status) != UNORM_NO) {
1169         element->cSize = source.extract(element->cPoints, 128, *status);
1170         uprv_uca_finalizeAddition(t, element, status);
1171       }
1172       source = it.next();
1173     }
1174     CE = element->mapCE;
1175   } else {
1176       CE = uprv_uca_finalizeAddition(t, element, status);
1177   }
1178
1179   return CE;
1180 }
1181
1182
1183 /*void uprv_uca_getMaxExpansionJamo(CompactEIntArray       *mapping, */
1184 static void uprv_uca_getMaxExpansionJamo(UNewTrie       *mapping,
1185                                   MaxExpansionTable     *maxexpansion,
1186                                   MaxJamoExpansionTable *maxjamoexpansion,
1187                                   UBool                  jamospecial,
1188                                   UErrorCode            *status)
1189 {
1190   const uint32_t VBASE  = 0x1161;
1191   const uint32_t TBASE  = 0x11A8;
1192   const uint32_t VCOUNT = 21;
1193   const uint32_t TCOUNT = 28;
1194
1195   uint32_t v = VBASE + VCOUNT - 1;
1196   uint32_t t = TBASE + TCOUNT - 1;
1197   uint32_t ce;
1198
1199   while (v >= VBASE) {
1200       /*ce = ucmpe32_get(mapping, v);*/
1201       ce = utrie_get32(mapping, v, NULL);
1202       if (ce < UCOL_SPECIAL_FLAG) {
1203           uprv_uca_setMaxExpansion(ce, 2, maxexpansion, status);
1204       }
1205       v --;
1206   }
1207
1208   while (t >= TBASE)
1209   {
1210       /*ce = ucmpe32_get(mapping, t);*/
1211       ce = utrie_get32(mapping, t, NULL);
1212       if (ce < UCOL_SPECIAL_FLAG) {
1213           uprv_uca_setMaxExpansion(ce, 3, maxexpansion, status);
1214       }
1215       t --;
1216   }
1217   /*  According to the docs, 99% of the time, the Jamo will not be special */
1218   if (jamospecial) {
1219       /* gets the max expansion in all unicode characters */
1220       int     count    = maxjamoexpansion->position;
1221       uint8_t maxTSize = (uint8_t)(maxjamoexpansion->maxLSize +
1222                                    maxjamoexpansion->maxVSize +
1223                                    maxjamoexpansion->maxTSize);
1224       uint8_t maxVSize = (uint8_t)(maxjamoexpansion->maxLSize +
1225                                    maxjamoexpansion->maxVSize);
1226
1227       while (count > 0) {
1228           count --;
1229           if (*(maxjamoexpansion->isV + count) == TRUE) {
1230                 uprv_uca_setMaxExpansion(
1231                                    *(maxjamoexpansion->endExpansionCE + count),
1232                                    maxVSize, maxexpansion, status);
1233           }
1234           else {
1235                 uprv_uca_setMaxExpansion(
1236                                    *(maxjamoexpansion->endExpansionCE + count),
1237                                    maxTSize, maxexpansion, status);
1238           }
1239       }
1240   }
1241 }
1242
1243 U_CDECL_BEGIN
1244 static inline uint32_t U_CALLCONV
1245 getFoldedValue(UNewTrie *trie, UChar32 start, int32_t offset)
1246 {
1247   uint32_t value;
1248   uint32_t tag;
1249   UChar32 limit;
1250   UBool inBlockZero;
1251
1252   limit=start+0x400;
1253   while(start<limit) {
1254       value=utrie_get32(trie, start, &inBlockZero);
1255       tag = getCETag(value);
1256       if(inBlockZero == TRUE) {
1257           start+=UTRIE_DATA_BLOCK_LENGTH;
1258       } else if(!(isSpecial(value) && (tag == IMPLICIT_TAG || tag == NOT_FOUND_TAG))) {
1259         /* These are values that are starting in either UCA (IMPLICIT_TAG) or in the
1260          * tailorings (NOT_FOUND_TAG). Presence of these tags means that there is
1261          * nothing in this position and that it should be skipped.
1262          */
1263 #ifdef UCOL_DEBUG
1264         static int32_t count = 1;
1265         fprintf(stdout, "%i, Folded %08X, value %08X\n", count++, start, value);
1266 #endif
1267           return (uint32_t)(UCOL_SPECIAL_FLAG | (SURROGATE_TAG<<24) | offset);
1268       } else {
1269           ++start;
1270       }
1271   }
1272   return 0;
1273 }
1274 U_CDECL_END
1275
1276 #ifdef UCOL_DEBUG
1277 // This is a debug function to print the contents of a trie.
1278 // It is used in conjuction with the code around utrie_unserialize call
1279 void enumRange(const void *context, UChar32 start, UChar32 limit, uint32_t value) {
1280   if(start<0x10000) {
1281     fprintf(stdout, "%08X, %08X, %08X\n", start, limit, value);
1282   } else {
1283     fprintf(stdout, "%08X=%04X %04X, %08X=%04X %04X, %08X\n", start, UTF16_LEAD(start), UTF16_TRAIL(start), limit, UTF16_LEAD(limit), UTF16_TRAIL(limit), value);
1284   }
1285 }
1286
1287 int32_t
1288 myGetFoldingOffset(uint32_t data) {
1289   if(data > UCOL_NOT_FOUND && getCETag(data) == SURROGATE_TAG) {
1290     return (data&0xFFFFFF);
1291   } else {
1292     return 0;
1293   }
1294 }
1295 #endif
1296
1297 U_CAPI UCATableHeader* U_EXPORT2
1298 uprv_uca_assembleTable(tempUCATable *t, UErrorCode *status) {
1299     /*CompactEIntArray *mapping = t->mapping;*/
1300     UNewTrie *mapping = t->mapping;
1301     ExpansionTable *expansions = t->expansions;
1302     CntTable *contractions = t->contractions;
1303     MaxExpansionTable *maxexpansion = t->maxExpansions;
1304
1305     if(U_FAILURE(*status)) {
1306         return NULL;
1307     }
1308
1309     uint32_t beforeContractions = (uint32_t)((headersize+paddedsize(expansions->position*sizeof(uint32_t)))/sizeof(UChar));
1310
1311     int32_t contractionsSize = 0;
1312     contractionsSize = uprv_cnttab_constructTable(contractions, beforeContractions, status);
1313
1314     /* the following operation depends on the trie data. Therefore, we have to do it before */
1315     /* the trie is compacted */
1316     /* sets jamo expansions */
1317     uprv_uca_getMaxExpansionJamo(mapping, maxexpansion, t->maxJamoExpansions,
1318                                  t->image->jamoSpecial, status);
1319
1320     /*ucmpe32_compact(mapping);*/
1321     /*UMemoryStream *ms = uprv_mstrm_openNew(8192);*/
1322     /*int32_t mappingSize = ucmpe32_flattenMem(mapping, ms);*/
1323     /*const uint8_t *flattened = uprv_mstrm_getBuffer(ms, &mappingSize);*/
1324
1325     // After setting the jamo expansions, compact the trie and get the needed size
1326     int32_t mappingSize = utrie_serialize(mapping, NULL, 0, getFoldedValue /*getFoldedValue*/, FALSE, status);
1327
1328     uint32_t tableOffset = 0;
1329     uint8_t *dataStart;
1330
1331     /* TODO: LATIN1 array is now in the utrie - it should be removed from the calculation */
1332
1333     uint32_t toAllocate =(uint32_t)(headersize+
1334                                     paddedsize(expansions->position*sizeof(uint32_t))+
1335                                     paddedsize(mappingSize)+
1336                                     paddedsize(contractionsSize*(sizeof(UChar)+sizeof(uint32_t)))+
1337                                     //paddedsize(0x100*sizeof(uint32_t))  /* Latin1 is now included in the trie */
1338                                      /* maxexpansion array */
1339                                      + paddedsize(maxexpansion->position * sizeof(uint32_t)) +
1340                                      /* maxexpansion size array */
1341                                      paddedsize(maxexpansion->position * sizeof(uint8_t)) +
1342                                      paddedsize(UCOL_UNSAFECP_TABLE_SIZE) +   /*  Unsafe chars             */
1343                                      paddedsize(UCOL_UNSAFECP_TABLE_SIZE));    /*  Contraction Ending chars */
1344
1345
1346     dataStart = (uint8_t *)uprv_malloc(toAllocate);
1347     /* test for NULL */
1348     if (dataStart == NULL) {
1349         *status = U_MEMORY_ALLOCATION_ERROR;
1350         return NULL;
1351     }
1352
1353     UCATableHeader *myData = (UCATableHeader *)dataStart;
1354     // Please, do reset all the fields!
1355     uprv_memset(dataStart, 0, toAllocate);
1356     // Make sure we know this is reset
1357     myData->magic = UCOL_HEADER_MAGIC;
1358     myData->isBigEndian = U_IS_BIG_ENDIAN;
1359     myData->charSetFamily = U_CHARSET_FAMILY;
1360     myData->formatVersion[0] = UCA_FORMAT_VERSION_0;
1361     myData->formatVersion[1] = UCA_FORMAT_VERSION_1;
1362     myData->formatVersion[2] = UCA_FORMAT_VERSION_2;
1363     myData->formatVersion[3] = UCA_FORMAT_VERSION_3;
1364     myData->jamoSpecial = t->image->jamoSpecial;
1365
1366     // Don't copy stuff from UCA header!
1367     //uprv_memcpy(myData, t->image, sizeof(UCATableHeader));
1368
1369     myData->contractionSize = contractionsSize;
1370
1371     tableOffset += (uint32_t)(paddedsize(sizeof(UCATableHeader)));
1372
1373     myData->options = tableOffset;
1374     uprv_memcpy(dataStart+tableOffset, t->options, sizeof(UColOptionSet));
1375     tableOffset += (uint32_t)(paddedsize(sizeof(UColOptionSet)));
1376
1377     /* copy expansions */
1378     /*myData->expansion = (uint32_t *)dataStart+tableOffset;*/
1379     myData->expansion = tableOffset;
1380     uprv_memcpy(dataStart+tableOffset, expansions->CEs, expansions->position*sizeof(uint32_t));
1381     tableOffset += (uint32_t)(paddedsize(expansions->position*sizeof(uint32_t)));
1382
1383     /* contractions block */
1384     if(contractionsSize != 0) {
1385       /* copy contraction index */
1386       /*myData->contractionIndex = (UChar *)(dataStart+tableOffset);*/
1387       myData->contractionIndex = tableOffset;
1388       uprv_memcpy(dataStart+tableOffset, contractions->codePoints, contractionsSize*sizeof(UChar));
1389       tableOffset += (uint32_t)(paddedsize(contractionsSize*sizeof(UChar)));
1390
1391       /* copy contraction collation elements */
1392       /*myData->contractionCEs = (uint32_t *)(dataStart+tableOffset);*/
1393       myData->contractionCEs = tableOffset;
1394       uprv_memcpy(dataStart+tableOffset, contractions->CEs, contractionsSize*sizeof(uint32_t));
1395       tableOffset += (uint32_t)(paddedsize(contractionsSize*sizeof(uint32_t)));
1396     } else {
1397       myData->contractionIndex = 0;
1398       myData->contractionCEs = 0;
1399     }
1400
1401     /* copy mapping table */
1402     /*myData->mappingPosition = dataStart+tableOffset;*/
1403     /*myData->mappingPosition = tableOffset;*/
1404     /*uprv_memcpy(dataStart+tableOffset, flattened, mappingSize);*/
1405
1406     myData->mappingPosition = tableOffset;
1407     utrie_serialize(mapping, dataStart+tableOffset, toAllocate-tableOffset, getFoldedValue, FALSE, status);
1408 #ifdef UCOL_DEBUG
1409     // This is debug code to dump the contents of the trie. It needs two functions defined above
1410     {
1411       UTrie UCAt = { 0 };
1412       uint32_t trieWord;
1413       utrie_unserialize(&UCAt, dataStart+tableOffset, 9999999, status);
1414       UCAt.getFoldingOffset = myGetFoldingOffset;
1415       if(U_SUCCESS(*status)) {
1416         utrie_enum(&UCAt, NULL, enumRange, NULL);
1417       }
1418       trieWord = UTRIE_GET32_FROM_LEAD(UCAt, 0xDC01)
1419     }
1420 #endif
1421     tableOffset += paddedsize(mappingSize);
1422
1423
1424     int32_t i = 0;
1425
1426     /* copy max expansion table */
1427     myData->endExpansionCE      = tableOffset;
1428     myData->endExpansionCECount = maxexpansion->position;
1429     /* not copying the first element which is a dummy */
1430     uprv_memcpy(dataStart + tableOffset, maxexpansion->endExpansionCE + 1,
1431                 maxexpansion->position * sizeof(uint32_t));
1432     tableOffset += (uint32_t)(paddedsize(maxexpansion->position * sizeof(uint32_t)));
1433     myData->expansionCESize = tableOffset;
1434     uprv_memcpy(dataStart + tableOffset, maxexpansion->expansionCESize + 1,
1435                 maxexpansion->position * sizeof(uint8_t));
1436     tableOffset += (uint32_t)(paddedsize(maxexpansion->position * sizeof(uint8_t)));
1437
1438     /* Unsafe chars table.  Finish it off, then copy it. */
1439     uprv_uca_unsafeCPAddCCNZ(t, status);
1440     if (t->UCA != 0) {              /* Or in unsafebits from UCA, making a combined table.    */
1441        for (i=0; i<UCOL_UNSAFECP_TABLE_SIZE; i++) {
1442            t->unsafeCP[i] |= t->UCA->unsafeCP[i];
1443        }
1444     }
1445     myData->unsafeCP = tableOffset;
1446     uprv_memcpy(dataStart + tableOffset, t->unsafeCP, UCOL_UNSAFECP_TABLE_SIZE);
1447     tableOffset += paddedsize(UCOL_UNSAFECP_TABLE_SIZE);
1448
1449
1450     /* Finish building Contraction Ending chars hash table and then copy it out.  */
1451     if (t->UCA != 0) {              /* Or in unsafebits from UCA, making a combined table.    */
1452         for (i=0; i<UCOL_UNSAFECP_TABLE_SIZE; i++) {
1453             t->contrEndCP[i] |= t->UCA->contrEndCP[i];
1454         }
1455     }
1456     myData->contrEndCP = tableOffset;
1457     uprv_memcpy(dataStart + tableOffset, t->contrEndCP, UCOL_UNSAFECP_TABLE_SIZE);
1458     tableOffset += paddedsize(UCOL_UNSAFECP_TABLE_SIZE);
1459
1460     if(tableOffset != toAllocate) {
1461 #ifdef UCOL_DEBUG
1462         fprintf(stderr, "calculation screwup!!! Expected to write %i but wrote %i instead!!!\n", toAllocate, tableOffset);
1463 #endif
1464         *status = U_INTERNAL_PROGRAM_ERROR;
1465         uprv_free(dataStart);
1466         return 0;
1467     }
1468
1469     myData->size = tableOffset;
1470     /* This should happen upon ressurection */
1471     /*const uint8_t *mapPosition = (uint8_t*)myData+myData->mappingPosition;*/
1472     /*uprv_mstrm_close(ms);*/
1473     return myData;
1474 }
1475
1476
1477 struct enumStruct {
1478   tempUCATable *t;
1479   UCollator *tempColl;
1480   UCollationElements* colEl;
1481   int32_t noOfClosures;
1482   UErrorCode *status;
1483 };
1484 U_CDECL_BEGIN
1485 static UBool U_CALLCONV
1486 _enumCategoryRangeClosureCategory(const void *context, UChar32 start, UChar32 limit, UCharCategory type) {
1487
1488   UErrorCode *status = ((enumStruct *)context)->status;
1489   tempUCATable *t = ((enumStruct *)context)->t;
1490   UCollator *tempColl = ((enumStruct *)context)->tempColl;
1491   UCollationElements* colEl = ((enumStruct *)context)->colEl;
1492   UCAElements el;
1493   UChar decomp[256] = { 0 };
1494   int32_t noOfDec = 0;
1495
1496   UChar32 u32 = 0;
1497   UChar comp[2];
1498   uint32_t len = 0;
1499
1500   if (type != U_UNASSIGNED && type != U_PRIVATE_USE_CHAR) { // if the range is assigned - we might ommit more categories later
1501     for(u32 = start; u32 < limit; u32++) {
1502       noOfDec = unorm_getDecomposition(u32, FALSE, decomp, 256);
1503       //if((noOfDec = unorm_normalize(comp, len, UNORM_NFD, 0, decomp, 256, status)) > 1
1504         //|| (noOfDec == 1 && *decomp != (UChar)u32))
1505       if(noOfDec > 0) // if we're positive, that means there is no decomposition
1506       {
1507         len = 0;
1508         UTF_APPEND_CHAR_UNSAFE(comp, len, u32);
1509         if(ucol_strcoll(tempColl, comp, len, decomp, noOfDec) != UCOL_EQUAL) {
1510 #ifdef UCOL_DEBUG
1511           fprintf(stderr, "Closure: %08X -> ", u32);
1512           uint32_t i = 0;
1513           for(i = 0; i<noOfDec; i++) {
1514             fprintf(stderr, "%04X ", decomp[i]);
1515           }
1516           fprintf(stderr, "\n");
1517 #endif
1518           ((enumStruct *)context)->noOfClosures++;
1519           el.cPoints = decomp;
1520           el.cSize = noOfDec;
1521           el.noOfCEs = 0;
1522           el.prefix = el.prefixChars;
1523           el.prefixSize = 0;
1524
1525           UCAElements *prefix=(UCAElements *)uhash_get(t->prefixLookup, &el);
1526           if(prefix == NULL) {
1527             el.cPoints = comp;
1528             el.cSize = len;
1529             el.prefix = el.prefixChars;
1530             el.prefixSize = 0;
1531             el.noOfCEs = 0;
1532             ucol_setText(colEl, decomp, noOfDec, status);
1533             while((el.CEs[el.noOfCEs] = ucol_next(colEl, status)) != (uint32_t)UCOL_NULLORDER) {
1534               el.noOfCEs++;
1535             }
1536           } else {
1537             el.cPoints = comp;
1538             el.cSize = len;
1539             el.prefix = el.prefixChars;
1540             el.prefixSize = 0;
1541             el.noOfCEs = 1;
1542             el.CEs[0] = prefix->mapCE;
1543             // This character uses a prefix. We have to add it
1544             // to the unsafe table, as it decomposed form is already
1545             // in. In Japanese, this happens for \u309e & \u30fe
1546             // Since unsafeCPSet is static in ucol_elm, we are going
1547             // to wrap it up in the uprv_uca_unsafeCPAddCCNZ function
1548           }
1549           if(UCOL_ISTHAIPREVOWEL(el.cPoints[0])) {
1550             el.isThai = TRUE;
1551           } else {
1552             el.isThai = FALSE;
1553           }
1554
1555           uprv_uca_addAnElement(t, &el, status);
1556         }
1557       }
1558     }
1559   }
1560   return TRUE;
1561 }
1562 U_CDECL_END
1563
1564 U_CAPI int32_t U_EXPORT2
1565 uprv_uca_canonicalClosure(tempUCATable *t, UErrorCode *status)
1566 {
1567   enumStruct context;
1568   context.noOfClosures = 0;
1569   if(U_SUCCESS(*status)) {
1570     UCollator *tempColl = NULL;
1571     tempUCATable *tempTable = uprv_uca_cloneTempTable(t, status);
1572
1573     UCATableHeader *tempData = uprv_uca_assembleTable(tempTable, status);
1574     tempColl = ucol_initCollator(tempData, 0, t->UCA, status);
1575     uprv_uca_closeTempTable(tempTable);
1576
1577     if(U_SUCCESS(*status)) {
1578       tempColl->rb = NULL;
1579       tempColl->elements = NULL;
1580       tempColl->validLocale = NULL;
1581       tempColl->requestedLocale = NULL;
1582       tempColl->hasRealData = TRUE;
1583       tempColl->freeImageOnClose = TRUE;
1584     } else if(tempData != 0) {
1585       uprv_free(tempData);
1586     }
1587
1588     /* produce canonical closure */
1589     UCollationElements* colEl = ucol_openElements(tempColl, NULL, 0, status);
1590
1591     context.t = t;
1592     context.tempColl = tempColl;
1593     context.colEl = colEl;
1594     context.status = status;
1595     u_enumCharTypes(_enumCategoryRangeClosureCategory, &context);
1596
1597     ucol_closeElements(colEl);
1598     ucol_close(tempColl);
1599   }
1600   return context.noOfClosures;
1601 }
1602
1603 U_NAMESPACE_END
1604
1605 #endif /* #if !UCONFIG_NO_COLLATION */
1606
1607