X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/b75a7d8f3b4adbae880cab104ce2c6a50eee4db2..635ab21dab8439adcb4398246a23211b9a46cc1a:/icuSources/i18n/ucol_bld.cpp diff --git a/icuSources/i18n/ucol_bld.cpp b/icuSources/i18n/ucol_bld.cpp index f9bc084e..cfc5d6d3 100644 --- a/icuSources/i18n/ucol_bld.cpp +++ b/icuSources/i18n/ucol_bld.cpp @@ -1,7 +1,7 @@ /* ******************************************************************************* * -* Copyright (C) 2001-2003, International Business Machines +* Copyright (C) 2001-2004, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* @@ -28,7 +28,7 @@ #include "umutex.h" #include "unicode/uniset.h" -static const InverseUCATableHeader* invUCA = NULL; +static const InverseUCATableHeader* _staticInvUCA = NULL; static UDataMemory* invUCA_DATA_MEM = NULL; U_CDECL_BEGIN @@ -40,15 +40,15 @@ isAcceptableInvUCA(void * /*context*/, if( pInfo->size>=20 && pInfo->isBigEndian==U_IS_BIG_ENDIAN && pInfo->charsetFamily==U_CHARSET_FAMILY && - pInfo->dataFormat[0]==invUcaDataInfo.dataFormat[0] && /* dataFormat="InvC" */ - pInfo->dataFormat[1]==invUcaDataInfo.dataFormat[1] && - pInfo->dataFormat[2]==invUcaDataInfo.dataFormat[2] && - pInfo->dataFormat[3]==invUcaDataInfo.dataFormat[3] && - pInfo->formatVersion[0]==invUcaDataInfo.formatVersion[0] && - pInfo->formatVersion[1]>=invUcaDataInfo.formatVersion[1] //&& - //pInfo->formatVersion[1]==invUcaDataInfo.formatVersion[1] && - //pInfo->formatVersion[2]==invUcaDataInfo.formatVersion[2] && - //pInfo->formatVersion[3]==invUcaDataInfo.formatVersion[3] && + pInfo->dataFormat[0]==INVUCA_DATA_FORMAT_0 && /* dataFormat="InvC" */ + pInfo->dataFormat[1]==INVUCA_DATA_FORMAT_1 && + pInfo->dataFormat[2]==INVUCA_DATA_FORMAT_2 && + pInfo->dataFormat[3]==INVUCA_DATA_FORMAT_3 && + pInfo->formatVersion[0]==INVUCA_FORMAT_VERSION_0 && + pInfo->formatVersion[1]>=INVUCA_FORMAT_VERSION_1 //&& + //pInfo->formatVersion[1]==INVUCA_FORMAT_VERSION_1 && + //pInfo->formatVersion[2]==INVUCA_FORMAT_VERSION_2 && + //pInfo->formatVersion[3]==INVUCA_FORMAT_VERSION_3 && ) { UVersionInfo UCDVersion; u_getUnicodeVersion(UCDVersion); @@ -68,11 +68,11 @@ isAcceptableInvUCA(void * /*context*/, U_CDECL_END static -int32_t ucol_inv_findCE(uint32_t CE, uint32_t SecondCE) { - uint32_t bottom = 0, top = invUCA->tableSize; +int32_t ucol_inv_findCE(const UColTokenParser *src, uint32_t CE, uint32_t SecondCE) { + uint32_t bottom = 0, top = src->invUCA->tableSize; uint32_t i = 0; uint32_t first = 0, second = 0; - uint32_t *CETable = (uint32_t *)((uint8_t *)invUCA+invUCA->table); + uint32_t *CETable = (uint32_t *)((uint8_t *)src->invUCA+src->invUCA->table); while(bottom < top-1) { i = (top+bottom)/2; @@ -116,13 +116,14 @@ static const uint32_t strengthMask[UCOL_CE_STRENGTH_LIMIT] = { 0xFFFFFFFF }; -U_CAPI int32_t U_EXPORT2 ucol_inv_getNextCE(uint32_t CE, uint32_t contCE, +U_CAPI int32_t U_EXPORT2 ucol_inv_getNextCE(const UColTokenParser *src, + uint32_t CE, uint32_t contCE, uint32_t *nextCE, uint32_t *nextContCE, uint32_t strength) { - uint32_t *CETable = (uint32_t *)((uint8_t *)invUCA+invUCA->table); + uint32_t *CETable = (uint32_t *)((uint8_t *)src->invUCA+src->invUCA->table); int32_t iCE; - iCE = ucol_inv_findCE(CE, contCE); + iCE = ucol_inv_findCE(src, CE, contCE); if(iCE<0) { *nextCE = UCOL_NOT_FOUND; @@ -144,13 +145,14 @@ U_CAPI int32_t U_EXPORT2 ucol_inv_getNextCE(uint32_t CE, uint32_t contCE, return iCE; } -U_CAPI int32_t U_EXPORT2 ucol_inv_getPrevCE(uint32_t CE, uint32_t contCE, +U_CAPI int32_t U_EXPORT2 ucol_inv_getPrevCE(const UColTokenParser *src, + uint32_t CE, uint32_t contCE, uint32_t *prevCE, uint32_t *prevContCE, uint32_t strength) { - uint32_t *CETable = (uint32_t *)((uint8_t *)invUCA+invUCA->table); + uint32_t *CETable = (uint32_t *)((uint8_t *)src->invUCA+src->invUCA->table); int32_t iCE; - iCE = ucol_inv_findCE(CE, contCE); + iCE = ucol_inv_findCE(src, CE, contCE); if(iCE<0) { *prevCE = UCOL_NOT_FOUND; @@ -174,17 +176,30 @@ U_CAPI int32_t U_EXPORT2 ucol_inv_getPrevCE(uint32_t CE, uint32_t contCE, return iCE; } +U_CAPI uint32_t U_EXPORT2 ucol_getCEStrengthDifference(uint32_t CE, uint32_t contCE, + uint32_t prevCE, uint32_t prevContCE) { + uint32_t strength = UCOL_TERTIARY; + while(((prevCE & strengthMask[strength]) != (CE & strengthMask[strength]) + || (prevContCE & strengthMask[strength]) != (contCE & strengthMask[strength])) + && strength) { + strength--; + } + return strength; + +} + + static -inline int32_t ucol_inv_getPrevious(UColTokListHeader *lh, uint32_t strength) { +inline int32_t ucol_inv_getPrevious(UColTokenParser *src, UColTokListHeader *lh, uint32_t strength) { uint32_t CE = lh->baseCE; uint32_t SecondCE = lh->baseContCE; - uint32_t *CETable = (uint32_t *)((uint8_t *)invUCA+invUCA->table); + uint32_t *CETable = (uint32_t *)((uint8_t *)src->invUCA+src->invUCA->table); uint32_t previousCE, previousContCE; int32_t iCE; - iCE = ucol_inv_findCE(CE, SecondCE); + iCE = ucol_inv_findCE(src, CE, SecondCE); if(iCE<0) { return -1; @@ -207,15 +222,15 @@ inline int32_t ucol_inv_getPrevious(UColTokListHeader *lh, uint32_t strength) { } static -inline int32_t ucol_inv_getNext(UColTokListHeader *lh, uint32_t strength) { +inline int32_t ucol_inv_getNext(UColTokenParser *src, UColTokListHeader *lh, uint32_t strength) { uint32_t CE = lh->baseCE; uint32_t SecondCE = lh->baseContCE; - uint32_t *CETable = (uint32_t *)((uint8_t *)invUCA+invUCA->table); + uint32_t *CETable = (uint32_t *)((uint8_t *)src->invUCA+src->invUCA->table); uint32_t nextCE, nextContCE; int32_t iCE; - iCE = ucol_inv_findCE(CE, SecondCE); + iCE = ucol_inv_findCE(src, CE, SecondCE); if(iCE<0) { return -1; @@ -242,7 +257,7 @@ inline int32_t ucol_inv_getNext(UColTokListHeader *lh, uint32_t strength) { U_CFUNC void ucol_inv_getGapPositions(UColTokenParser *src, UColTokListHeader *lh, UErrorCode *status) { /* reset all the gaps */ int32_t i = 0; - uint32_t *CETable = (uint32_t *)((uint8_t *)invUCA+invUCA->table); + uint32_t *CETable = (uint32_t *)((uint8_t *)src->invUCA+src->invUCA->table); uint32_t st = 0; uint32_t t1, t2; int32_t pos; @@ -265,7 +280,7 @@ U_CFUNC void ucol_inv_getGapPositions(UColTokenParser *src, UColTokListHeader *l UCAConstants *consts = (UCAConstants *)((uint8_t *)src->UCA->image + src->UCA->image->UCAConsts); - if(lh->baseCE >= (consts->UCA_PRIMARY_IMPLICIT_MIN<<24) && lh->baseCE < (consts->UCA_PRIMARY_IMPLICIT_MAX<<24) ) { /* implicits - */ + if((lh->baseCE & 0xFF000000)>= (consts->UCA_PRIMARY_IMPLICIT_MIN<<24) && (lh->baseCE & 0xFF000000) <= (consts->UCA_PRIMARY_IMPLICIT_MAX<<24) ) { /* implicits - */ //if(lh->baseCE >= PRIMARY_IMPLICIT_MIN && lh->baseCE < PRIMARY_IMPLICIT_MAX ) { /* implicits - */ lh->pos[0] = 0; t1 = lh->baseCE; @@ -273,16 +288,12 @@ U_CFUNC void ucol_inv_getGapPositions(UColTokenParser *src, UColTokListHeader *l lh->gapsLo[0] = (t1 & UCOL_PRIMARYMASK) | (t2 & UCOL_PRIMARYMASK) >> 16; lh->gapsLo[1] = (t1 & UCOL_SECONDARYMASK) << 16 | (t2 & UCOL_SECONDARYMASK) << 8; lh->gapsLo[2] = (UCOL_TERTIARYORDER(t1)) << 24 | (UCOL_TERTIARYORDER(t2)) << 16; - if(lh->baseCE < 0xEF000000) { - /* first implicits have three byte primaries, with a gap of one */ - /* so we esentially need to add 2 to the top byte in lh->baseContCE */ - t2 += 0x02000000; - } else { - /* second implicits have four byte primaries, with a gap of IMPLICIT_LAST2_MULTIPLIER_ */ - /* Now, this guy is not really accessible here, so until we find a better way to pass it */ - /* around, we'll assume that the gap is 1 */ - t2 += 0x00020000; - } + uint32_t primaryCE = t1 & UCOL_PRIMARYMASK | (t2 & UCOL_PRIMARYMASK) >> 16; + primaryCE = uprv_uca_getImplicitFromRaw(uprv_uca_getRawFromImplicit(primaryCE)+1); + + t1 = primaryCE & UCOL_PRIMARYMASK | 0x0505; + t2 = (primaryCE << 16) & UCOL_PRIMARYMASK | UCOL_CONTINUATION_MARKER; + lh->gapsHi[0] = (t1 & UCOL_PRIMARYMASK) | (t2 & UCOL_PRIMARYMASK) >> 16; lh->gapsHi[1] = (t1 & UCOL_SECONDARYMASK) << 16 | (t2 & UCOL_SECONDARYMASK) << 8; lh->gapsHi[2] = (UCOL_TERTIARYORDER(t1)) << 24 | (UCOL_TERTIARYORDER(t2)) << 16; @@ -302,7 +313,7 @@ U_CFUNC void ucol_inv_getGapPositions(UColTokenParser *src, UColTokListHeader *l } else { for(;;) { if(tokStrength < UCOL_CE_STRENGTH_LIMIT) { - if((lh->pos[tokStrength] = ucol_inv_getNext(lh, tokStrength)) >= 0) { + if((lh->pos[tokStrength] = ucol_inv_getNext(src, lh, tokStrength)) >= 0) { lh->fStrToken[tokStrength] = tok; } else { /* The CE must be implicit, since it's not in the table */ /* Error */ @@ -339,9 +350,11 @@ U_CFUNC void ucol_inv_getGapPositions(UColTokenParser *src, UColTokListHeader *l lh->gapsHi[3*st+1] = (t1 & UCOL_SECONDARYMASK) << 16 | (t2 & UCOL_SECONDARYMASK) << 8; //lh->gapsHi[3*st+2] = (UCOL_TERTIARYORDER(t1)) << 24 | (UCOL_TERTIARYORDER(t2)) << 16; lh->gapsHi[3*st+2] = (t1&0x3f) << 24 | (t2&0x3f) << 16; - pos--; - t1 = *(CETable+3*(pos)); - t2 = *(CETable+3*(pos)+1); + //pos--; + //t1 = *(CETable+3*(pos)); + //t2 = *(CETable+3*(pos)+1); + t1 = lh->baseCE; + t2 = lh->baseContCE; lh->gapsLo[3*st] = (t1 & UCOL_PRIMARYMASK) | (t2 & UCOL_PRIMARYMASK) >> 16; lh->gapsLo[3*st+1] = (t1 & UCOL_SECONDARYMASK) << 16 | (t2 & UCOL_SECONDARYMASK) << 8; lh->gapsLo[3*st+2] = (t1&0x3f) << 24 | (t2&0x3f) << 16; @@ -402,7 +415,14 @@ U_CFUNC uint32_t ucol_getCEGenerator(ucolCEGenerator *g, uint32_t* lows, uint32_ uint32_t strength = tok->strength; uint32_t low = lows[fStrength*3+strength]; uint32_t high = highs[fStrength*3+strength]; - uint32_t maxByte = (strength == UCOL_TERTIARY)?0x3F:0xFF; + uint32_t maxByte = 0; + if(strength == UCOL_TERTIARY) { + maxByte = 0x3F; + } else if(strength == UCOL_PRIMARY) { + maxByte = 0xFE; + } else { + maxByte = 0xFF; + } uint32_t count = tok->toInsert; @@ -439,9 +459,10 @@ U_CFUNC uint32_t ucol_getCEGenerator(ucolCEGenerator *g, uint32_t* lows, uint32_ if(high > (UCOL_COMMON_BOT2<<24) && high < (uint32_t)(UCOL_COMMON_TOP2<<24)) { high = UCOL_COMMON_TOP2<<24; } - if(low < UCOL_COMMON_BOT2<<24) { - g->noOfRanges = ucol_allocWeights(UCOL_COMMON_TOP2<<24, high, count, maxByte, g->ranges); - g->current = UCOL_COMMON_BOT2; + if(low < (UCOL_COMMON_BOT2<<24)) { + g->noOfRanges = ucol_allocWeights(UCOL_BYTE_UNSHIFTED_MIN<<24, high, count, maxByte, g->ranges); + g->current = ucol_nextWeight(g->ranges, &g->noOfRanges); + //g->current = UCOL_COMMON_BOT2<<24; return g->current; } } @@ -454,7 +475,127 @@ U_CFUNC uint32_t ucol_getCEGenerator(ucolCEGenerator *g, uint32_t* lows, uint32_ return g->current; } -U_CFUNC void ucol_doCE(uint32_t *CEparts, UColToken *tok) { +static +uint32_t u_toLargeKana(const UChar *source, const uint32_t sourceLen, UChar *resBuf, const uint32_t resLen, UErrorCode *status) { + uint32_t i = 0; + UChar c; + + if(U_FAILURE(*status)) { + return 0; + } + + if(sourceLen > resLen) { + *status = U_MEMORY_ALLOCATION_ERROR; + return 0; + } + + for(i = 0; i < sourceLen; i++) { + c = source[i]; + if(0x3042 < c && c < 0x30ef) { /* Kana range */ + switch(c - 0x3000) { + case 0x41: case 0x43: case 0x45: case 0x47: case 0x49: case 0x63: case 0x83: case 0x85: case 0x8E: + case 0xA1: case 0xA3: case 0xA5: case 0xA7: case 0xA9: case 0xC3: case 0xE3: case 0xE5: case 0xEE: + c++; + break; + case 0xF5: + c = 0x30AB; + break; + case 0xF6: + c = 0x30B1; + break; + } + } + resBuf[i] = c; + } + return sourceLen; +} + +static +uint32_t u_toSmallKana(const UChar *source, const uint32_t sourceLen, UChar *resBuf, const uint32_t resLen, UErrorCode *status) { + uint32_t i = 0; + UChar c; + + if(U_FAILURE(*status)) { + return 0; + } + + if(sourceLen > resLen) { + *status = U_MEMORY_ALLOCATION_ERROR; + return 0; + } + + for(i = 0; i < sourceLen; i++) { + c = source[i]; + if(0x3042 < c && c < 0x30ef) { /* Kana range */ + switch(c - 0x3000) { + case 0x42: case 0x44: case 0x46: case 0x48: case 0x4A: case 0x64: case 0x84: case 0x86: case 0x8F: + case 0xA2: case 0xA4: case 0xA6: case 0xA8: case 0xAA: case 0xC4: case 0xE4: case 0xE6: case 0xEF: + c--; + break; + case 0xAB: + c = 0x30F5; + break; + case 0xB1: + c = 0x30F6; + break; + } + } + resBuf[i] = c; + } + return sourceLen; +} + +static +uint8_t ucol_uprv_getCaseBits(const UCollator *UCA, const UChar *src, uint32_t len, UErrorCode *status) { + uint32_t i = 0; + UChar n[128]; + uint32_t nLen = 0; + uint32_t uCount = 0, lCount = 0; + + collIterate s; + uint32_t order = 0; + + if(U_FAILURE(*status)) { + return UCOL_LOWER_CASE; + } + + nLen = unorm_normalize(src, len, UNORM_NFKD, 0, n, 128, status); + if(U_SUCCESS(*status)) { + for(i = 0; i < nLen; i++) { + uprv_init_collIterate(UCA, &n[i], 1, &s); + order = ucol_getNextCE(UCA, &s, status); + if(isContinuation(order)) { + *status = U_INTERNAL_PROGRAM_ERROR; + return UCOL_LOWER_CASE; + } + if((order&UCOL_CASE_BIT_MASK)== UCOL_UPPER_CASE) { + uCount++; + } else { + if(u_islower(n[i])) { + lCount++; + } else { + UChar sk[1], lk[1]; + u_toSmallKana(&n[i], 1, sk, 1, status); + u_toLargeKana(&n[i], 1, lk, 1, status); + if(sk[0] == n[i] && lk[0] != n[i]) { + lCount++; + } + } + } + } + } + + if(uCount != 0 && lCount != 0) { + return UCOL_MIXED_CASE; + } else if(uCount != 0) { + return UCOL_UPPER_CASE; + } else { + return UCOL_LOWER_CASE; + } +} + + +U_CFUNC void ucol_doCE(UColTokenParser *src, uint32_t *CEparts, UColToken *tok, UErrorCode *status) { /* this one makes the table and stuff */ uint32_t noOfBytes[3]; uint32_t i; @@ -494,6 +635,22 @@ U_CFUNC void ucol_doCE(uint32_t *CEparts, UColToken *tok) { tok->noOfCEs = CEi; } + + // we want to set case bits here and now, not later. + // Case bits handling + tok->CEs[0] &= 0xFFFFFF3F; // Clean the case bits field + int32_t cSize = (tok->source & 0xFF000000) >> 24; + UChar *cPoints = (tok->source & 0x00FFFFFF) + src->source; + + if(cSize > 1) { + // Do it manually + tok->CEs[0] |= ucol_uprv_getCaseBits(src->UCA, cPoints, cSize, status); + } else { + // Copy it from the UCA + uint32_t caseCE = ucol_getFirstCE(src->UCA, cPoints[0], status); + tok->CEs[0] |= (caseCE & 0xC0); + } + #if UCOL_DEBUG==2 fprintf(stderr, "%04X str: %i, [%08X, %08X, %08X]: tok: ", tok->debugSource, tok->strength, CEparts[0] >> (32-8*noOfBytes[0]), CEparts[1] >> (32-8*noOfBytes[1]), CEparts[2]>> (32-8*noOfBytes[2])); for(i = 0; inoOfCEs; i++) { @@ -604,130 +761,11 @@ U_CFUNC void ucol_initBuffers(UColTokenParser *src, UColTokListHeader *lh, UErro CEparts[UCOL_TERTIARY] = ucol_getSimpleCEGenerator(&Gens[UCOL_TERTIARY], tok, UCOL_TERTIARY, status); } } - ucol_doCE(CEparts, tok); + ucol_doCE(src, CEparts, tok, status); tok = tok->next; } } -static -uint32_t u_toLargeKana(const UChar *source, const uint32_t sourceLen, UChar *resBuf, const uint32_t resLen, UErrorCode *status) { - uint32_t i = 0; - UChar c; - - if(U_FAILURE(*status)) { - return 0; - } - - if(sourceLen > resLen) { - *status = U_MEMORY_ALLOCATION_ERROR; - return 0; - } - - for(i = 0; i < sourceLen; i++) { - c = source[i]; - if(0x3042 < c && c < 0x30ef) { /* Kana range */ - switch(c - 0x3000) { - case 0x41: case 0x43: case 0x45: case 0x47: case 0x49: case 0x63: case 0x83: case 0x85: case 0x8E: - case 0xA1: case 0xA3: case 0xA5: case 0xA7: case 0xA9: case 0xC3: case 0xE3: case 0xE5: case 0xEE: - c++; - break; - case 0xF5: - c = 0x30AB; - break; - case 0xF6: - c = 0x30B1; - break; - } - } - resBuf[i] = c; - } - return sourceLen; -} - -static -uint32_t u_toSmallKana(const UChar *source, const uint32_t sourceLen, UChar *resBuf, const uint32_t resLen, UErrorCode *status) { - uint32_t i = 0; - UChar c; - - if(U_FAILURE(*status)) { - return 0; - } - - if(sourceLen > resLen) { - *status = U_MEMORY_ALLOCATION_ERROR; - return 0; - } - - for(i = 0; i < sourceLen; i++) { - c = source[i]; - if(0x3042 < c && c < 0x30ef) { /* Kana range */ - switch(c - 0x3000) { - case 0x42: case 0x44: case 0x46: case 0x48: case 0x4A: case 0x64: case 0x84: case 0x86: case 0x8F: - case 0xA2: case 0xA4: case 0xA6: case 0xA8: case 0xAA: case 0xC4: case 0xE4: case 0xE6: case 0xEF: - c--; - break; - case 0xAB: - c = 0x30F5; - break; - case 0xB1: - c = 0x30F6; - break; - } - } - resBuf[i] = c; - } - return sourceLen; -} - -static -uint8_t ucol_uprv_getCaseBits(const UCollator *UCA, const UChar *src, uint32_t len, UErrorCode *status) { - uint32_t i = 0; - UChar n[128]; - uint32_t nLen = 0; - uint32_t uCount = 0, lCount = 0; - - collIterate s; - uint32_t order = 0; - - if(U_FAILURE(*status)) { - return UCOL_LOWER_CASE; - } - - nLen = unorm_normalize(src, len, UNORM_NFKD, 0, n, 128, status); - if(U_SUCCESS(*status)) { - for(i = 0; i < nLen; i++) { - uprv_init_collIterate(UCA, &n[i], 1, &s); - order = ucol_getNextCE(UCA, &s, status); - if(isContinuation(order)) { - *status = U_INTERNAL_PROGRAM_ERROR; - return UCOL_LOWER_CASE; - } - if((order&UCOL_CASE_BIT_MASK)== UCOL_UPPER_CASE) { - uCount++; - } else { - if(u_islower(n[i])) { - lCount++; - } else { - UChar sk[1], lk[1]; - u_toSmallKana(&n[i], 1, sk, 1, status); - u_toLargeKana(&n[i], 1, lk, 1, status); - if(sk[0] == n[i] && lk[0] != n[i]) { - lCount++; - } - } - } - } - } - - if(uCount != 0 && lCount != 0) { - return UCOL_MIXED_CASE; - } else if(uCount != 0) { - return UCOL_UPPER_CASE; - } else { - return UCOL_LOWER_CASE; - } -} - U_CFUNC void ucol_createElements(UColTokenParser *src, tempUCATable *t, UColTokListHeader *lh, UErrorCode *status) { UCAElements el; UColToken *tok = lh->first; @@ -834,6 +872,8 @@ U_CFUNC void ucol_createElements(UColTokenParser *src, tempUCATable *t, UColTokL } } +#if 0 + // we do case bits in doCE now, since we will mess up expansions otherwise. // Case bits handling el.CEs[0] &= 0xFFFFFF3F; // Clean the case bits field if(el.cSize > 1) { @@ -844,6 +884,7 @@ U_CFUNC void ucol_createElements(UColTokenParser *src, tempUCATable *t, UColTokL uint32_t caseCE = ucol_getFirstCE(src->UCA, el.cPoints[0], status); el.CEs[0] |= (caseCE & 0xC0); } +#endif /* and then, add it */ #if UCOL_DEBUG==2 @@ -1036,7 +1077,12 @@ UCATableHeader *ucol_assembleTailoringTable(UColTokenParser *src, UErrorCode *st /* We stuff the initial value in the buffers, and increase the appropriate buffer */ /* According to strength */ if(U_SUCCESS(*status)) { - ucol_initBuffers(src, &src->lh[i], status); + if(src->lh[i].first) { // if there are any elements + // due to the way parser works, subsequent tailorings + // may remove all the elements from a sequence, therefore + // leaving an empty tailoring sequence. + ucol_initBuffers(src, &src->lh[i], status); + } } if(U_FAILURE(*status)) { return NULL; @@ -1062,7 +1108,7 @@ UCATableHeader *ucol_assembleTailoringTable(UColTokenParser *src, UErrorCode *st } - tempUCATable *t = uprv_uca_initTempTable(image, src->opts, src->UCA, NOT_FOUND_TAG, status); + tempUCATable *t = uprv_uca_initTempTable(image, src->opts, src->UCA, NOT_FOUND_TAG, NOT_FOUND_TAG, status); /* After this, we have assigned CE values to all regular CEs */ @@ -1129,7 +1175,7 @@ UCATableHeader *ucol_assembleTailoringTable(UColTokenParser *src, UErrorCode *st el.cSize = 2; } ucol_setText(ucaEl, el.uchars, el.cSize, status); - while ((el.CEs[el.noOfCEs] = ucol_next(ucaEl, status)) != UCOL_NULLORDER) { + while ((int32_t)(el.CEs[el.noOfCEs] = ucol_next(ucaEl, status)) != UCOL_NULLORDER) { el.noOfCEs++; } uprv_uca_addAnElement(t, &el, status); @@ -1161,14 +1207,16 @@ UCATableHeader *ucol_assembleTailoringTable(UColTokenParser *src, UErrorCode *st return myData; } -UBool +U_CDECL_BEGIN +static UBool U_CALLCONV ucol_bld_cleanup(void) { udata_close(invUCA_DATA_MEM); invUCA_DATA_MEM = NULL; - invUCA = NULL; + _staticInvUCA = NULL; return TRUE; } +U_CDECL_END U_CAPI const InverseUCATableHeader * U_EXPORT2 ucol_initInverseUCA(UErrorCode *status) @@ -1176,7 +1224,7 @@ ucol_initInverseUCA(UErrorCode *status) if(U_FAILURE(*status)) return NULL; umtx_lock(NULL); - UBool f = (invUCA == NULL); + UBool f = (_staticInvUCA == NULL); umtx_unlock(NULL); if(f) { @@ -1203,8 +1251,8 @@ ucol_initInverseUCA(UErrorCode *status) } umtx_lock(NULL); - if(invUCA == NULL) { - invUCA = newInvUCA; + if(_staticInvUCA == NULL) { + _staticInvUCA = newInvUCA; invUCA_DATA_MEM = result; result = NULL; newInvUCA = NULL; @@ -1218,11 +1266,11 @@ ucol_initInverseUCA(UErrorCode *status) //uprv_free(newInvUCA); } else { - ucln_i18n_registerCleanup(); + ucln_i18n_registerCleanup(UCLN_I18N_UCOL_BLD, ucol_bld_cleanup); } } } - return invUCA; + return _staticInvUCA; } #endif /* #if !UCONFIG_NO_COLLATION */