X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/73c04bcfe1096173b00431f0cdc742894b15eef0..48b980fed3435926e0b3a8d72ecb58be703a1c7a:/icuSources/i18n/ucol_res.cpp diff --git a/icuSources/i18n/ucol_res.cpp b/icuSources/i18n/ucol_res.cpp index 578fa8a3..7796ad15 100644 --- a/icuSources/i18n/ucol_res.cpp +++ b/icuSources/i18n/ucol_res.cpp @@ -1,6 +1,6 @@ /* ******************************************************************************* -* Copyright (C) 1996-2006, International Business Machines +* Copyright (C) 1996-2008, International Business Machines * Corporation and others. All Rights Reserved. ******************************************************************************* * file name: ucol_res.cpp @@ -38,33 +38,125 @@ #include "ustr_imp.h" #include "cstring.h" #include "umutex.h" +#include "ucln_in.h" #include "ustrenum.h" #include "putilimp.h" #include "utracimp.h" #include "cmemory.h" +U_NAMESPACE_USE + +// static UCA. There is only one. Collators don't use it. +// It is referenced only in ucol_initUCA and ucol_cleanup +static UCollator* _staticUCA = NULL; +// static pointer to udata memory. Inited in ucol_initUCA +// used for cleanup in ucol_cleanup +static UDataMemory* UCA_DATA_MEM = NULL; + U_CDECL_BEGIN -static void U_CALLCONV -ucol_prv_closeResources(UCollator *coll) { - if(coll->rb != NULL) { /* pointing to read-only memory */ - ures_close(coll->rb); - } - if(coll->elements != NULL) { - ures_close(coll->elements); - } +static UBool U_CALLCONV +ucol_res_cleanup(void) +{ + if (UCA_DATA_MEM) { + udata_close(UCA_DATA_MEM); + UCA_DATA_MEM = NULL; + } + if (_staticUCA) { + ucol_close(_staticUCA); + _staticUCA = NULL; + } + return TRUE; +} + +static UBool U_CALLCONV +isAcceptableUCA(void * /*context*/, + const char * /*type*/, const char * /*name*/, + const UDataInfo *pInfo){ + /* context, type & name are intentionally not used */ + if( pInfo->size>=20 && + pInfo->isBigEndian==U_IS_BIG_ENDIAN && + pInfo->charsetFamily==U_CHARSET_FAMILY && + pInfo->dataFormat[0]==UCA_DATA_FORMAT_0 && /* dataFormat="UCol" */ + pInfo->dataFormat[1]==UCA_DATA_FORMAT_1 && + pInfo->dataFormat[2]==UCA_DATA_FORMAT_2 && + pInfo->dataFormat[3]==UCA_DATA_FORMAT_3 && + pInfo->formatVersion[0]==UCA_FORMAT_VERSION_0 && + pInfo->formatVersion[1]>=UCA_FORMAT_VERSION_1// && + //pInfo->formatVersion[1]==UCA_FORMAT_VERSION_1 && + //pInfo->formatVersion[2]==UCA_FORMAT_VERSION_2 && // Too harsh + //pInfo->formatVersion[3]==UCA_FORMAT_VERSION_3 && // Too harsh + ) { + UVersionInfo UCDVersion; + u_getUnicodeVersion(UCDVersion); + return (UBool)(pInfo->dataVersion[0]==UCDVersion[0] + && pInfo->dataVersion[1]==UCDVersion[1]); + //&& pInfo->dataVersion[2]==ucaDataInfo.dataVersion[2] + //&& pInfo->dataVersion[3]==ucaDataInfo.dataVersion[3]); + } else { + return FALSE; + } } U_CDECL_END +/* do not close UCA returned by ucol_initUCA! */ +UCollator * +ucol_initUCA(UErrorCode *status) { + if(U_FAILURE(*status)) { + return NULL; + } + UBool needsInit; + UMTX_CHECK(NULL, (_staticUCA == NULL), needsInit); + + if(needsInit) { + UDataMemory *result = udata_openChoice(NULL, UCA_DATA_TYPE, UCA_DATA_NAME, isAcceptableUCA, NULL, status); + + if(U_SUCCESS(*status)){ + UCollator *newUCA = ucol_initCollator((const UCATableHeader *)udata_getMemory(result), NULL, NULL, status); + if(U_SUCCESS(*status)){ + umtx_lock(NULL); + if(_staticUCA == NULL) { + _staticUCA = newUCA; + newUCA = NULL; + UCA_DATA_MEM = result; + result = NULL; + } + umtx_unlock(NULL); + + ucln_i18n_registerCleanup(UCLN_I18N_UCOL_RES, ucol_res_cleanup); + if(newUCA != NULL) { + ucol_close(newUCA); + udata_close(result); + } + // Initalize variables for implicit generation + uprv_uca_initImplicitConstants(status); + }else{ + ucol_close(newUCA); + udata_close(result); + } + } + else { + udata_close(result); + } + } + return _staticUCA; +} + +U_CAPI void U_EXPORT2 +ucol_forgetUCA(void) +{ + _staticUCA = NULL; + UCA_DATA_MEM = NULL; +} + /****************************************************************************/ /* Following are the open/close functions */ /* */ /****************************************************************************/ static UCollator* tryOpeningFromRules(UResourceBundle *collElem, UErrorCode *status) { - int32_t rulesLen = 0; - const UChar *rules = ures_getStringByKey(collElem, "Sequence", &rulesLen, status); - return ucol_openRules(rules, rulesLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, status); - + int32_t rulesLen = 0; + const UChar *rules = ures_getStringByKey(collElem, "Sequence", &rulesLen, status); + return ucol_openRules(rules, rulesLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, status); } @@ -74,6 +166,7 @@ U_CFUNC UCollator* ucol_open_internal(const char *loc, UErrorCode *status) { + UErrorCode intStatus = U_ZERO_ERROR; const UCollator* UCA = ucol_initUCA(status); /* New version */ @@ -91,7 +184,7 @@ ucol_open_internal(const char *loc, // if there is a keyword, we pick it up and try to get elements if(!uloc_getKeywordValue(loc, "collation", keyBuffer, 256, status)) { // no keyword. we try to find the default setting, which will give us the keyword value - UErrorCode intStatus = U_ZERO_ERROR; + intStatus = U_ZERO_ERROR; // finding default value does not affect collation fallback status UResourceBundle *defaultColl = ures_getByKeyWithFallback(collations, "default", NULL, &intStatus); if(U_SUCCESS(intStatus)) { @@ -105,34 +198,39 @@ ucol_open_internal(const char *loc, } ures_close(defaultColl); } - collElem = ures_getByKeyWithFallback(collations, keyBuffer, collElem, status); + collElem = ures_getByKeyWithFallback(collations, keyBuffer, collations, status); + collations = NULL; // We just reused the collations object as collElem. UResourceBundle *binary = NULL; if(*status == U_MISSING_RESOURCE_ERROR) { /* We didn't find the tailoring data, we fallback to the UCA */ *status = U_USING_DEFAULT_WARNING; result = ucol_initCollator(UCA->image, result, UCA, status); + if (U_FAILURE(*status)) { + goto clean; + } // if we use UCA, real locale is root - result->rb = ures_open(U_ICUDATA_COLL, "", status); - result->elements = ures_open(U_ICUDATA_COLL, "", status); + ures_close(b); + b = ures_open(U_ICUDATA_COLL, "", status); + ures_close(collElem); + collElem = ures_open(U_ICUDATA_COLL, "", status); if(U_FAILURE(*status)) { goto clean; } - ures_close(b); result->hasRealData = FALSE; } else if(U_SUCCESS(*status)) { - int32_t len = 0; - UErrorCode binaryStatus = U_ZERO_ERROR; + intStatus = U_ZERO_ERROR; - binary = ures_getByKey(collElem, "%%CollationBin", NULL, &binaryStatus); + binary = ures_getByKey(collElem, "%%CollationBin", NULL, &intStatus); - if(binaryStatus == U_MISSING_RESOURCE_ERROR) { /* we didn't find the binary image, we should use the rules */ + if(intStatus == U_MISSING_RESOURCE_ERROR) { /* we didn't find the binary image, we should use the rules */ binary = NULL; result = tryOpeningFromRules(collElem, status); if(U_FAILURE(*status)) { goto clean; } } else if(U_SUCCESS(*status)) { /* otherwise, we'll pick a collation data that exists */ + int32_t len = 0; const uint8_t *inData = ures_getBinary(binary, &len, status); UCATableHeader *colData = (UCATableHeader *)inData; if(uprv_memcmp(colData->UCAVersion, UCA->image->UCAVersion, sizeof(UVersionInfo)) != 0 || @@ -162,40 +260,50 @@ ucol_open_internal(const char *loc, result->freeImageOnClose = FALSE; } } - result->rb = b; - result->elements = collElem; - len = 0; - binaryStatus = U_ZERO_ERROR; - result->rules = ures_getStringByKey(result->elements, "Sequence", &len, &binaryStatus); - result->rulesLength = len; + intStatus = U_ZERO_ERROR; + result->rules = ures_getStringByKey(collElem, "Sequence", &result->rulesLength, &intStatus); result->freeRulesOnClose = FALSE; } else { /* There is another error, and we're just gonna clean up */ goto clean; } - result->validLocale = NULL; // default is to use rb info + intStatus = U_ZERO_ERROR; + result->ucaRules = ures_getStringByKey(b,"UCARules",NULL,&intStatus); if(loc == NULL) { - loc = ures_getLocale(result->rb, status); + loc = ures_getLocale(b, status); } - result->requestedLocale = (char *)uprv_malloc((uprv_strlen(loc)+1)*sizeof(char)); + result->requestedLocale = uprv_strdup(loc); /* test for NULL */ if (result->requestedLocale == NULL) { *status = U_MEMORY_ALLOCATION_ERROR; goto clean; } - uprv_strcpy(result->requestedLocale, loc); + loc = ures_getLocale(collElem, status); + result->actualLocale = uprv_strdup(loc); + /* test for NULL */ + if (result->actualLocale == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + goto clean; + } + loc = ures_getLocale(b, status); + result->validLocale = uprv_strdup(loc); + /* test for NULL */ + if (result->validLocale == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + goto clean; + } + ures_close(b); + ures_close(collElem); ures_close(binary); - ures_close(collations); //??? we have to decide on that. Probably affects something :) - result->resCleaner = ucol_prv_closeResources; return result; clean: ures_close(b); ures_close(collElem); - ures_close(collations); ures_close(binary); + ucol_close(result); return NULL; } @@ -203,206 +311,217 @@ U_CAPI UCollator* ucol_open(const char *loc, UErrorCode *status) { - UTRACE_ENTRY_OC(UTRACE_UCOL_OPEN); - UTRACE_DATA1(UTRACE_INFO, "locale = \"%s\"", loc); - UCollator *result = NULL; + U_NAMESPACE_USE + + UTRACE_ENTRY_OC(UTRACE_UCOL_OPEN); + UTRACE_DATA1(UTRACE_INFO, "locale = \"%s\"", loc); + UCollator *result = NULL; - u_init(status); + u_init(status); #if !UCONFIG_NO_SERVICE - result = Collator::createUCollator(loc, status); - if (result == NULL) + result = Collator::createUCollator(loc, status); + if (result == NULL) #endif - { - result = ucol_open_internal(loc, status); - } - UTRACE_EXIT_PTR_STATUS(result, *status); - return result; + { + result = ucol_open_internal(loc, status); + } + UTRACE_EXIT_PTR_STATUS(result, *status); + return result; } U_CAPI UCollator* U_EXPORT2 ucol_openRules( const UChar *rules, - int32_t rulesLength, - UColAttributeValue normalizationMode, - UCollationStrength strength, - UParseError *parseError, - UErrorCode *status) + int32_t rulesLength, + UColAttributeValue normalizationMode, + UCollationStrength strength, + UParseError *parseError, + UErrorCode *status) { - uint32_t listLen = 0; - UColTokenParser src; - UColAttributeValue norm; - UParseError tErr; - - if(status == NULL || U_FAILURE(*status)){ - return 0; - } - - u_init(status); - if (U_FAILURE(*status)) { - return NULL; - } - - if(rules == NULL || rulesLength < -1) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - if(rulesLength == -1) { - rulesLength = u_strlen(rules); - } - - if(parseError == NULL){ - parseError = &tErr; - } - - switch(normalizationMode) { - case UCOL_OFF: - case UCOL_ON: - case UCOL_DEFAULT: - norm = normalizationMode; - break; - default: - *status = U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - UCollator *UCA = ucol_initUCA(status); - - if(U_FAILURE(*status)){ - return NULL; - } + UColTokenParser src; + UColAttributeValue norm; + UParseError tErr; - ucol_tok_initTokenList(&src, rules, rulesLength, UCA, status); - listLen = ucol_tok_assembleTokenList(&src,parseError, status); + if(status == NULL || U_FAILURE(*status)){ + return 0; + } - if(U_FAILURE(*status)) { - /* if status is U_ILLEGAL_ARGUMENT_ERROR, src->current points at the offending option */ - /* if status is U_INVALID_FORMAT_ERROR, src->current points after the problematic part of the rules */ - /* so something might be done here... or on lower level */ -#ifdef UCOL_DEBUG - if(*status == U_ILLEGAL_ARGUMENT_ERROR) { - fprintf(stderr, "bad option starting at offset %i\n", src.current-src.source); - } else { - fprintf(stderr, "invalid rule just before offset %i\n", src.current-src.source); + u_init(status); + if (U_FAILURE(*status)) { + return NULL; } -#endif - ucol_tok_closeTokenList(&src); - return NULL; - } - UCollator *result = NULL; - UCATableHeader *table = NULL; - if(src.resultLen > 0 || src.removeSet != NULL) { /* we have a set of rules, let's make something of it */ - /* also, if we wanted to remove some contractions, we should make a tailoring */ - table = ucol_assembleTailoringTable(&src, status); - if(U_SUCCESS(*status)) { - // builder version - table->version[0] = UCOL_BUILDER_VERSION; - // no tailoring information on this level - table->version[1] = table->version[2] = table->version[3] = 0; - // set UCD version - u_getUnicodeVersion(table->UCDVersion); - // set UCA version - uprv_memcpy(table->UCAVersion, UCA->image->UCAVersion, sizeof(UVersionInfo)); - result = ucol_initCollator(table, 0, UCA, status); - result->hasRealData = TRUE; - result->freeImageOnClose = TRUE; - } - } else { /* no rules, but no error either */ - // must be only options - // We will init the collator from UCA - result = ucol_initCollator(UCA->image, 0, UCA, status); - // And set only the options - UColOptionSet *opts = (UColOptionSet *)uprv_malloc(sizeof(UColOptionSet)); - /* test for NULL */ - if (opts == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; + if(rules == NULL || rulesLength < -1) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + if(rulesLength == -1) { + rulesLength = u_strlen(rules); + } + + if(parseError == NULL){ + parseError = &tErr; + } + + switch(normalizationMode) { + case UCOL_OFF: + case UCOL_ON: + case UCOL_DEFAULT: + norm = normalizationMode; + break; + default: + *status = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + UCollator *result = NULL; + UCATableHeader *table = NULL; + UCollator *UCA = ucol_initUCA(status); + + if(U_FAILURE(*status)){ + return NULL; + } + + ucol_tok_initTokenList(&src, rules, rulesLength, UCA, status); + ucol_tok_assembleTokenList(&src,parseError, status); + + if(U_FAILURE(*status)) { + /* if status is U_ILLEGAL_ARGUMENT_ERROR, src->current points at the offending option */ + /* if status is U_INVALID_FORMAT_ERROR, src->current points after the problematic part of the rules */ + /* so something might be done here... or on lower level */ +#ifdef UCOL_DEBUG + if(*status == U_ILLEGAL_ARGUMENT_ERROR) { + fprintf(stderr, "bad option starting at offset %i\n", src.current-src.source); + } else { + fprintf(stderr, "invalid rule just before offset %i\n", src.current-src.source); + } +#endif goto cleanup; } - uprv_memcpy(opts, src.opts, sizeof(UColOptionSet)); - ucol_setOptionsFromHeader(result, opts, status); - result->freeOptionsOnClose = TRUE; - result->hasRealData = FALSE; - result->freeImageOnClose = FALSE; - } - - if(U_SUCCESS(*status)) { - UChar *newRules; - result->dataVersion[0] = UCOL_BUILDER_VERSION; - if(rulesLength > 0) { - newRules = (UChar *)uprv_malloc((rulesLength+1)*U_SIZEOF_UCHAR); - /* test for NULL */ - if (newRules == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - goto cleanup; - } - uprv_memcpy(newRules, rules, rulesLength*U_SIZEOF_UCHAR); - newRules[rulesLength]=0; - result->rules = newRules; - result->rulesLength = rulesLength; - result->freeRulesOnClose = TRUE; - } - result->rb = NULL; - result->elements = NULL; - result->validLocale = NULL; - result->requestedLocale = NULL; - ucol_setAttribute(result, UCOL_STRENGTH, strength, status); - ucol_setAttribute(result, UCOL_NORMALIZATION_MODE, norm, status); - } else { -cleanup: - if(result != NULL) { - ucol_close(result); + + if(src.resultLen > 0 || src.removeSet != NULL) { /* we have a set of rules, let's make something of it */ + /* also, if we wanted to remove some contractions, we should make a tailoring */ + table = ucol_assembleTailoringTable(&src, status); + if(U_SUCCESS(*status)) { + // builder version + table->version[0] = UCOL_BUILDER_VERSION; + // no tailoring information on this level + table->version[1] = table->version[2] = table->version[3] = 0; + // set UCD version + u_getUnicodeVersion(table->UCDVersion); + // set UCA version + uprv_memcpy(table->UCAVersion, UCA->image->UCAVersion, sizeof(UVersionInfo)); + result = ucol_initCollator(table, 0, UCA, status); + if (U_FAILURE(*status)) { + goto cleanup; + } + result->hasRealData = TRUE; + result->freeImageOnClose = TRUE; + } + } else { /* no rules, but no error either */ + // must be only options + // We will init the collator from UCA + result = ucol_initCollator(UCA->image, 0, UCA, status); + // Check for null result + if (U_FAILURE(*status)) { + goto cleanup; + } + // And set only the options + UColOptionSet *opts = (UColOptionSet *)uprv_malloc(sizeof(UColOptionSet)); + /* test for NULL */ + if (opts == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + goto cleanup; + } + uprv_memcpy(opts, src.opts, sizeof(UColOptionSet)); + ucol_setOptionsFromHeader(result, opts, status); + result->freeOptionsOnClose = TRUE; + result->hasRealData = FALSE; + result->freeImageOnClose = FALSE; + } + + if(U_SUCCESS(*status)) { + UChar *newRules; + result->dataVersion[0] = UCOL_BUILDER_VERSION; + if(rulesLength > 0) { + newRules = (UChar *)uprv_malloc((rulesLength+1)*U_SIZEOF_UCHAR); + /* test for NULL */ + if (newRules == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + goto cleanup; + } + uprv_memcpy(newRules, rules, rulesLength*U_SIZEOF_UCHAR); + newRules[rulesLength]=0; + result->rules = newRules; + result->rulesLength = rulesLength; + result->freeRulesOnClose = TRUE; + } + result->ucaRules = NULL; + result->actualLocale = NULL; + result->validLocale = NULL; + result->requestedLocale = NULL; + ucol_setAttribute(result, UCOL_STRENGTH, strength, status); + ucol_setAttribute(result, UCOL_NORMALIZATION_MODE, norm, status); } else { - if(table != NULL) { - uprv_free(table); - } +cleanup: + if(result != NULL) { + ucol_close(result); + } else { + if(table != NULL) { + uprv_free(table); + } + } + result = NULL; } - result = NULL; - } - ucol_tok_closeTokenList(&src); + ucol_tok_closeTokenList(&src); - return result; + return result; } U_CAPI int32_t U_EXPORT2 ucol_getRulesEx(const UCollator *coll, UColRuleOption delta, UChar *buffer, int32_t bufferLen) { - UErrorCode status = U_ZERO_ERROR; - int32_t len = 0; - int32_t UCAlen = 0; - const UChar* ucaRules = 0; - const UChar *rules = ucol_getRules(coll, &len); - if(delta == UCOL_FULL_RULES) { - /* take the UCA rules and append real rules at the end */ - /* UCA rules will be probably coming from the root RB */ - ucaRules = ures_getStringByKey(coll->rb,"UCARules",&UCAlen,&status); - /* - UResourceBundle* cresb = ures_getByKeyWithFallback(coll->rb, "collations", NULL, &status); - UResourceBundle* uca = ures_getByKeyWithFallback(cresb, "UCA", NULL, &status); - ucaRules = ures_getStringByKey(uca,"Sequence",&UCAlen,&status); - ures_close(uca); - ures_close(cresb); - */ - } - if(U_FAILURE(status)) { - return 0; - } - if(buffer!=0 && bufferLen>0){ - *buffer=0; - if(UCAlen > 0) { - u_memcpy(buffer, ucaRules, uprv_min(UCAlen, bufferLen)); - } - if(len > 0 && bufferLen > UCAlen) { - u_memcpy(buffer+UCAlen, rules, uprv_min(len, bufferLen-UCAlen)); - } - } - return u_terminateUChars(buffer, bufferLen, len+UCAlen, &status); + UErrorCode status = U_ZERO_ERROR; + int32_t len = 0; + int32_t UCAlen = 0; + const UChar* ucaRules = 0; + const UChar *rules = ucol_getRules(coll, &len); + if(delta == UCOL_FULL_RULES) { + /* take the UCA rules and append real rules at the end */ + /* UCA rules will be probably coming from the root RB */ + ucaRules = coll->ucaRules; + if (ucaRules) { + UCAlen = u_strlen(ucaRules); + } + /* + ucaRules = ures_getStringByKey(coll->rb,"UCARules",&UCAlen,&status); + UResourceBundle* cresb = ures_getByKeyWithFallback(coll->rb, "collations", NULL, &status); + UResourceBundle* uca = ures_getByKeyWithFallback(cresb, "UCA", NULL, &status); + ucaRules = ures_getStringByKey(uca,"Sequence",&UCAlen,&status); + ures_close(uca); + ures_close(cresb); + */ + } + if(U_FAILURE(status)) { + return 0; + } + if(buffer!=0 && bufferLen>0){ + *buffer=0; + if(UCAlen > 0) { + u_memcpy(buffer, ucaRules, uprv_min(UCAlen, bufferLen)); + } + if(len > 0 && bufferLen > UCAlen) { + u_memcpy(buffer+UCAlen, rules, uprv_min(len, bufferLen-UCAlen)); + } + } + return u_terminateUChars(buffer, bufferLen, len+UCAlen, &status); } static const UChar _NUL = 0; U_CAPI const UChar* U_EXPORT2 ucol_getRules( const UCollator *coll, - int32_t *length) + int32_t *length) { if(coll->rules != NULL) { *length = coll->rulesLength; @@ -416,148 +535,158 @@ ucol_getRules( const UCollator *coll, U_CAPI UBool U_EXPORT2 ucol_equals(const UCollator *source, const UCollator *target) { - UErrorCode status = U_ZERO_ERROR; - // if pointers are equal, collators are equal - if(source == target) { - return TRUE; - } - int32_t i = 0, j = 0; - // if any of attributes are different, collators are not equal - for(i = 0; i < UCOL_ATTRIBUTE_COUNT; i++) { - if(ucol_getAttribute(source, (UColAttribute)i, &status) != ucol_getAttribute(target, (UColAttribute)i, &status) || U_FAILURE(status)) { - return FALSE; - } - } - - int32_t sourceRulesLen = 0, targetRulesLen = 0; - const UChar *sourceRules = ucol_getRules(source, &sourceRulesLen); - const UChar *targetRules = ucol_getRules(target, &targetRulesLen); - - if(sourceRulesLen == targetRulesLen && u_strncmp(sourceRules, targetRules, sourceRulesLen) == 0) { - // all the attributes are equal and the rules are equal - collators are equal - return(TRUE); - } - // hard part, need to construct tree from rules and see if they yield the same tailoring - UBool result = TRUE; - UParseError parseError; - UColTokenParser sourceParser, targetParser; - int32_t sourceListLen = 0, targetListLen = 0; - ucol_tok_initTokenList(&sourceParser, sourceRules, sourceRulesLen, source->UCA, &status); - ucol_tok_initTokenList(&targetParser, targetRules, targetRulesLen, target->UCA, &status); - sourceListLen = ucol_tok_assembleTokenList(&sourceParser, &parseError, &status); - targetListLen = ucol_tok_assembleTokenList(&targetParser, &parseError, &status); - - if(sourceListLen != targetListLen) { - // different number of resets - result = FALSE; - } else { - UColToken *sourceReset = NULL, *targetReset = NULL; - UChar *sourceResetString = NULL, *targetResetString = NULL; - int32_t sourceStringLen = 0, targetStringLen = 0; - for(i = 0; i < sourceListLen; i++) { - sourceReset = sourceParser.lh[i].reset; - sourceResetString = sourceParser.source+(sourceReset->source & 0xFFFFFF); - sourceStringLen = sourceReset->source >> 24; - for(j = 0; j < sourceListLen; j++) { - targetReset = targetParser.lh[j].reset; - targetResetString = targetParser.source+(targetReset->source & 0xFFFFFF); - targetStringLen = targetReset->source >> 24; - if(sourceStringLen == targetStringLen && (u_strncmp(sourceResetString, targetResetString, sourceStringLen) == 0)) { - sourceReset = sourceParser.lh[i].first; - targetReset = targetParser.lh[j].first; - while(sourceReset != NULL && targetReset != NULL) { + UErrorCode status = U_ZERO_ERROR; + // if pointers are equal, collators are equal + if(source == target) { + return TRUE; + } + int32_t i = 0, j = 0; + // if any of attributes are different, collators are not equal + for(i = 0; i < UCOL_ATTRIBUTE_COUNT; i++) { + if(ucol_getAttribute(source, (UColAttribute)i, &status) != ucol_getAttribute(target, (UColAttribute)i, &status) || U_FAILURE(status)) { + return FALSE; + } + } + + int32_t sourceRulesLen = 0, targetRulesLen = 0; + const UChar *sourceRules = ucol_getRules(source, &sourceRulesLen); + const UChar *targetRules = ucol_getRules(target, &targetRulesLen); + + if(sourceRulesLen == targetRulesLen && u_strncmp(sourceRules, targetRules, sourceRulesLen) == 0) { + // all the attributes are equal and the rules are equal - collators are equal + return(TRUE); + } + // hard part, need to construct tree from rules and see if they yield the same tailoring + UBool result = TRUE; + UParseError parseError; + UColTokenParser sourceParser, targetParser; + int32_t sourceListLen = 0, targetListLen = 0; + ucol_tok_initTokenList(&sourceParser, sourceRules, sourceRulesLen, source->UCA, &status); + ucol_tok_initTokenList(&targetParser, targetRules, targetRulesLen, target->UCA, &status); + sourceListLen = ucol_tok_assembleTokenList(&sourceParser, &parseError, &status); + targetListLen = ucol_tok_assembleTokenList(&targetParser, &parseError, &status); + + if(sourceListLen != targetListLen) { + // different number of resets + result = FALSE; + } else { + UColToken *sourceReset = NULL, *targetReset = NULL; + UChar *sourceResetString = NULL, *targetResetString = NULL; + int32_t sourceStringLen = 0, targetStringLen = 0; + for(i = 0; i < sourceListLen; i++) { + sourceReset = sourceParser.lh[i].reset; sourceResetString = sourceParser.source+(sourceReset->source & 0xFFFFFF); sourceStringLen = sourceReset->source >> 24; - targetResetString = targetParser.source+(targetReset->source & 0xFFFFFF); - targetStringLen = targetReset->source >> 24; - if(sourceStringLen != targetStringLen || (u_strncmp(sourceResetString, targetResetString, sourceStringLen) != 0)) { - result = FALSE; - goto returnResult; - } - // probably also need to check the expansions - if(sourceReset->expansion) { - if(!targetReset->expansion) { - result = FALSE; - goto returnResult; - } else { - // compare expansions - sourceResetString = sourceParser.source+(sourceReset->expansion& 0xFFFFFF); - sourceStringLen = sourceReset->expansion >> 24; - targetResetString = targetParser.source+(targetReset->expansion & 0xFFFFFF); - targetStringLen = targetReset->expansion >> 24; - if(sourceStringLen != targetStringLen || (u_strncmp(sourceResetString, targetResetString, sourceStringLen) != 0)) { - result = FALSE; - goto returnResult; + for(j = 0; j < sourceListLen; j++) { + targetReset = targetParser.lh[j].reset; + targetResetString = targetParser.source+(targetReset->source & 0xFFFFFF); + targetStringLen = targetReset->source >> 24; + if(sourceStringLen == targetStringLen && (u_strncmp(sourceResetString, targetResetString, sourceStringLen) == 0)) { + sourceReset = sourceParser.lh[i].first; + targetReset = targetParser.lh[j].first; + while(sourceReset != NULL && targetReset != NULL) { + sourceResetString = sourceParser.source+(sourceReset->source & 0xFFFFFF); + sourceStringLen = sourceReset->source >> 24; + targetResetString = targetParser.source+(targetReset->source & 0xFFFFFF); + targetStringLen = targetReset->source >> 24; + if(sourceStringLen != targetStringLen || (u_strncmp(sourceResetString, targetResetString, sourceStringLen) != 0)) { + result = FALSE; + goto returnResult; + } + // probably also need to check the expansions + if(sourceReset->expansion) { + if(!targetReset->expansion) { + result = FALSE; + goto returnResult; + } else { + // compare expansions + sourceResetString = sourceParser.source+(sourceReset->expansion& 0xFFFFFF); + sourceStringLen = sourceReset->expansion >> 24; + targetResetString = targetParser.source+(targetReset->expansion & 0xFFFFFF); + targetStringLen = targetReset->expansion >> 24; + if(sourceStringLen != targetStringLen || (u_strncmp(sourceResetString, targetResetString, sourceStringLen) != 0)) { + result = FALSE; + goto returnResult; + } + } + } else { + if(targetReset->expansion) { + result = FALSE; + goto returnResult; + } + } + sourceReset = sourceReset->next; + targetReset = targetReset->next; + } + if(sourceReset != targetReset) { // at least one is not NULL + // there are more tailored elements in one list + result = FALSE; + goto returnResult; + } + + + break; } - } - } else { - if(targetReset->expansion) { + } + // couldn't find the reset anchor, so the collators are not equal + if(j == sourceListLen) { result = FALSE; goto returnResult; - } } - sourceReset = sourceReset->next; - targetReset = targetReset->next; - } - if(sourceReset != targetReset) { // at least one is not NULL - // there are more tailored elements in one list - result = FALSE; - goto returnResult; - } - - - break; } - } - // couldn't find the reset anchor, so the collators are not equal - if(j == sourceListLen) { - result = FALSE; - goto returnResult; - } } - } returnResult: - ucol_tok_closeTokenList(&sourceParser); - ucol_tok_closeTokenList(&targetParser); - return result; + ucol_tok_closeTokenList(&sourceParser); + ucol_tok_closeTokenList(&targetParser); + return result; } U_CAPI int32_t U_EXPORT2 ucol_getDisplayName( const char *objLoc, - const char *dispLoc, - UChar *result, - int32_t resultLength, - UErrorCode *status) + const char *dispLoc, + UChar *result, + int32_t resultLength, + UErrorCode *status) { - - if(U_FAILURE(*status)) return -1; - UnicodeString dst; - if(!(result==NULL && resultLength==0)) { - // NULL destination for pure preflighting: empty dummy string - // otherwise, alias the destination buffer - dst.setTo(result, 0, resultLength); - } - Collator::getDisplayName(Locale(objLoc), Locale(dispLoc), dst); - return dst.extract(result, resultLength, *status); + U_NAMESPACE_USE + + if(U_FAILURE(*status)) return -1; + UnicodeString dst; + if(!(result==NULL && resultLength==0)) { + // NULL destination for pure preflighting: empty dummy string + // otherwise, alias the destination buffer + dst.setTo(result, 0, resultLength); + } + Collator::getDisplayName(Locale(objLoc), Locale(dispLoc), dst); + return dst.extract(result, resultLength, *status); } U_CAPI const char* U_EXPORT2 ucol_getAvailable(int32_t index) { - return uloc_getAvailable(index); + int32_t count = 0; + const Locale *loc = Collator::getAvailableLocales(count); + if (loc != NULL && index < count) { + return loc[index].getName(); + } + return NULL; } U_CAPI int32_t U_EXPORT2 ucol_countAvailable() { - return uloc_countAvailable(); + int32_t count = 0; + Collator::getAvailableLocales(count); + return count; } #if !UCONFIG_NO_SERVICE U_CAPI UEnumeration* U_EXPORT2 ucol_openAvailableLocales(UErrorCode *status) { + U_NAMESPACE_USE + // This is a wrapper over Collator::getAvailableLocales() if (U_FAILURE(*status)) { return NULL; @@ -573,9 +702,9 @@ ucol_openAvailableLocales(UErrorCode *status) { // Note: KEYWORDS[0] != RESOURCE_NAME - alan -static const char* RESOURCE_NAME = "collations"; +static const char RESOURCE_NAME[] = "collations"; -static const char* KEYWORDS[] = { "collation" }; +static const char* const KEYWORDS[] = { "collation" }; #define KEYWORD_COUNT (sizeof(KEYWORDS)/sizeof(KEYWORDS[0])) @@ -590,10 +719,13 @@ ucol_getKeywords(UErrorCode *status) { U_CAPI UEnumeration* U_EXPORT2 ucol_getKeywordValues(const char *keyword, UErrorCode *status) { + if (U_FAILURE(*status)) { + return NULL; + } // hard-coded to accept exactly one collation keyword // modify if additional collation keyword is added later - if (U_SUCCESS(*status) && - keyword==NULL || uprv_strcmp(keyword, KEYWORDS[0])!=0) { + if (keyword==NULL || uprv_strcmp(keyword, KEYWORDS[0])!=0) + { *status = U_ILLEGAL_ARGUMENT_ERROR; return NULL; } @@ -603,117 +735,112 @@ ucol_getKeywordValues(const char *keyword, UErrorCode *status) { U_CAPI int32_t U_EXPORT2 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity, const char* keyword, const char* locale, - UBool* isAvailable, UErrorCode* status) { + UBool* isAvailable, UErrorCode* status) +{ // N.B.: Resource name is "collations" but keyword is "collation" return ures_getFunctionalEquivalent(result, resultCapacity, U_ICUDATA_COLL, - "collations", keyword, locale, - isAvailable, TRUE, status); + "collations", keyword, locale, + isAvailable, TRUE, status); } /* returns the locale name the collation data comes from */ U_CAPI const char * U_EXPORT2 ucol_getLocale(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status) { - return ucol_getLocaleByType(coll, type, status); + return ucol_getLocaleByType(coll, type, status); } U_CAPI const char * U_EXPORT2 ucol_getLocaleByType(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status) { - const char *result = NULL; - if(status == NULL || U_FAILURE(*status)) { - return NULL; - } - UTRACE_ENTRY(UTRACE_UCOL_GETLOCALE); - UTRACE_DATA1(UTRACE_INFO, "coll=%p", coll); - - switch(type) { - case ULOC_ACTUAL_LOCALE: - // validLocale is set only if service registration has explicitly set the - // requested and valid locales. if this is the case, the actual locale - // is considered to be the valid locale. - if (coll->validLocale != NULL) { - result = coll->validLocale; - } else if(coll->elements != NULL) { - result = ures_getLocale(coll->elements, status); - } - break; - case ULOC_VALID_LOCALE: - if (coll->validLocale != NULL) { - result = coll->validLocale; - } else if(coll->rb != NULL) { - result = ures_getLocale(coll->rb, status); - } - break; - case ULOC_REQUESTED_LOCALE: - result = coll->requestedLocale; - break; - default: - *status = U_ILLEGAL_ARGUMENT_ERROR; - } - UTRACE_DATA1(UTRACE_INFO, "result = %s", result); - UTRACE_EXIT_STATUS(*status); - return result; + const char *result = NULL; + if(status == NULL || U_FAILURE(*status)) { + return NULL; + } + UTRACE_ENTRY(UTRACE_UCOL_GETLOCALE); + UTRACE_DATA1(UTRACE_INFO, "coll=%p", coll); + + switch(type) { + case ULOC_ACTUAL_LOCALE: + result = coll->actualLocale; + break; + case ULOC_VALID_LOCALE: + result = coll->validLocale; + break; + case ULOC_REQUESTED_LOCALE: + result = coll->requestedLocale; + break; + default: + *status = U_ILLEGAL_ARGUMENT_ERROR; + } + UTRACE_DATA1(UTRACE_INFO, "result = %s", result); + UTRACE_EXIT_STATUS(*status); + return result; } -U_CAPI void U_EXPORT2 -ucol_setReqValidLocales(UCollator *coll, char *requestedLocaleToAdopt, char *validLocaleToAdopt) +U_CFUNC void U_EXPORT2 +ucol_setReqValidLocales(UCollator *coll, char *requestedLocaleToAdopt, char *validLocaleToAdopt, char *actualLocaleToAdopt) { - if (coll) { - if (coll->validLocale) { - uprv_free(coll->validLocale); - } - coll->validLocale = validLocaleToAdopt; - if (coll->requestedLocale) { // should always have - uprv_free(coll->requestedLocale); + if (coll) { + if (coll->validLocale) { + uprv_free(coll->validLocale); + } + coll->validLocale = validLocaleToAdopt; + if (coll->requestedLocale) { // should always have + uprv_free(coll->requestedLocale); + } + coll->requestedLocale = requestedLocaleToAdopt; + if (coll->actualLocale) { + uprv_free(coll->actualLocale); + } + coll->actualLocale = actualLocaleToAdopt; } - coll->requestedLocale = requestedLocaleToAdopt; - } } U_CAPI USet * U_EXPORT2 ucol_getTailoredSet(const UCollator *coll, UErrorCode *status) { - if(status == NULL || U_FAILURE(*status)) { - return NULL; - } - if(coll == NULL || coll->UCA == NULL) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - UParseError parseError; - UColTokenParser src; - int32_t rulesLen = 0; - const UChar *rules = ucol_getRules(coll, &rulesLen); - const UChar *current = NULL; - UBool startOfRules = TRUE; - // we internally use the C++ class, for the following reasons: - // 1. we need to utilize canonical iterator, which is a C++ only class - // 2. canonical iterator returns UnicodeStrings - USet cannot take them - // 3. USet is internally really UnicodeSet, C is just a wrapper - UnicodeSet *tailored = new UnicodeSet(); - UnicodeString pattern; - UnicodeString empty; - CanonicalIterator it(empty, *status); - - - // The idea is to tokenize the rule set. For each non-reset token, - // we add all the canonicaly equivalent FCD sequences - ucol_tok_initTokenList(&src, rules, rulesLen, coll->UCA, status); - while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError, status)) != NULL) { - startOfRules = FALSE; - if(src.parsedToken.strength != UCOL_TOK_RESET) { - const UChar *stuff = src.source+(src.parsedToken.charsOffset); - it.setSource(UnicodeString(stuff, src.parsedToken.charsLen), *status); - pattern = it.next(); - while(!pattern.isBogus()) { - if(Normalizer::quickCheck(pattern, UNORM_FCD, *status) != UNORM_NO) { - tailored->add(pattern); + U_NAMESPACE_USE + + if(status == NULL || U_FAILURE(*status)) { + return NULL; + } + if(coll == NULL || coll->UCA == NULL) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + UParseError parseError; + UColTokenParser src; + int32_t rulesLen = 0; + const UChar *rules = ucol_getRules(coll, &rulesLen); + UBool startOfRules = TRUE; + // we internally use the C++ class, for the following reasons: + // 1. we need to utilize canonical iterator, which is a C++ only class + // 2. canonical iterator returns UnicodeStrings - USet cannot take them + // 3. USet is internally really UnicodeSet, C is just a wrapper + UnicodeSet *tailored = new UnicodeSet(); + UnicodeString pattern; + UnicodeString empty; + CanonicalIterator it(empty, *status); + + + // The idea is to tokenize the rule set. For each non-reset token, + // we add all the canonicaly equivalent FCD sequences + ucol_tok_initTokenList(&src, rules, rulesLen, coll->UCA, status); + while (ucol_tok_parseNextToken(&src, startOfRules, &parseError, status) != NULL) { + startOfRules = FALSE; + if(src.parsedToken.strength != UCOL_TOK_RESET) { + const UChar *stuff = src.source+(src.parsedToken.charsOffset); + it.setSource(UnicodeString(stuff, src.parsedToken.charsLen), *status); + pattern = it.next(); + while(!pattern.isBogus()) { + if(Normalizer::quickCheck(pattern, UNORM_FCD, *status) != UNORM_NO) { + tailored->add(pattern); + } + pattern = it.next(); + } } - pattern = it.next(); - } } - } - ucol_tok_closeTokenList(&src); - return (USet *)tailored; + ucol_tok_closeTokenList(&src); + return (USet *)tailored; } #endif /* #if !UCONFIG_NO_COLLATION */