X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/729e4ab9bc6618bc3d8a898e575df7f4019e29ca..08b89b0a244153b9f5bbb2f49c55ab0f7298122e:/icuSources/i18n/plurrule.cpp diff --git a/icuSources/i18n/plurrule.cpp b/icuSources/i18n/plurrule.cpp index 987bb243..2984bc19 100644 --- a/icuSources/i18n/plurrule.cpp +++ b/icuSources/i18n/plurrule.cpp @@ -1,49 +1,58 @@ /* ******************************************************************************* -* Copyright (C) 2007-2010, International Business Machines Corporation and +* Copyright (C) 2007-2014, International Business Machines Corporation and * others. All Rights Reserved. ******************************************************************************* * -* File PLURRULE.CPP -* -* Modification History: -* -* Date Name Description -******************************************************************************* +* File plurrule.cpp */ +#include +#include -#include "unicode/uniset.h" #include "unicode/utypes.h" -#include "unicode/ures.h" +#include "unicode/localpointer.h" #include "unicode/plurrule.h" +#include "unicode/upluralrules.h" +#include "unicode/ures.h" +#include "charstr.h" #include "cmemory.h" #include "cstring.h" +#include "digitlst.h" #include "hash.h" +#include "locutil.h" #include "mutex.h" +#include "patternprops.h" #include "plurrule_impl.h" #include "putilimp.h" #include "ucln_in.h" #include "ustrfmt.h" -#include "locutil.h" - -/* -// TODO(claireho): remove stdio -#include "stdio.h" -*/ +#include "uassert.h" +#include "uvectr32.h" +#include "sharedpluralrules.h" +#include "lrucache.h" #if !UCONFIG_NO_FORMATTING -U_NAMESPACE_BEGIN +static icu::LRUCache *gPluralRulesCache = NULL; +static UMutex gPluralRulesCacheMutex = U_MUTEX_INITIALIZER; +static icu::UInitOnce gPluralRulesCacheInitOnce = U_INITONCE_INITIALIZER; +U_CDECL_BEGIN +static UBool U_CALLCONV plurrules_cleanup(void) { + gPluralRulesCacheInitOnce.reset(); + if (gPluralRulesCache) { + delete gPluralRulesCache; + gPluralRulesCache = NULL; + } + return TRUE; +} +U_CDECL_END + +U_NAMESPACE_BEGIN #define ARRAY_SIZE(array) (int32_t)(sizeof array / sizeof array[0]) -static const UChar PLURAL_KEYWORD_ZERO[] = {LOW_Z,LOW_E,LOW_R,LOW_O, 0}; -static const UChar PLURAL_KEYWORD_ONE[]={LOW_O,LOW_N,LOW_E,0}; -static const UChar PLURAL_KEYWORD_TWO[]={LOW_T,LOW_W,LOW_O,0}; -static const UChar PLURAL_KEYWORD_FEW[]={LOW_F,LOW_E,LOW_W,0}; -static const UChar PLURAL_KEYWORD_MANY[]={LOW_M,LOW_A,LOW_N,LOW_Y,0}; static const UChar PLURAL_KEYWORD_OTHER[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,0}; static const UChar PLURAL_DEFAULT_RULE[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,COLON,SPACE,LOW_N,0}; static const UChar PK_IN[]={LOW_I,LOW_N,0}; @@ -53,35 +62,36 @@ static const UChar PK_MOD[]={LOW_M,LOW_O,LOW_D,0}; static const UChar PK_AND[]={LOW_A,LOW_N,LOW_D,0}; static const UChar PK_OR[]={LOW_O,LOW_R,0}; static const UChar PK_VAR_N[]={LOW_N,0}; +static const UChar PK_VAR_I[]={LOW_I,0}; +static const UChar PK_VAR_F[]={LOW_F,0}; +static const UChar PK_VAR_T[]={LOW_T,0}; +static const UChar PK_VAR_V[]={LOW_V,0}; static const UChar PK_WITHIN[]={LOW_W,LOW_I,LOW_T,LOW_H,LOW_I,LOW_N,0}; +static const UChar PK_DECIMAL[]={LOW_D,LOW_E,LOW_C,LOW_I,LOW_M,LOW_A,LOW_L,0}; +static const UChar PK_INTEGER[]={LOW_I,LOW_N,LOW_T,LOW_E,LOW_G,LOW_E,LOW_R,0}; UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralRules) UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralKeywordEnumeration) -PluralRules::PluralRules(UErrorCode& status) +PluralRules::PluralRules(UErrorCode& /*status*/) : UObject(), mRules(NULL) { - if (U_FAILURE(status)) { - return; - } - mParser = new RuleParser(); - if (mParser==NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - } } PluralRules::PluralRules(const PluralRules& other) : UObject(other), - mRules(NULL), - mParser(new RuleParser()) + mRules(NULL) { *this=other; } PluralRules::~PluralRules() { delete mRules; - delete mParser; +} + +SharedPluralRules::~SharedPluralRules() { + delete ptr; } PluralRules* @@ -99,83 +109,184 @@ PluralRules::operator=(const PluralRules& other) { else { mRules = new RuleChain(*other.mRules); } - delete mParser; - mParser = new RuleParser(); } return *this; } +StringEnumeration* PluralRules::getAvailableLocales(UErrorCode &status) { + StringEnumeration *result = new PluralAvailableLocalesEnumeration(status); + if (result == NULL && U_SUCCESS(status)) { + status = U_MEMORY_ALLOCATION_ERROR; + } + if (U_FAILURE(status)) { + delete result; + result = NULL; + } + return result; +} + + PluralRules* U_EXPORT2 PluralRules::createRules(const UnicodeString& description, UErrorCode& status) { - RuleChain rules; - if (U_FAILURE(status)) { return NULL; } + + PluralRuleParser parser; PluralRules *newRules = new PluralRules(status); - if ( (newRules != NULL)&& U_SUCCESS(status) ) { - newRules->parseDescription((UnicodeString &)description, rules, status); - if (U_SUCCESS(status)) { - newRules->addRules(rules); - } + if (U_SUCCESS(status) && newRules == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; } + parser.parse(description, newRules, status); if (U_FAILURE(status)) { delete newRules; - return NULL; - } - else { - return newRules; + newRules = NULL; } + return newRules; } + PluralRules* U_EXPORT2 PluralRules::createDefaultRules(UErrorCode& status) { - return createRules(PLURAL_DEFAULT_RULE, status); + return createRules(UnicodeString(TRUE, PLURAL_DEFAULT_RULE, -1), status); +} + +/******************************************************************************/ +/* Create PluralRules cache */ + +static SharedObject *U_CALLCONV createSharedPluralRules( + const char *localeId, UErrorCode &status) { + if (U_FAILURE(status)) { + return NULL; + } + PluralRules *pr = PluralRules::internalForLocale( + localeId, UPLURAL_TYPE_CARDINAL, status); + if (U_FAILURE(status)) { + return NULL; + } + SharedObject *result = new SharedPluralRules(pr); + if (result == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + delete pr; + return NULL; + } + return result; +} + +static void U_CALLCONV pluralRulesCacheInit(UErrorCode &status) { + U_ASSERT(gPluralRulesCache == NULL); + ucln_i18n_registerCleanup(UCLN_I18N_PLURAL_RULE, plurrules_cleanup); + gPluralRulesCache = new SimpleLRUCache(100, &createSharedPluralRules, status); + if (U_FAILURE(status)) { + delete gPluralRulesCache; + gPluralRulesCache = NULL; + } +} + +static void getSharedPluralRulesFromCache( + const char *locale, + const SharedPluralRules *&ptr, + UErrorCode &status) { + umtx_initOnce(gPluralRulesCacheInitOnce, &pluralRulesCacheInit, status); + if (U_FAILURE(status)) { + return; + } + Mutex lock(&gPluralRulesCacheMutex); + gPluralRulesCache->get(locale, ptr, status); +} + + + + +/* end plural rules cache */ +/******************************************************************************/ + +const SharedPluralRules* U_EXPORT2 +PluralRules::createSharedInstance( + const Locale& locale, UPluralType type, UErrorCode& status) { + if (U_FAILURE(status)) { + return NULL; + } + if (type != UPLURAL_TYPE_CARDINAL) { + status = U_UNSUPPORTED_ERROR; + return NULL; + } + const SharedPluralRules *result = NULL; + getSharedPluralRulesFromCache(locale.getName(), result, status); + return result; } PluralRules* U_EXPORT2 PluralRules::forLocale(const Locale& locale, UErrorCode& status) { - RuleChain rChain; + return forLocale(locale, UPLURAL_TYPE_CARDINAL, status); +} + +PluralRules* U_EXPORT2 +PluralRules::forLocale(const Locale& locale, UPluralType type, UErrorCode& status) { + if (type != UPLURAL_TYPE_CARDINAL) { + return internalForLocale(locale, type, status); + } + const SharedPluralRules *shared = createSharedInstance( + locale, type, status); + if (U_FAILURE(status)) { + return NULL; + } + PluralRules *result = (*shared)->clone(); + shared->removeRef(); + if (result == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + } + return result; +} + +PluralRules* U_EXPORT2 +PluralRules::internalForLocale(const Locale& locale, UPluralType type, UErrorCode& status) { if (U_FAILURE(status)) { return NULL; } + if (type >= UPLURAL_TYPE_COUNT) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } PluralRules *newObj = new PluralRules(status); if (newObj==NULL || U_FAILURE(status)) { + delete newObj; return NULL; } - UnicodeString locRule = newObj->getRuleFromResource(locale, status); - if ((locRule.length() != 0) && U_SUCCESS(status)) { - newObj->parseDescription(locRule, rChain, status); - if (U_SUCCESS(status)) { - newObj->addRules(rChain); - } - } - if (U_FAILURE(status)||(locRule.length() == 0)) { - // use default plural rule + UnicodeString locRule = newObj->getRuleFromResource(locale, type, status); + // TODO: which errors, if any, should be returned? + if (locRule.length() == 0) { + // Locales with no specific rules (all numbers have the "other" category + // will return a U_MISSING_RESOURCE_ERROR at this point. This is not + // an error. + locRule = UnicodeString(PLURAL_DEFAULT_RULE); status = U_ZERO_ERROR; - UnicodeString defRule = UnicodeString(PLURAL_DEFAULT_RULE); - newObj->parseDescription(defRule, rChain, status); - newObj->addRules(rChain); } - + PluralRuleParser parser; + parser.parse(locRule, newObj, status); + // TODO: should rule parse errors be returned, or + // should we silently use default rules? + // Original impl used default rules. + // Ask the question to ICU Core. + return newObj; } UnicodeString PluralRules::select(int32_t number) const { - if (mRules == NULL) { - return PLURAL_DEFAULT_RULE; - } - else { - return mRules->select(number); - } + return select(FixedDecimal(number)); } UnicodeString PluralRules::select(double number) const { + return select(FixedDecimal(number)); +} + +UnicodeString +PluralRules::select(const FixedDecimal &number) const { if (mRules == NULL) { - return PLURAL_DEFAULT_RULE; + return UnicodeString(TRUE, PLURAL_DEFAULT_RULE, -1); } else { return mRules->select(number); @@ -186,277 +297,406 @@ StringEnumeration* PluralRules::getKeywords(UErrorCode& status) const { if (U_FAILURE(status)) return NULL; StringEnumeration* nameEnumerator = new PluralKeywordEnumeration(mRules, status); - if (U_FAILURE(status)) return NULL; + if (U_FAILURE(status)) { + delete nameEnumerator; + return NULL; + } return nameEnumerator; } +double +PluralRules::getUniqueKeywordValue(const UnicodeString& /* keyword */) { + // Not Implemented. + return UPLRULES_NO_UNIQUE_VALUE; +} -UBool -PluralRules::isKeyword(const UnicodeString& keyword) const { - if ( keyword == PLURAL_KEYWORD_OTHER ) { - return true; +int32_t +PluralRules::getAllKeywordValues(const UnicodeString & /* keyword */, double * /* dest */, + int32_t /* destCapacity */, UErrorCode& error) { + error = U_UNSUPPORTED_ERROR; + return 0; +} + + +static double scaleForInt(double d) { + double scale = 1.0; + while (d != floor(d)) { + d = d * 10.0; + scale = scale * 10.0; } - else { - if (mRules==NULL) { - return false; + return scale; +} + +static int32_t +getSamplesFromString(const UnicodeString &samples, double *dest, + int32_t destCapacity, UErrorCode& status) { + int32_t sampleCount = 0; + int32_t sampleStartIdx = 0; + int32_t sampleEndIdx = 0; + + //std::string ss; // TODO: debugging. + // std::cout << "PluralRules::getSamples(), samples = \"" << samples.toUTF8String(ss) << "\"\n"; + for (sampleCount = 0; sampleCount < destCapacity && sampleStartIdx < samples.length(); ) { + sampleEndIdx = samples.indexOf(COMMA, sampleStartIdx); + if (sampleEndIdx == -1) { + sampleEndIdx = samples.length(); } - else { - return mRules->isKeyword(keyword); + const UnicodeString &sampleRange = samples.tempSubStringBetween(sampleStartIdx, sampleEndIdx); + // ss.erase(); + // std::cout << "PluralRules::getSamples(), samplesRange = \"" << sampleRange.toUTF8String(ss) << "\"\n"; + int32_t tildeIndex = sampleRange.indexOf(TILDE); + if (tildeIndex < 0) { + FixedDecimal fixed(sampleRange, status); + double sampleValue = fixed.source; + if (fixed.visibleDecimalDigitCount == 0 || sampleValue != floor(sampleValue)) { + dest[sampleCount++] = sampleValue; + } + } else { + + FixedDecimal fixedLo(sampleRange.tempSubStringBetween(0, tildeIndex), status); + FixedDecimal fixedHi(sampleRange.tempSubStringBetween(tildeIndex+1), status); + double rangeLo = fixedLo.source; + double rangeHi = fixedHi.source; + if (U_FAILURE(status)) { + break; + } + if (rangeHi < rangeLo) { + status = U_INVALID_FORMAT_ERROR; + break; + } + + // For ranges of samples with fraction decimal digits, scale the number up so that we + // are adding one in the units place. Avoids roundoffs from repetitive adds of tenths. + + double scale = scaleForInt(rangeLo); + double t = scaleForInt(rangeHi); + if (t > scale) { + scale = t; + } + rangeLo *= scale; + rangeHi *= scale; + for (double n=rangeLo; n<=rangeHi; n+=1) { + // Hack Alert: don't return any decimal samples with integer values that + // originated from a format with trailing decimals. + // This API is returning doubles, which can't distinguish having displayed + // zeros to the right of the decimal. + // This results in test failures with values mapping back to a different keyword. + double sampleValue = n/scale; + if (!(sampleValue == floor(sampleValue) && fixedLo.visibleDecimalDigitCount > 0)) { + dest[sampleCount++] = sampleValue; + } + if (sampleCount >= destCapacity) { + break; + } + } + } + sampleStartIdx = sampleEndIdx + 1; + } + return sampleCount; +} + + +int32_t +PluralRules::getSamples(const UnicodeString &keyword, double *dest, + int32_t destCapacity, UErrorCode& status) { + RuleChain *rc = rulesForKeyword(keyword); + if (rc == NULL || destCapacity == 0 || U_FAILURE(status)) { + return 0; + } + int32_t numSamples = getSamplesFromString(rc->fIntegerSamples, dest, destCapacity, status); + if (numSamples == 0) { + numSamples = getSamplesFromString(rc->fDecimalSamples, dest, destCapacity, status); + } + return numSamples; +} + + +RuleChain *PluralRules::rulesForKeyword(const UnicodeString &keyword) const { + RuleChain *rc; + for (rc = mRules; rc != NULL; rc = rc->fNext) { + if (rc->fKeyword == keyword) { + break; } } + return rc; +} + + +UBool +PluralRules::isKeyword(const UnicodeString& keyword) const { + if (0 == keyword.compare(PLURAL_KEYWORD_OTHER, 5)) { + return true; + } + return rulesForKeyword(keyword) != NULL; } UnicodeString PluralRules::getKeywordOther() const { - return PLURAL_KEYWORD_OTHER; + return UnicodeString(TRUE, PLURAL_KEYWORD_OTHER, 5); } UBool PluralRules::operator==(const PluralRules& other) const { - int32_t limit; - UBool sameList = TRUE; const UnicodeString *ptrKeyword; UErrorCode status= U_ZERO_ERROR; if ( this == &other ) { return TRUE; } - StringEnumeration* myKeywordList = getKeywords(status); - if (U_FAILURE(status)) { - return FALSE; - } - StringEnumeration* otherKeywordList =other.getKeywords(status); + LocalPointer myKeywordList(getKeywords(status)); + LocalPointer otherKeywordList(other.getKeywords(status)); if (U_FAILURE(status)) { return FALSE; } - if (myKeywordList->count(status)!=otherKeywordList->count(status) || - U_FAILURE(status)) { - sameList = FALSE; + if (myKeywordList->count(status)!=otherKeywordList->count(status)) { + return FALSE; } - else { - myKeywordList->reset(status); - if (U_FAILURE(status)) { + myKeywordList->reset(status); + while ((ptrKeyword=myKeywordList->snext(status))!=NULL) { + if (!other.isKeyword(*ptrKeyword)) { return FALSE; } - while (sameList && (ptrKeyword=myKeywordList->snext(status))!=NULL) { - if (U_FAILURE(status) || !other.isKeyword(*ptrKeyword)) { - sameList = FALSE; - } - } - otherKeywordList->reset(status); - if (U_FAILURE(status)) { - return FALSE; - } - while (sameList && (ptrKeyword=otherKeywordList->snext(status))!=NULL) { - if (U_FAILURE(status)) { - return FALSE; - } - if (!this->isKeyword(*ptrKeyword)) { - sameList = FALSE; - } - } - delete myKeywordList; - delete otherKeywordList; - if (!sameList) { + } + otherKeywordList->reset(status); + while ((ptrKeyword=otherKeywordList->snext(status))!=NULL) { + if (!this->isKeyword(*ptrKeyword)) { return FALSE; } } - - if ((limit=this->getRepeatLimit()) != other.getRepeatLimit()) { + if (U_FAILURE(status)) { return FALSE; } - UnicodeString myKeyword, otherKeyword; - for (int32_t i=0; iselect(i); - otherKeyword = other.select(i); - if (myKeyword!=otherKeyword) { - return FALSE; - } - } + return TRUE; } + void -PluralRules::parseDescription(UnicodeString& data, RuleChain& rules, UErrorCode &status) +PluralRuleParser::parse(const UnicodeString& ruleData, PluralRules *prules, UErrorCode &status) { - int32_t ruleIndex=0; - UnicodeString token; - tokenType type; - tokenType prevType=none; - RuleChain *ruleChain=NULL; - AndConstraint *curAndConstraint=NULL; - OrConstraint *orNode=NULL; - RuleChain *lastChain=NULL; - if (U_FAILURE(status)) { return; } - UnicodeString ruleData = data.toLower(); - while (ruleIndex< ruleData.length()) { - mParser->getNextToken(ruleData, &ruleIndex, token, type, status); + U_ASSERT(ruleIndex == 0); // Parsers are good for a single use only! + ruleSrc = &ruleData; + + while (ruleIndex< ruleSrc->length()) { + getNextToken(status); if (U_FAILURE(status)) { return; } - mParser->checkSyntax(prevType, type, status); + checkSyntax(status); if (U_FAILURE(status)) { return; } switch (type) { case tAnd: + U_ASSERT(curAndConstraint != NULL); curAndConstraint = curAndConstraint->add(); break; case tOr: - lastChain = &rules; - while (lastChain->next !=NULL) { - lastChain = lastChain->next; - } - orNode=lastChain->ruleHeader; - while (orNode->next != NULL) { - orNode = orNode->next; + { + U_ASSERT(currentChain != NULL); + OrConstraint *orNode=currentChain->ruleHeader; + while (orNode->next != NULL) { + orNode = orNode->next; + } + orNode->next= new OrConstraint(); + orNode=orNode->next; + orNode->next=NULL; + curAndConstraint = orNode->add(); } - orNode->next= new OrConstraint(); - orNode=orNode->next; - orNode->next=NULL; - curAndConstraint = orNode->add(); break; case tIs: - curAndConstraint->rangeHigh=-1; + U_ASSERT(curAndConstraint != NULL); + U_ASSERT(curAndConstraint->value == -1); + U_ASSERT(curAndConstraint->rangeList == NULL); break; case tNot: - curAndConstraint->notIn=TRUE; + U_ASSERT(curAndConstraint != NULL); + curAndConstraint->negated=TRUE; break; + + case tNotEqual: + curAndConstraint->negated=TRUE; case tIn: - curAndConstraint->rangeHigh=PLURAL_RANGE_HIGH; - curAndConstraint->integerOnly = TRUE; - break; case tWithin: - curAndConstraint->rangeHigh=PLURAL_RANGE_HIGH; + case tEqual: + U_ASSERT(curAndConstraint != NULL); + curAndConstraint->rangeList = new UVector32(status); + curAndConstraint->rangeList->addElement(-1, status); // range Low + curAndConstraint->rangeList->addElement(-1, status); // range Hi + rangeLowIdx = 0; + rangeHiIdx = 1; + curAndConstraint->value=PLURAL_RANGE_HIGH; + curAndConstraint->integerOnly = (type != tWithin); break; case tNumber: + U_ASSERT(curAndConstraint != NULL); if ( (curAndConstraint->op==AndConstraint::MOD)&& (curAndConstraint->opNum == -1 ) ) { curAndConstraint->opNum=getNumberValue(token); } else { - if (curAndConstraint->rangeLow == -1) { - curAndConstraint->rangeLow=getNumberValue(token); - } - else { - curAndConstraint->rangeHigh=getNumberValue(token); + if (curAndConstraint->rangeList == NULL) { + // this is for an 'is' rule + curAndConstraint->value = getNumberValue(token); + } else { + // this is for an 'in' or 'within' rule + if (curAndConstraint->rangeList->elementAti(rangeLowIdx) == -1) { + curAndConstraint->rangeList->setElementAt(getNumberValue(token), rangeLowIdx); + curAndConstraint->rangeList->setElementAt(getNumberValue(token), rangeHiIdx); + } + else { + curAndConstraint->rangeList->setElementAt(getNumberValue(token), rangeHiIdx); + if (curAndConstraint->rangeList->elementAti(rangeLowIdx) > + curAndConstraint->rangeList->elementAti(rangeHiIdx)) { + // Range Lower bound > Range Upper bound. + // U_UNEXPECTED_TOKEN seems a little funny, but it is consistently + // used for all plural rule parse errors. + status = U_UNEXPECTED_TOKEN; + break; + } + } } } break; + case tComma: + // TODO: rule syntax checking is inadequate, can happen with badly formed rules. + // Catch cases like "n mod 10, is 1" here instead. + if (curAndConstraint == NULL || curAndConstraint->rangeList == NULL) { + status = U_UNEXPECTED_TOKEN; + break; + } + U_ASSERT(curAndConstraint->rangeList->size() >= 2); + rangeLowIdx = curAndConstraint->rangeList->size(); + curAndConstraint->rangeList->addElement(-1, status); // range Low + rangeHiIdx = curAndConstraint->rangeList->size(); + curAndConstraint->rangeList->addElement(-1, status); // range Hi + break; case tMod: + U_ASSERT(curAndConstraint != NULL); curAndConstraint->op=AndConstraint::MOD; break; + case tVariableN: + case tVariableI: + case tVariableF: + case tVariableT: + case tVariableV: + U_ASSERT(curAndConstraint != NULL); + curAndConstraint->digitsType = type; + break; case tKeyword: - if (ruleChain==NULL) { - ruleChain = &rules; + { + RuleChain *newChain = new RuleChain; + if (newChain == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + break; } - else { - while (ruleChain->next!=NULL){ - ruleChain=ruleChain->next; + newChain->fKeyword = token; + if (prules->mRules == NULL) { + prules->mRules = newChain; + } else { + // The new rule chain goes at the end of the linked list of rule chains, + // unless there is an "other" keyword & chain. "other" must remain last. + RuleChain *insertAfter = prules->mRules; + while (insertAfter->fNext!=NULL && + insertAfter->fNext->fKeyword.compare(PLURAL_KEYWORD_OTHER, 5) != 0 ){ + insertAfter=insertAfter->fNext; } - ruleChain=ruleChain->next=new RuleChain(); + newChain->fNext = insertAfter->fNext; + insertAfter->fNext = newChain; } - orNode = ruleChain->ruleHeader = new OrConstraint(); + OrConstraint *orNode = new OrConstraint(); + newChain->ruleHeader = orNode; curAndConstraint = orNode->add(); - ruleChain->keyword = token; + currentChain = newChain; + } + break; + + case tInteger: + for (;;) { + getNextToken(status); + if (U_FAILURE(status) || type == tSemiColon || type == tEOF || type == tAt) { + break; + } + if (type == tEllipsis) { + currentChain->fIntegerSamplesUnbounded = TRUE; + continue; + } + currentChain->fIntegerSamples.append(token); + } + break; + + case tDecimal: + for (;;) { + getNextToken(status); + if (U_FAILURE(status) || type == tSemiColon || type == tEOF || type == tAt) { + break; + } + if (type == tEllipsis) { + currentChain->fDecimalSamplesUnbounded = TRUE; + continue; + } + currentChain->fDecimalSamples.append(token); + } break; + default: break; } prevType=type; + if (U_FAILURE(status)) { + break; + } } } -int32_t -PluralRules::getNumberValue(const UnicodeString& token) const { - int32_t i; - char digits[128]; - - i = token.extract(0, token.length(), digits, ARRAY_SIZE(digits), US_INV); - digits[i]='\0'; - - return((int32_t)atoi(digits)); -} - - -void -PluralRules::getNextLocale(const UnicodeString& localeData, int32_t* curIndex, UnicodeString& localeName) { - int32_t i=*curIndex; - - localeName.remove(); - while (i< localeData.length()) { - if ( (localeData.charAt(i)!= SPACE) && (localeData.charAt(i)!= COMMA) ) { - break; - } - i++; - } - - while (i< localeData.length()) { - if ( (localeData.charAt(i)== SPACE) || (localeData.charAt(i)== COMMA) ) { - break; - } - localeName+=localeData.charAt(i++); - } - *curIndex=i; -} - - -int32_t -PluralRules::getRepeatLimit() const { - if (mRules!=NULL) { - return mRules->getRepeatLimit(); - } - else { - return 0; - } -} - - -void -PluralRules::addRules(RuleChain& rules) { - RuleChain *newRule = new RuleChain(rules); - this->mRules=newRule; - newRule->setRepeatLimit(); -} - UnicodeString -PluralRules::getRuleFromResource(const Locale& locale, UErrorCode& errCode) { +PluralRules::getRuleFromResource(const Locale& locale, UPluralType type, UErrorCode& errCode) { UnicodeString emptyStr; - + if (U_FAILURE(errCode)) { return emptyStr; } - UResourceBundle *rb=ures_openDirect(NULL, "plurals", &errCode); + LocalUResourceBundlePointer rb(ures_openDirect(NULL, "plurals", &errCode)); if(U_FAILURE(errCode)) { - /* total failure, not even root could be opened */ return emptyStr; } - UResourceBundle *locRes=ures_getByKey(rb, "locales", NULL, &errCode); + const char *typeKey; + switch (type) { + case UPLURAL_TYPE_CARDINAL: + typeKey = "locales"; + break; + case UPLURAL_TYPE_ORDINAL: + typeKey = "locales_ordinals"; + break; + default: + // Must not occur: The caller should have checked for valid types. + errCode = U_ILLEGAL_ARGUMENT_ERROR; + return emptyStr; + } + LocalUResourceBundlePointer locRes(ures_getByKey(rb.getAlias(), typeKey, NULL, &errCode)); if(U_FAILURE(errCode)) { - ures_close(rb); return emptyStr; - } + } int32_t resLen=0; const char *curLocaleName=locale.getName(); - const UChar* s = ures_getStringByKey(locRes, curLocaleName, &resLen, &errCode); + const UChar* s = ures_getStringByKey(locRes.getAlias(), curLocaleName, &resLen, &errCode); if (s == NULL) { // Check parent locales. UErrorCode status = U_ZERO_ERROR; char parentLocaleName[ULOC_FULLNAME_CAPACITY]; const char *curLocaleName=locale.getName(); - int32_t localeNameLen=0; uprv_strcpy(parentLocaleName, curLocaleName); - - while ((localeNameLen=uloc_getParent(parentLocaleName, parentLocaleName, - ULOC_FULLNAME_CAPACITY, &status)) > 0) { + + while (uloc_getParent(parentLocaleName, parentLocaleName, + ULOC_FULLNAME_CAPACITY, &status) > 0) { resLen=0; - s = ures_getStringByKey(locRes, parentLocaleName, &resLen, &status); + s = ures_getStringByKey(locRes.getAlias(), parentLocaleName, &resLen, &status); if (s != NULL) { errCode = U_ZERO_ERROR; break; @@ -465,66 +705,55 @@ PluralRules::getRuleFromResource(const Locale& locale, UErrorCode& errCode) { } } if (s==NULL) { - ures_close(locRes); - ures_close(rb); return emptyStr; } - + char setKey[256]; - UChar result[256]; u_UCharsToChars(s, setKey, resLen + 1); // printf("\n PluralRule: %s\n", setKey); - - UResourceBundle *ruleRes=ures_getByKey(rb, "rules", NULL, &errCode); + LocalUResourceBundlePointer ruleRes(ures_getByKey(rb.getAlias(), "rules", NULL, &errCode)); if(U_FAILURE(errCode)) { - ures_close(locRes); - ures_close(rb); return emptyStr; } - resLen=0; - UResourceBundle *setRes = ures_getByKey(ruleRes, setKey, NULL, &errCode); + LocalUResourceBundlePointer setRes(ures_getByKey(ruleRes.getAlias(), setKey, NULL, &errCode)); if (U_FAILURE(errCode)) { - ures_close(ruleRes); - ures_close(locRes); - ures_close(rb); return emptyStr; } - int32_t numberKeys = ures_getSize(setRes); - char *key=NULL; - int32_t len=0; - for(int32_t i=0; idumpRules(rules); + } + return rules; } + AndConstraint::AndConstraint() { op = AndConstraint::NONE; opNum=-1; - rangeLow=-1; - rangeHigh=-1; - notIn=FALSE; - integerOnly=FALSE; + value = -1; + rangeList = NULL; + negated = FALSE; + integerOnly = FALSE; + digitsType = none; next=NULL; } @@ -532,10 +761,16 @@ AndConstraint::AndConstraint() { AndConstraint::AndConstraint(const AndConstraint& other) { this->op = other.op; this->opNum=other.opNum; - this->rangeLow=other.rangeLow; - this->rangeHigh=other.rangeHigh; + this->value=other.value; + this->rangeList=NULL; + if (other.rangeList != NULL) { + UErrorCode status = U_ZERO_ERROR; + this->rangeList = new UVector32(status); + this->rangeList->assign(*other.rangeList, status); + } this->integerOnly=other.integerOnly; - this->notIn=other.notIn; + this->negated=other.negated; + this->digitsType = other.digitsType; if (other.next==NULL) { this->next=NULL; } @@ -545,6 +780,7 @@ AndConstraint::AndConstraint(const AndConstraint& other) { } AndConstraint::~AndConstraint() { + delete rangeList; if (next!=NULL) { delete next; } @@ -552,66 +788,42 @@ AndConstraint::~AndConstraint() { UBool -AndConstraint::isFulfilled(double number) { - UBool result=TRUE; - double value=number; - - if ( op == MOD ) { - value = (int32_t)value % opNum; - } - if ( rangeHigh == -1 ) { - if ( rangeLow == -1 ) { - result = TRUE; // empty rule - } - else { - if ( value == rangeLow ) { - result = TRUE; - } - else { - result = FALSE; - } - } +AndConstraint::isFulfilled(const FixedDecimal &number) { + UBool result = TRUE; + if (digitsType == none) { + // An empty AndConstraint, created by a rule with a keyword but no following expression. + return TRUE; } - else { - if ((rangeLow <= value) && (value <= rangeHigh)) { - if (integerOnly) { - if ( value != (int32_t)value) { - result = FALSE; - } - else { - result = TRUE; - } - } - else { - result = TRUE; - } - } - else { + double n = number.get(digitsType); // pulls n | i | v | f value for the number. + // Will always be positive. + // May be non-integer (n option only) + do { + if (integerOnly && n != uprv_floor(n)) { result = FALSE; + break; } - } - if (notIn) { - return !result; - } - else { - return result; - } -} -int32_t -AndConstraint::updateRepeatLimit(int32_t maxLimit) { - - if ( op == MOD ) { - return uprv_max(opNum, maxLimit); - } - else { - if ( rangeHigh == -1 ) { - return uprv_max(rangeLow, maxLimit); + if (op == MOD) { + n = fmod(n, opNum); + } + if (rangeList == NULL) { + result = value == -1 || // empty rule + n == value; // 'is' rule + break; } - else{ - return uprv_max(rangeHigh, maxLimit); + result = FALSE; // 'in' or 'within' rule + for (int32_t r=0; rsize(); r+=2) { + if (rangeList->elementAti(r) <= n && n <= rangeList->elementAti(r+1)) { + result = TRUE; + break; + } } + } while (FALSE); + + if (negated) { + result = !result; } + return result; } @@ -659,17 +871,17 @@ OrConstraint::add() while (curOrConstraint->next!=NULL) { curOrConstraint = curOrConstraint->next; } - curOrConstraint->next = NULL; + U_ASSERT(curOrConstraint->childNode == NULL); curOrConstraint->childNode = new AndConstraint(); } return curOrConstraint->childNode; } UBool -OrConstraint::isFulfilled(double number) { +OrConstraint::isFulfilled(const FixedDecimal &number) { OrConstraint* orRule=this; UBool result=FALSE; - + while (orRule!=NULL && !result) { result=TRUE; AndConstraint* andRule = orRule->childNode; @@ -679,113 +891,115 @@ OrConstraint::isFulfilled(double number) { } orRule = orRule->next; } - + return result; } -RuleChain::RuleChain() { - ruleHeader=NULL; - next = NULL; - repeatLimit=0; +RuleChain::RuleChain(): fKeyword(), fNext(NULL), ruleHeader(NULL), fDecimalSamples(), fIntegerSamples(), + fDecimalSamplesUnbounded(FALSE), fIntegerSamplesUnbounded(FALSE) { } -RuleChain::RuleChain(const RuleChain& other) { - this->repeatLimit = other.repeatLimit; - this->keyword=other.keyword; +RuleChain::RuleChain(const RuleChain& other) : + fKeyword(other.fKeyword), fNext(NULL), ruleHeader(NULL), fDecimalSamples(other.fDecimalSamples), + fIntegerSamples(other.fIntegerSamples), fDecimalSamplesUnbounded(other.fDecimalSamplesUnbounded), + fIntegerSamplesUnbounded(other.fIntegerSamplesUnbounded) { if (other.ruleHeader != NULL) { this->ruleHeader = new OrConstraint(*(other.ruleHeader)); } - else { - this->ruleHeader = NULL; - } - if (other.next != NULL ) { - this->next = new RuleChain(*other.next); - } - else - { - this->next = NULL; + if (other.fNext != NULL ) { + this->fNext = new RuleChain(*other.fNext); } } RuleChain::~RuleChain() { - if (next != NULL) { - delete next; - } - if ( ruleHeader != NULL ) { - delete ruleHeader; - } + delete fNext; + delete ruleHeader; } + UnicodeString -RuleChain::select(double number) const { - - if ( ruleHeader != NULL ) { - if (ruleHeader->isFulfilled(number)) { - return keyword; - } - } - if ( next != NULL ) { - return next->select(number); - } - else { - return PLURAL_KEYWORD_OTHER; - } +RuleChain::select(const FixedDecimal &number) const { + if (!number.isNanOrInfinity) { + for (const RuleChain *rules = this; rules != NULL; rules = rules->fNext) { + if (rules->ruleHeader->isFulfilled(number)) { + return rules->fKeyword; + } + } + } + return UnicodeString(TRUE, PLURAL_KEYWORD_OTHER, 5); +} +static UnicodeString tokenString(tokenType tok) { + UnicodeString s; + switch (tok) { + case tVariableN: + s.append(LOW_N); break; + case tVariableI: + s.append(LOW_I); break; + case tVariableF: + s.append(LOW_F); break; + case tVariableV: + s.append(LOW_V); break; + case tVariableT: + s.append(LOW_T); break; + default: + s.append(TILDE); + } + return s; } void RuleChain::dumpRules(UnicodeString& result) { UChar digitString[16]; - + if ( ruleHeader != NULL ) { - result += keyword; + result += fKeyword; + result += COLON; + result += SPACE; OrConstraint* orRule=ruleHeader; while ( orRule != NULL ) { AndConstraint* andRule=orRule->childNode; while ( andRule != NULL ) { - if ( (andRule->op==AndConstraint::NONE) && (andRule->rangeHigh==-1) ) { - result += UNICODE_STRING_SIMPLE(" n is "); - if (andRule->notIn) { + if ((andRule->op==AndConstraint::NONE) && (andRule->rangeList==NULL) && (andRule->value == -1)) { + // Empty Rules. + } else if ( (andRule->op==AndConstraint::NONE) && (andRule->rangeList==NULL) ) { + result += tokenString(andRule->digitsType); + result += UNICODE_STRING_SIMPLE(" is "); + if (andRule->negated) { result += UNICODE_STRING_SIMPLE("not "); } - uprv_itou(digitString,16, andRule->rangeLow,10,0); + uprv_itou(digitString,16, andRule->value,10,0); result += UnicodeString(digitString); } else { + result += tokenString(andRule->digitsType); + result += SPACE; if (andRule->op==AndConstraint::MOD) { - result += UNICODE_STRING_SIMPLE(" n mod "); + result += UNICODE_STRING_SIMPLE("mod "); uprv_itou(digitString,16, andRule->opNum,10,0); result += UnicodeString(digitString); } - else { - result += UNICODE_STRING_SIMPLE(" n "); - } - if (andRule->rangeHigh==-1) { - if (andRule->notIn) { + if (andRule->rangeList==NULL) { + if (andRule->negated) { result += UNICODE_STRING_SIMPLE(" is not "); - uprv_itou(digitString,16, andRule->rangeLow,10,0); + uprv_itou(digitString,16, andRule->value,10,0); result += UnicodeString(digitString); } else { result += UNICODE_STRING_SIMPLE(" is "); - uprv_itou(digitString,16, andRule->rangeLow,10,0); + uprv_itou(digitString,16, andRule->value,10,0); result += UnicodeString(digitString); } } else { - if (andRule->notIn) { + if (andRule->negated) { if ( andRule->integerOnly ) { - result += UNICODE_STRING_SIMPLE(" not in "); + result += UNICODE_STRING_SIMPLE(" not in "); } else { - result += UNICODE_STRING_SIMPLE(" not within "); + result += UNICODE_STRING_SIMPLE(" not within "); } - uprv_itou(digitString,16, andRule->rangeLow,10,0); - result += UnicodeString(digitString); - result += UNICODE_STRING_SIMPLE(" .. "); - uprv_itou(digitString,16, andRule->rangeHigh,10,0); - result += UnicodeString(digitString); } else { if ( andRule->integerOnly ) { @@ -794,66 +1008,48 @@ RuleChain::dumpRules(UnicodeString& result) { else { result += UNICODE_STRING_SIMPLE(" within "); } - uprv_itou(digitString,16, andRule->rangeLow,10,0); + } + for (int32_t r=0; rrangeList->size(); r+=2) { + int32_t rangeLo = andRule->rangeList->elementAti(r); + int32_t rangeHi = andRule->rangeList->elementAti(r+1); + uprv_itou(digitString,16, rangeLo, 10, 0); result += UnicodeString(digitString); - result += UNICODE_STRING_SIMPLE(" .. "); - uprv_itou(digitString,16, andRule->rangeHigh,10,0); + result += UNICODE_STRING_SIMPLE(".."); + uprv_itou(digitString,16, rangeHi, 10,0); + result += UnicodeString(digitString); + if (r+2 < andRule->rangeList->size()) { + result += UNICODE_STRING_SIMPLE(", "); + } } } } if ( (andRule=andRule->next) != NULL) { - result += PK_AND; + result += UNICODE_STRING_SIMPLE(" and "); } } if ( (orRule = orRule->next) != NULL ) { - result += PK_OR; + result += UNICODE_STRING_SIMPLE(" or "); } } } - if ( next != NULL ) { - next->dumpRules(result); + if ( fNext != NULL ) { + result += UNICODE_STRING_SIMPLE("; "); + fNext->dumpRules(result); } } -int32_t -RuleChain::getRepeatLimit () { - return repeatLimit; -} - -void -RuleChain::setRepeatLimit () { - int32_t limit=0; - - if ( next != NULL ) { - next->setRepeatLimit(); - limit = next->repeatLimit; - } - - if ( ruleHeader != NULL ) { - OrConstraint* orRule=ruleHeader; - while ( orRule != NULL ) { - AndConstraint* andRule=orRule->childNode; - while ( andRule != NULL ) { - limit = andRule->updateRepeatLimit(limit); - andRule = andRule->next; - } - orRule = orRule->next; - } - } - repeatLimit = limit; -} UErrorCode RuleChain::getKeywords(int32_t capacityOfKeywords, UnicodeString* keywords, int32_t& arraySize) const { if ( arraySize < capacityOfKeywords-1 ) { - keywords[arraySize++]=keyword; + keywords[arraySize++]=fKeyword; } else { return U_BUFFER_OVERFLOW_ERROR; } - if ( next != NULL ) { - return next->getKeywords(capacityOfKeywords, keywords, arraySize); + if ( fNext != NULL ) { + return fNext->getKeywords(capacityOfKeywords, keywords, arraySize); } else { return U_ZERO_ERROR; @@ -862,12 +1058,12 @@ RuleChain::getKeywords(int32_t capacityOfKeywords, UnicodeString* keywords, int3 UBool RuleChain::isKeyword(const UnicodeString& keywordParam) const { - if ( keyword == keywordParam ) { + if ( fKeyword == keywordParam ) { return TRUE; } - if ( next != NULL ) { - return next->isKeyword(keywordParam); + if ( fNext != NULL ) { + return fNext->isKeyword(keywordParam); } else { return FALSE; @@ -875,80 +1071,122 @@ RuleChain::isKeyword(const UnicodeString& keywordParam) const { } -RuleParser::RuleParser() { - UErrorCode err=U_ZERO_ERROR; - const UnicodeString idStart=UNICODE_STRING_SIMPLE("[[a-z]]"); - const UnicodeString idContinue=UNICODE_STRING_SIMPLE("[[a-z][A-Z][_][0-9]]"); - idStartFilter = new UnicodeSet(idStart, err); - idContinueFilter = new UnicodeSet(idContinue, err); +PluralRuleParser::PluralRuleParser() : + ruleIndex(0), token(), type(none), prevType(none), + curAndConstraint(NULL), currentChain(NULL), rangeLowIdx(-1), rangeHiIdx(-1) +{ +} + +PluralRuleParser::~PluralRuleParser() { } -RuleParser::~RuleParser() { - delete idStartFilter; - delete idContinueFilter; + +int32_t +PluralRuleParser::getNumberValue(const UnicodeString& token) { + int32_t i; + char digits[128]; + + i = token.extract(0, token.length(), digits, ARRAY_SIZE(digits), US_INV); + digits[i]='\0'; + + return((int32_t)atoi(digits)); } + void -RuleParser::checkSyntax(tokenType prevType, tokenType curType, UErrorCode &status) +PluralRuleParser::checkSyntax(UErrorCode &status) { if (U_FAILURE(status)) { return; } + if (!(prevType==none || prevType==tSemiColon)) { + type = getKeyType(token, type); // Switch token type from tKeyword if we scanned a reserved word, + // and we are not at the start of a rule, where a + // keyword is expected. + } + switch(prevType) { case none: case tSemiColon: - if (curType!=tKeyword) { + if (type!=tKeyword && type != tEOF) { status = U_UNEXPECTED_TOKEN; } break; - case tVariableN : - if (curType != tIs && curType != tMod && curType != tIn && - curType != tNot && curType != tWithin) { + case tVariableN: + case tVariableI: + case tVariableF: + case tVariableT: + case tVariableV: + if (type != tIs && type != tMod && type != tIn && + type != tNot && type != tWithin && type != tEqual && type != tNotEqual) { status = U_UNEXPECTED_TOKEN; } break; - case tZero: - case tOne: - case tTwo: - case tFew: - case tMany: - case tOther: case tKeyword: - if (curType != tColon) { + if (type != tColon) { status = U_UNEXPECTED_TOKEN; } break; - case tColon : - if (curType != tVariableN) { + case tColon: + if (!(type == tVariableN || + type == tVariableI || + type == tVariableF || + type == tVariableT || + type == tVariableV || + type == tAt)) { status = U_UNEXPECTED_TOKEN; } break; case tIs: - if ( curType != tNumber && curType != tNot) { + if ( type != tNumber && type != tNot) { status = U_UNEXPECTED_TOKEN; } break; case tNot: - if (curType != tNumber && curType != tIn && curType != tWithin) { + if (type != tNumber && type != tIn && type != tWithin) { status = U_UNEXPECTED_TOKEN; } break; case tMod: - case tDot: + case tDot2: case tIn: case tWithin: + case tEqual: + case tNotEqual: + if (type != tNumber) { + status = U_UNEXPECTED_TOKEN; + } + break; case tAnd: case tOr: - if (curType != tNumber && curType != tVariableN) { + if ( type != tVariableN && + type != tVariableI && + type != tVariableF && + type != tVariableT && + type != tVariableV) { + status = U_UNEXPECTED_TOKEN; + } + break; + case tComma: + if (type != tNumber) { status = U_UNEXPECTED_TOKEN; } break; case tNumber: - if (curType != tDot && curType != tSemiColon && curType != tIs && curType != tNot && - curType != tIn && curType != tWithin && curType != tAnd && curType != tOr) + if (type != tDot2 && type != tSemiColon && type != tIs && type != tNot && + type != tIn && type != tEqual && type != tNotEqual && type != tWithin && + type != tAnd && type != tOr && type != tComma && type != tAt && + type != tEOF) { status = U_UNEXPECTED_TOKEN; } + // TODO: a comma following a number that is not part of a range will be allowed. + // It's not the only case of this sort of thing. Parser needs a re-write. + break; + case tAt: + if (type != tDecimal && type != tInteger) { + status = U_UNEXPECTED_TOKEN; + } break; default: status = U_UNEXPECTED_TOKEN; @@ -956,224 +1194,198 @@ RuleParser::checkSyntax(tokenType prevType, tokenType curType, UErrorCode &statu } } + +/* + * Scan the next token from the input rules. + * rules and returned token type are in the parser state variables. + */ void -RuleParser::getNextToken(const UnicodeString& ruleData, - int32_t *ruleIndex, - UnicodeString& token, - tokenType& type, - UErrorCode &status) +PluralRuleParser::getNextToken(UErrorCode &status) { - int32_t curIndex= *ruleIndex; - UChar ch; - tokenType prevType=none; - if (U_FAILURE(status)) { return; } - while (curIndexlength()) { + ch = ruleSrc->charAt(ruleIndex); + type = charType(ch); + if (type != tSpace) { + break; } - curIndex++; + ++(ruleIndex); } - if ( curIndex>=ruleData.length() ) { - if ( (type == tLetter)||(type == tNumber) ) { - token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex); - getKeyType(token, type, status); - if (U_FAILURE(status)) { - return; - } - } - *ruleIndex = ruleData.length(); + if (ruleIndex >= ruleSrc->length()) { + type = tEOF; + return; } + int32_t curIndex= ruleIndex; + + switch (type) { + case tColon: + case tSemiColon: + case tComma: + case tEllipsis: + case tTilde: // scanned '~' + case tAt: // scanned '@' + case tEqual: // scanned '=' + case tMod: // scanned '%' + // Single character tokens. + ++curIndex; + break; + + case tNotEqual: // scanned '!' + if (ruleSrc->charAt(curIndex+1) == EQUALS) { + curIndex += 2; + } else { + type = none; + curIndex += 1; + } + break; + + case tKeyword: + while (type == tKeyword && ++curIndex < ruleSrc->length()) { + ch = ruleSrc->charAt(curIndex); + type = charType(ch); + } + type = tKeyword; + break; + + case tNumber: + while (type == tNumber && ++curIndex < ruleSrc->length()) { + ch = ruleSrc->charAt(curIndex); + type = charType(ch); + } + type = tNumber; + break; + + case tDot: + // We could be looking at either ".." in a range, or "..." at the end of a sample. + if (curIndex+1 >= ruleSrc->length() || ruleSrc->charAt(curIndex+1) != DOT) { + ++curIndex; + break; // Single dot + } + if (curIndex+2 >= ruleSrc->length() || ruleSrc->charAt(curIndex+2) != DOT) { + curIndex += 2; + type = tDot2; + break; // double dot + } + type = tEllipsis; + curIndex += 3; + break; // triple dot + + default: + status = U_UNEXPECTED_TOKEN; + ++curIndex; + break; + } + + U_ASSERT(ruleIndex <= ruleSrc->length()); + U_ASSERT(curIndex <= ruleSrc->length()); + token=UnicodeString(*ruleSrc, ruleIndex, curIndex-ruleIndex); + ruleIndex = curIndex; } -UBool -RuleParser::inRange(UChar ch, tokenType& type) { - if ((ch>=CAP_A) && (ch<=CAP_Z)) { - // we assume all characters are in lower case already. - return FALSE; - } - if ((ch>=LOW_A) && (ch<=LOW_Z)) { - type = tLetter; - return TRUE; - } +tokenType +PluralRuleParser::charType(UChar ch) { if ((ch>=U_ZERO) && (ch<=U_NINE)) { - type = tNumber; - return TRUE; + return tNumber; + } + if (ch>=LOW_A && ch<=LOW_Z) { + return tKeyword; } switch (ch) { case COLON: - type = tColon; - return TRUE; + return tColon; case SPACE: - type = tSpace; - return TRUE; + return tSpace; case SEMI_COLON: - type = tSemiColon; - return TRUE; + return tSemiColon; case DOT: - type = tDot; - return TRUE; + return tDot; + case COMMA: + return tComma; + case EXCLAMATION: + return tNotEqual; + case EQUALS: + return tEqual; + case PERCENT_SIGN: + return tMod; + case AT: + return tAt; + case ELLIPSIS: + return tEllipsis; + case TILDE: + return tTilde; default : - type = none; - return FALSE; + return none; } } -void -RuleParser::getKeyType(const UnicodeString& token, tokenType& keyType, UErrorCode &status) +// Set token type for reserved words in the Plural Rule syntax. + +tokenType +PluralRuleParser::getKeyType(const UnicodeString &token, tokenType keyType) { - if (U_FAILURE(status)) { - return; + if (keyType != tKeyword) { + return keyType; } - if ( keyType==tNumber) { - } - else if (token==PK_VAR_N) { + + if (0 == token.compare(PK_VAR_N, 1)) { keyType = tVariableN; - } - else if (token==PK_IS) { + } else if (0 == token.compare(PK_VAR_I, 1)) { + keyType = tVariableI; + } else if (0 == token.compare(PK_VAR_F, 1)) { + keyType = tVariableF; + } else if (0 == token.compare(PK_VAR_T, 1)) { + keyType = tVariableT; + } else if (0 == token.compare(PK_VAR_V, 1)) { + keyType = tVariableV; + } else if (0 == token.compare(PK_IS, 2)) { keyType = tIs; - } - else if (token==PK_AND) { + } else if (0 == token.compare(PK_AND, 3)) { keyType = tAnd; - } - else if (token==PK_IN) { + } else if (0 == token.compare(PK_IN, 2)) { keyType = tIn; - } - else if (token==PK_WITHIN) { + } else if (0 == token.compare(PK_WITHIN, 6)) { keyType = tWithin; - } - else if (token==PK_NOT) { + } else if (0 == token.compare(PK_NOT, 3)) { keyType = tNot; - } - else if (token==PK_MOD) { + } else if (0 == token.compare(PK_MOD, 3)) { keyType = tMod; - } - else if (token==PK_OR) { + } else if (0 == token.compare(PK_OR, 2)) { keyType = tOr; + } else if (0 == token.compare(PK_DECIMAL, 7)) { + keyType = tDecimal; + } else if (0 == token.compare(PK_INTEGER, 7)) { + keyType = tInteger; } - else if ( isValidKeyword(token) ) { - keyType = tKeyword; - } - else { - status = U_UNEXPECTED_TOKEN; - } -} - -UBool -RuleParser::isValidKeyword(const UnicodeString& token) { - if ( token.length()==0 ) { - return FALSE; - } - if ( idStartFilter->contains(token.charAt(0) )==TRUE ) { - int32_t i; - for (i=1; i< token.length(); i++) { - if (idContinueFilter->contains(token.charAt(i))== FALSE) { - return FALSE; - } - } - return TRUE; - } - else { - return FALSE; - } + return keyType; } -PluralKeywordEnumeration::PluralKeywordEnumeration(RuleChain *header, UErrorCode& status) : -fKeywordNames(status) -{ - RuleChain *node=header; - UBool addKeywordOther=true; +PluralKeywordEnumeration::PluralKeywordEnumeration(RuleChain *header, UErrorCode& status) + : pos(0), fKeywordNames(status) { if (U_FAILURE(status)) { return; } - pos=0; - fKeywordNames.removeAllElements(); + fKeywordNames.setDeleter(uprv_deleteUObject); + UBool addKeywordOther=TRUE; + RuleChain *node=header; while(node!=NULL) { - fKeywordNames.addElement(new UnicodeString(node->keyword), status); + fKeywordNames.addElement(new UnicodeString(node->fKeyword), status); if (U_FAILURE(status)) { return; } - if (node->keyword == PLURAL_KEYWORD_OTHER) { - addKeywordOther= false; + if (0 == node->fKeyword.compare(PLURAL_KEYWORD_OTHER, 5)) { + addKeywordOther= FALSE; } - node=node->next; + node=node->fNext; } - + if (addKeywordOther) { fKeywordNames.addElement(new UnicodeString(PLURAL_KEYWORD_OTHER), status); - if (U_FAILURE(status)) { - return; - } } } @@ -1196,12 +1408,287 @@ PluralKeywordEnumeration::count(UErrorCode& /*status*/) const { } PluralKeywordEnumeration::~PluralKeywordEnumeration() { - UnicodeString *s; - for (int32_t i=0; i visiblePower) { + // throw new IllegalArgumentException(); + // } + // double fraction = intValue + (decimalDigits / (double) visiblePower); + // if (fraction != source) { + // double diff = Math.abs(fraction - source)/(Math.abs(fraction) + Math.abs(source)); + // if (diff > 0.00000001d) { + // throw new IllegalArgumentException(); + // } + // } +} + +FixedDecimal::FixedDecimal(double n, int32_t v) { + // Ugly, but for samples we don't care. + init(n, v, getFractionalDigits(n, v)); +} + +FixedDecimal::FixedDecimal(double n) { + init(n); +} + +FixedDecimal::FixedDecimal() { + init(0, 0, 0); +} + + +// Create a FixedDecimal from a UnicodeString containing a number. +// Inefficient, but only used for samples, so simplicity trumps efficiency. + +FixedDecimal::FixedDecimal(const UnicodeString &num, UErrorCode &status) { + CharString cs; + cs.appendInvariantChars(num, status); + DigitList dl; + dl.set(cs.toStringPiece(), status); + if (U_FAILURE(status)) { + init(0, 0, 0); + return; + } + int32_t decimalPoint = num.indexOf(DOT); + double n = dl.getDouble(); + if (decimalPoint == -1) { + init(n, 0, 0); + } else { + int32_t v = num.length() - decimalPoint - 1; + init(n, v, getFractionalDigits(n, v)); + } +} + + +FixedDecimal::FixedDecimal(const FixedDecimal &other) { + source = other.source; + visibleDecimalDigitCount = other.visibleDecimalDigitCount; + decimalDigits = other.decimalDigits; + decimalDigitsWithoutTrailingZeros = other.decimalDigitsWithoutTrailingZeros; + intValue = other.intValue; + hasIntegerValue = other.hasIntegerValue; + isNegative = other.isNegative; + isNanOrInfinity = other.isNanOrInfinity; +} + + +void FixedDecimal::init(double n) { + int32_t numFractionDigits = decimals(n); + init(n, numFractionDigits, getFractionalDigits(n, numFractionDigits)); +} + + +void FixedDecimal::init(double n, int32_t v, int64_t f) { + isNegative = n < 0.0; + source = fabs(n); + isNanOrInfinity = uprv_isNaN(source) || uprv_isPositiveInfinity(source); + if (isNanOrInfinity) { + v = 0; + f = 0; + intValue = 0; + hasIntegerValue = FALSE; + } else { + intValue = (int64_t)source; + hasIntegerValue = (source == intValue); + } + + visibleDecimalDigitCount = v; + decimalDigits = f; + if (f == 0) { + decimalDigitsWithoutTrailingZeros = 0; + } else { + int64_t fdwtz = f; + while ((fdwtz%10) == 0) { + fdwtz /= 10; + } + decimalDigitsWithoutTrailingZeros = fdwtz; + } +} + + +// Fast path only exact initialization. Return true if successful. +// Note: Do not multiply by 10 each time through loop, rounding cruft can build +// up that makes the check for an integer result fail. +// A single multiply of the original number works more reliably. +static int32_t p10[] = {1, 10, 100, 1000, 10000}; +UBool FixedDecimal::quickInit(double n) { + UBool success = FALSE; + n = fabs(n); + int32_t numFractionDigits; + for (numFractionDigits = 0; numFractionDigits <= 3; numFractionDigits++) { + double scaledN = n * p10[numFractionDigits]; + if (scaledN == floor(scaledN)) { + success = TRUE; + break; + } + } + if (success) { + init(n, numFractionDigits, getFractionalDigits(n, numFractionDigits)); + } + return success; +} + + + +int32_t FixedDecimal::decimals(double n) { + // Count the number of decimal digits in the fraction part of the number, excluding trailing zeros. + // fastpath the common cases, integers or fractions with 3 or fewer digits + n = fabs(n); + for (int ndigits=0; ndigits<=3; ndigits++) { + double scaledN = n * p10[ndigits]; + if (scaledN == floor(scaledN)) { + return ndigits; + } + } + + // Slow path, convert with sprintf, parse converted output. + char buf[30] = {0}; + sprintf(buf, "%1.15e", n); + // formatted number looks like this: 1.234567890123457e-01 + int exponent = atoi(buf+18); + int numFractionDigits = 15; + for (int i=16; ; --i) { + if (buf[i] != '0') { + break; + } + --numFractionDigits; + } + numFractionDigits -= exponent; // Fraction part of fixed point representation. + return numFractionDigits; +} + + +// Get the fraction digits of a double, represented as an integer. +// v is the number of visible fraction digits in the displayed form of the number. +// Example: n = 1001.234, v = 6, result = 234000 +// TODO: need to think through how this is used in the plural rule context. +// This function can easily encounter integer overflow, +// and can easily return noise digits when the precision of a double is exceeded. + +int64_t FixedDecimal::getFractionalDigits(double n, int32_t v) { + if (v == 0 || n == floor(n) || uprv_isNaN(n) || uprv_isPositiveInfinity(n)) { + return 0; + } + n = fabs(n); + double fract = n - floor(n); + switch (v) { + case 1: return (int64_t)(fract*10.0 + 0.5); + case 2: return (int64_t)(fract*100.0 + 0.5); + case 3: return (int64_t)(fract*1000.0 + 0.5); + default: + double scaled = floor(fract * pow(10.0, (double)v) + 0.5); + if (scaled > U_INT64_MAX) { + return U_INT64_MAX; + } else { + return (int64_t)scaled; + } + } +} + + +void FixedDecimal::adjustForMinFractionDigits(int32_t minFractionDigits) { + int32_t numTrailingFractionZeros = minFractionDigits - visibleDecimalDigitCount; + if (numTrailingFractionZeros > 0) { + for (int32_t i=0; i= 100000000000000000LL) { + break; + } + decimalDigits *= 10; + } + visibleDecimalDigitCount += numTrailingFractionZeros; + } +} + + +double FixedDecimal::get(tokenType operand) const { + switch(operand) { + case tVariableN: return source; + case tVariableI: return (double)intValue; + case tVariableF: return (double)decimalDigits; + case tVariableT: return (double)decimalDigitsWithoutTrailingZeros; + case tVariableV: return visibleDecimalDigitCount; + default: + U_ASSERT(FALSE); // unexpected. + return source; + } +} + +int32_t FixedDecimal::getVisibleFractionDigitCount() const { + return visibleDecimalDigitCount; +} + + + +PluralAvailableLocalesEnumeration::PluralAvailableLocalesEnumeration(UErrorCode &status) { + fLocales = NULL; + fRes = NULL; + fOpenStatus = status; + if (U_FAILURE(status)) { + return; + } + fOpenStatus = U_ZERO_ERROR; + LocalUResourceBundlePointer rb(ures_openDirect(NULL, "plurals", &fOpenStatus)); + fLocales = ures_getByKey(rb.getAlias(), "locales", NULL, &fOpenStatus); +} + +PluralAvailableLocalesEnumeration::~PluralAvailableLocalesEnumeration() { + ures_close(fLocales); + ures_close(fRes); + fLocales = NULL; + fRes = NULL; +} + +const char *PluralAvailableLocalesEnumeration::next(int32_t *resultLength, UErrorCode &status) { + if (U_FAILURE(status)) { + return NULL; + } + if (U_FAILURE(fOpenStatus)) { + status = fOpenStatus; + return NULL; + } + fRes = ures_getNextResource(fLocales, fRes, &status); + if (fRes == NULL || U_FAILURE(status)) { + if (status == U_INDEX_OUTOFBOUNDS_ERROR) { + status = U_ZERO_ERROR; } + return NULL; + } + const char *result = ures_getKey(fRes); + if (resultLength != NULL) { + *resultLength = uprv_strlen(result); + } + return result; +} + + +void PluralAvailableLocalesEnumeration::reset(UErrorCode &status) { + if (U_FAILURE(status)) { + return; + } + if (U_FAILURE(fOpenStatus)) { + status = fOpenStatus; + return; + } + ures_resetIterator(fLocales); +} + +int32_t PluralAvailableLocalesEnumeration::count(UErrorCode &status) const { + if (U_FAILURE(status)) { + return 0; + } + if (U_FAILURE(fOpenStatus)) { + status = fOpenStatus; + return 0; } + return ures_getSize(fLocales); } U_NAMESPACE_END