/*
*******************************************************************************
-* Copyright (C) 2007-2012, International Business Machines Corporation and
+* Copyright (C) 2007-2014, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*
* File plurrule.cpp
*/
+#include <math.h>
+#include <stdio.h>
+
#include "unicode/utypes.h"
#include "unicode/localpointer.h"
#include "unicode/plurrule.h"
#include "unicode/upluralrules.h"
#include "unicode/ures.h"
+#include "charstr.h"
#include "cmemory.h"
#include "cstring.h"
+#include "digitlst.h"
#include "hash.h"
+#include "locutil.h"
#include "mutex.h"
#include "patternprops.h"
#include "plurrule_impl.h"
#include "putilimp.h"
#include "ucln_in.h"
#include "ustrfmt.h"
-#include "locutil.h"
#include "uassert.h"
+#include "uvectr32.h"
+#include "sharedpluralrules.h"
+#include "lrucache.h"
#if !UCONFIG_NO_FORMATTING
-U_NAMESPACE_BEGIN
+static icu::LRUCache *gPluralRulesCache = NULL;
+static UMutex gPluralRulesCacheMutex = U_MUTEX_INITIALIZER;
+static icu::UInitOnce gPluralRulesCacheInitOnce = U_INITONCE_INITIALIZER;
+
+U_CDECL_BEGIN
+static UBool U_CALLCONV plurrules_cleanup(void) {
+ gPluralRulesCacheInitOnce.reset();
+ if (gPluralRulesCache) {
+ delete gPluralRulesCache;
+ gPluralRulesCache = NULL;
+ }
+ return TRUE;
+}
+U_CDECL_END
-// shared by all instances when lazy-initializing samples
-static UMutex pluralMutex = U_MUTEX_INITIALIZER;
+U_NAMESPACE_BEGIN
#define ARRAY_SIZE(array) (int32_t)(sizeof array / sizeof array[0])
static const UChar PK_AND[]={LOW_A,LOW_N,LOW_D,0};
static const UChar PK_OR[]={LOW_O,LOW_R,0};
static const UChar PK_VAR_N[]={LOW_N,0};
+static const UChar PK_VAR_I[]={LOW_I,0};
+static const UChar PK_VAR_F[]={LOW_F,0};
+static const UChar PK_VAR_T[]={LOW_T,0};
+static const UChar PK_VAR_V[]={LOW_V,0};
static const UChar PK_WITHIN[]={LOW_W,LOW_I,LOW_T,LOW_H,LOW_I,LOW_N,0};
+static const UChar PK_DECIMAL[]={LOW_D,LOW_E,LOW_C,LOW_I,LOW_M,LOW_A,LOW_L,0};
+static const UChar PK_INTEGER[]={LOW_I,LOW_N,LOW_T,LOW_E,LOW_G,LOW_E,LOW_R,0};
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralRules)
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralKeywordEnumeration)
-PluralRules::PluralRules(UErrorCode& status)
+PluralRules::PluralRules(UErrorCode& /*status*/)
: UObject(),
- mRules(NULL),
- mParser(NULL),
- mSamples(NULL),
- mSampleInfo(NULL),
- mSampleInfoCount(0)
+ mRules(NULL)
{
- if (U_FAILURE(status)) {
- return;
- }
- mParser = new RuleParser();
- if (mParser==NULL) {
- status = U_MEMORY_ALLOCATION_ERROR;
- }
}
PluralRules::PluralRules(const PluralRules& other)
: UObject(other),
- mRules(NULL),
- mParser(NULL),
- mSamples(NULL),
- mSampleInfo(NULL),
- mSampleInfoCount(0)
+ mRules(NULL)
{
*this=other;
}
PluralRules::~PluralRules() {
delete mRules;
- delete mParser;
- uprv_free(mSamples);
- uprv_free(mSampleInfo);
+}
+
+SharedPluralRules::~SharedPluralRules() {
+ delete ptr;
}
PluralRules*
else {
mRules = new RuleChain(*other.mRules);
}
- delete mParser;
- mParser = new RuleParser();
-
- uprv_free(mSamples);
- mSamples = NULL;
-
- uprv_free(mSampleInfo);
- mSampleInfo = NULL;
- mSampleInfoCount = 0;
}
return *this;
}
+StringEnumeration* PluralRules::getAvailableLocales(UErrorCode &status) {
+ StringEnumeration *result = new PluralAvailableLocalesEnumeration(status);
+ if (result == NULL && U_SUCCESS(status)) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ }
+ if (U_FAILURE(status)) {
+ delete result;
+ result = NULL;
+ }
+ return result;
+}
+
+
PluralRules* U_EXPORT2
PluralRules::createRules(const UnicodeString& description, UErrorCode& status) {
- RuleChain rules;
-
if (U_FAILURE(status)) {
return NULL;
}
+
+ PluralRuleParser parser;
PluralRules *newRules = new PluralRules(status);
- if ( (newRules != NULL)&& U_SUCCESS(status) ) {
- newRules->parseDescription((UnicodeString &)description, rules, status);
- if (U_SUCCESS(status)) {
- newRules->addRules(rules);
- }
+ if (U_SUCCESS(status) && newRules == NULL) {
+ status = U_MEMORY_ALLOCATION_ERROR;
}
+ parser.parse(description, newRules, status);
if (U_FAILURE(status)) {
delete newRules;
- return NULL;
- }
- else {
- return newRules;
+ newRules = NULL;
}
+ return newRules;
}
+
PluralRules* U_EXPORT2
PluralRules::createDefaultRules(UErrorCode& status) {
return createRules(UnicodeString(TRUE, PLURAL_DEFAULT_RULE, -1), status);
}
+/******************************************************************************/
+/* Create PluralRules cache */
+
+static SharedObject *U_CALLCONV createSharedPluralRules(
+ const char *localeId, UErrorCode &status) {
+ if (U_FAILURE(status)) {
+ return NULL;
+ }
+ PluralRules *pr = PluralRules::internalForLocale(
+ localeId, UPLURAL_TYPE_CARDINAL, status);
+ if (U_FAILURE(status)) {
+ return NULL;
+ }
+ SharedObject *result = new SharedPluralRules(pr);
+ if (result == NULL) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ delete pr;
+ return NULL;
+ }
+ return result;
+}
+
+static void U_CALLCONV pluralRulesCacheInit(UErrorCode &status) {
+ U_ASSERT(gPluralRulesCache == NULL);
+ ucln_i18n_registerCleanup(UCLN_I18N_PLURAL_RULE, plurrules_cleanup);
+ gPluralRulesCache = new SimpleLRUCache(100, &createSharedPluralRules, status);
+ if (U_FAILURE(status)) {
+ delete gPluralRulesCache;
+ gPluralRulesCache = NULL;
+ }
+}
+
+static void getSharedPluralRulesFromCache(
+ const char *locale,
+ const SharedPluralRules *&ptr,
+ UErrorCode &status) {
+ umtx_initOnce(gPluralRulesCacheInitOnce, &pluralRulesCacheInit, status);
+ if (U_FAILURE(status)) {
+ return;
+ }
+ Mutex lock(&gPluralRulesCacheMutex);
+ gPluralRulesCache->get(locale, ptr, status);
+}
+
+
+
+
+/* end plural rules cache */
+/******************************************************************************/
+
+const SharedPluralRules* U_EXPORT2
+PluralRules::createSharedInstance(
+ const Locale& locale, UPluralType type, UErrorCode& status) {
+ if (U_FAILURE(status)) {
+ return NULL;
+ }
+ if (type != UPLURAL_TYPE_CARDINAL) {
+ status = U_UNSUPPORTED_ERROR;
+ return NULL;
+ }
+ const SharedPluralRules *result = NULL;
+ getSharedPluralRulesFromCache(locale.getName(), result, status);
+ return result;
+}
+
PluralRules* U_EXPORT2
PluralRules::forLocale(const Locale& locale, UErrorCode& status) {
return forLocale(locale, UPLURAL_TYPE_CARDINAL, status);
PluralRules* U_EXPORT2
PluralRules::forLocale(const Locale& locale, UPluralType type, UErrorCode& status) {
- RuleChain rChain;
+ if (type != UPLURAL_TYPE_CARDINAL) {
+ return internalForLocale(locale, type, status);
+ }
+ const SharedPluralRules *shared = createSharedInstance(
+ locale, type, status);
+ if (U_FAILURE(status)) {
+ return NULL;
+ }
+ PluralRules *result = (*shared)->clone();
+ shared->removeRef();
+ if (result == NULL) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ }
+ return result;
+}
+
+PluralRules* U_EXPORT2
+PluralRules::internalForLocale(const Locale& locale, UPluralType type, UErrorCode& status) {
if (U_FAILURE(status)) {
return NULL;
}
return NULL;
}
UnicodeString locRule = newObj->getRuleFromResource(locale, type, status);
- if ((locRule.length() != 0) && U_SUCCESS(status)) {
- newObj->parseDescription(locRule, rChain, status);
- if (U_SUCCESS(status)) {
- newObj->addRules(rChain);
- }
- }
- if (U_FAILURE(status)||(locRule.length() == 0)) {
- // use default plural rule
+ // TODO: which errors, if any, should be returned?
+ if (locRule.length() == 0) {
+ // Locales with no specific rules (all numbers have the "other" category
+ // will return a U_MISSING_RESOURCE_ERROR at this point. This is not
+ // an error.
+ locRule = UnicodeString(PLURAL_DEFAULT_RULE);
status = U_ZERO_ERROR;
- UnicodeString defRule = UnicodeString(PLURAL_DEFAULT_RULE);
- newObj->parseDescription(defRule, rChain, status);
- newObj->addRules(rChain);
}
+ PluralRuleParser parser;
+ parser.parse(locRule, newObj, status);
+ // TODO: should rule parse errors be returned, or
+ // should we silently use default rules?
+ // Original impl used default rules.
+ // Ask the question to ICU Core.
return newObj;
}
UnicodeString
PluralRules::select(int32_t number) const {
- if (mRules == NULL) {
- return UnicodeString(TRUE, PLURAL_DEFAULT_RULE, -1);
- }
- else {
- return mRules->select(number);
- }
+ return select(FixedDecimal(number));
}
UnicodeString
PluralRules::select(double number) const {
+ return select(FixedDecimal(number));
+}
+
+UnicodeString
+PluralRules::select(const FixedDecimal &number) const {
if (mRules == NULL) {
return UnicodeString(TRUE, PLURAL_DEFAULT_RULE, -1);
}
}
double
-PluralRules::getUniqueKeywordValue(const UnicodeString& keyword) {
- double val = 0.0;
- UErrorCode status = U_ZERO_ERROR;
- int32_t count = getSamplesInternal(keyword, &val, 1, FALSE, status);
- return count == 1 ? val : UPLRULES_NO_UNIQUE_VALUE;
+PluralRules::getUniqueKeywordValue(const UnicodeString& /* keyword */) {
+ // Not Implemented.
+ return UPLRULES_NO_UNIQUE_VALUE;
}
int32_t
-PluralRules::getAllKeywordValues(const UnicodeString &keyword, double *dest,
- int32_t destCapacity, UErrorCode& error) {
- return getSamplesInternal(keyword, dest, destCapacity, FALSE, error);
+PluralRules::getAllKeywordValues(const UnicodeString & /* keyword */, double * /* dest */,
+ int32_t /* destCapacity */, UErrorCode& error) {
+ error = U_UNSUPPORTED_ERROR;
+ return 0;
}
-int32_t
-PluralRules::getSamples(const UnicodeString &keyword, double *dest,
- int32_t destCapacity, UErrorCode& status) {
- return getSamplesInternal(keyword, dest, destCapacity, TRUE, status);
-}
-
-int32_t
-PluralRules::getSamplesInternal(const UnicodeString &keyword, double *dest,
- int32_t destCapacity, UBool includeUnlimited,
- UErrorCode& status) {
- initSamples(status);
- if (U_FAILURE(status)) {
- return -1;
- }
- if (destCapacity < 0 || (dest == NULL && destCapacity > 0)) {
- status = U_ILLEGAL_ARGUMENT_ERROR;
- return -1;
+
+static double scaleForInt(double d) {
+ double scale = 1.0;
+ while (d != floor(d)) {
+ d = d * 10.0;
+ scale = scale * 10.0;
}
+ return scale;
+}
- int32_t index = getKeywordIndex(keyword, status);
- if (index == -1) {
- return 0;
- }
+static int32_t
+getSamplesFromString(const UnicodeString &samples, double *dest,
+ int32_t destCapacity, UErrorCode& status) {
+ int32_t sampleCount = 0;
+ int32_t sampleStartIdx = 0;
+ int32_t sampleEndIdx = 0;
+
+ //std::string ss; // TODO: debugging.
+ // std::cout << "PluralRules::getSamples(), samples = \"" << samples.toUTF8String(ss) << "\"\n";
+ for (sampleCount = 0; sampleCount < destCapacity && sampleStartIdx < samples.length(); ) {
+ sampleEndIdx = samples.indexOf(COMMA, sampleStartIdx);
+ if (sampleEndIdx == -1) {
+ sampleEndIdx = samples.length();
+ }
+ const UnicodeString &sampleRange = samples.tempSubStringBetween(sampleStartIdx, sampleEndIdx);
+ // ss.erase();
+ // std::cout << "PluralRules::getSamples(), samplesRange = \"" << sampleRange.toUTF8String(ss) << "\"\n";
+ int32_t tildeIndex = sampleRange.indexOf(TILDE);
+ if (tildeIndex < 0) {
+ FixedDecimal fixed(sampleRange, status);
+ double sampleValue = fixed.source;
+ if (fixed.visibleDecimalDigitCount == 0 || sampleValue != floor(sampleValue)) {
+ dest[sampleCount++] = sampleValue;
+ }
+ } else {
+
+ FixedDecimal fixedLo(sampleRange.tempSubStringBetween(0, tildeIndex), status);
+ FixedDecimal fixedHi(sampleRange.tempSubStringBetween(tildeIndex+1), status);
+ double rangeLo = fixedLo.source;
+ double rangeHi = fixedHi.source;
+ if (U_FAILURE(status)) {
+ break;
+ }
+ if (rangeHi < rangeLo) {
+ status = U_INVALID_FORMAT_ERROR;
+ break;
+ }
- const int32_t LIMIT_MASK = 0x1 << 31;
+ // For ranges of samples with fraction decimal digits, scale the number up so that we
+ // are adding one in the units place. Avoids roundoffs from repetitive adds of tenths.
- if (!includeUnlimited) {
- if ((mSampleInfo[index] & LIMIT_MASK) == 0) {
- return -1;
+ double scale = scaleForInt(rangeLo);
+ double t = scaleForInt(rangeHi);
+ if (t > scale) {
+ scale = t;
+ }
+ rangeLo *= scale;
+ rangeHi *= scale;
+ for (double n=rangeLo; n<=rangeHi; n+=1) {
+ // Hack Alert: don't return any decimal samples with integer values that
+ // originated from a format with trailing decimals.
+ // This API is returning doubles, which can't distinguish having displayed
+ // zeros to the right of the decimal.
+ // This results in test failures with values mapping back to a different keyword.
+ double sampleValue = n/scale;
+ if (!(sampleValue == floor(sampleValue) && fixedLo.visibleDecimalDigitCount > 0)) {
+ dest[sampleCount++] = sampleValue;
+ }
+ if (sampleCount >= destCapacity) {
+ break;
+ }
+ }
}
+ sampleStartIdx = sampleEndIdx + 1;
}
+ return sampleCount;
+}
- int32_t start = index == 0 ? 0 : mSampleInfo[index - 1] & ~LIMIT_MASK;
- int32_t limit = mSampleInfo[index] & ~LIMIT_MASK;
- int32_t len = limit - start;
- if (len <= destCapacity) {
- destCapacity = len;
- } else if (includeUnlimited) {
- len = destCapacity; // no overflow, and don't report more than we copy
- } else {
- status = U_BUFFER_OVERFLOW_ERROR;
- return len;
+
+int32_t
+PluralRules::getSamples(const UnicodeString &keyword, double *dest,
+ int32_t destCapacity, UErrorCode& status) {
+ RuleChain *rc = rulesForKeyword(keyword);
+ if (rc == NULL || destCapacity == 0 || U_FAILURE(status)) {
+ return 0;
}
- for (int32_t i = 0; i < destCapacity; ++i, ++start) {
- dest[i] = mSamples[start];
+ int32_t numSamples = getSamplesFromString(rc->fIntegerSamples, dest, destCapacity, status);
+ if (numSamples == 0) {
+ numSamples = getSamplesFromString(rc->fDecimalSamples, dest, destCapacity, status);
}
- return len;
+ return numSamples;
+}
+
+
+RuleChain *PluralRules::rulesForKeyword(const UnicodeString &keyword) const {
+ RuleChain *rc;
+ for (rc = mRules; rc != NULL; rc = rc->fNext) {
+ if (rc->fKeyword == keyword) {
+ break;
+ }
+ }
+ return rc;
}
if (0 == keyword.compare(PLURAL_KEYWORD_OTHER, 5)) {
return true;
}
- else {
- if (mRules==NULL) {
- return false;
- }
- else {
- return mRules->isKeyword(keyword);
- }
- }
+ return rulesForKeyword(keyword) != NULL;
}
UnicodeString
UBool
PluralRules::operator==(const PluralRules& other) const {
- int32_t limit;
const UnicodeString *ptrKeyword;
UErrorCode status= U_ZERO_ERROR;
return FALSE;
}
- if ((limit=this->getRepeatLimit()) != other.getRepeatLimit()) {
- return FALSE;
- }
- UnicodeString myKeyword, otherKeyword;
- for (int32_t i=0; i<limit; ++i) {
- myKeyword = this->select(i);
- otherKeyword = other.select(i);
- if (myKeyword!=otherKeyword) {
- return FALSE;
- }
- }
return TRUE;
}
+
void
-PluralRules::parseDescription(UnicodeString& data, RuleChain& rules, UErrorCode &status)
+PluralRuleParser::parse(const UnicodeString& ruleData, PluralRules *prules, UErrorCode &status)
{
- int32_t ruleIndex=0;
- UnicodeString token;
- tokenType type;
- tokenType prevType=none;
- RuleChain *ruleChain=NULL;
- AndConstraint *curAndConstraint=NULL;
- OrConstraint *orNode=NULL;
- RuleChain *lastChain=NULL;
-
if (U_FAILURE(status)) {
return;
}
- UnicodeString ruleData = data.toLower("");
- while (ruleIndex< ruleData.length()) {
- mParser->getNextToken(ruleData, &ruleIndex, token, type, status);
+ U_ASSERT(ruleIndex == 0); // Parsers are good for a single use only!
+ ruleSrc = &ruleData;
+
+ while (ruleIndex< ruleSrc->length()) {
+ getNextToken(status);
if (U_FAILURE(status)) {
return;
}
- mParser->checkSyntax(prevType, type, status);
+ checkSyntax(status);
if (U_FAILURE(status)) {
return;
}
curAndConstraint = curAndConstraint->add();
break;
case tOr:
- lastChain = &rules;
- while (lastChain->next !=NULL) {
- lastChain = lastChain->next;
- }
- orNode=lastChain->ruleHeader;
- while (orNode->next != NULL) {
- orNode = orNode->next;
+ {
+ U_ASSERT(currentChain != NULL);
+ OrConstraint *orNode=currentChain->ruleHeader;
+ while (orNode->next != NULL) {
+ orNode = orNode->next;
+ }
+ orNode->next= new OrConstraint();
+ orNode=orNode->next;
+ orNode->next=NULL;
+ curAndConstraint = orNode->add();
}
- orNode->next= new OrConstraint();
- orNode=orNode->next;
- orNode->next=NULL;
- curAndConstraint = orNode->add();
break;
case tIs:
U_ASSERT(curAndConstraint != NULL);
- curAndConstraint->rangeHigh=-1;
+ U_ASSERT(curAndConstraint->value == -1);
+ U_ASSERT(curAndConstraint->rangeList == NULL);
break;
case tNot:
U_ASSERT(curAndConstraint != NULL);
- curAndConstraint->notIn=TRUE;
+ curAndConstraint->negated=TRUE;
break;
+
+ case tNotEqual:
+ curAndConstraint->negated=TRUE;
case tIn:
- U_ASSERT(curAndConstraint != NULL);
- curAndConstraint->rangeHigh=PLURAL_RANGE_HIGH;
- curAndConstraint->integerOnly = TRUE;
- break;
case tWithin:
+ case tEqual:
U_ASSERT(curAndConstraint != NULL);
- curAndConstraint->rangeHigh=PLURAL_RANGE_HIGH;
+ curAndConstraint->rangeList = new UVector32(status);
+ curAndConstraint->rangeList->addElement(-1, status); // range Low
+ curAndConstraint->rangeList->addElement(-1, status); // range Hi
+ rangeLowIdx = 0;
+ rangeHiIdx = 1;
+ curAndConstraint->value=PLURAL_RANGE_HIGH;
+ curAndConstraint->integerOnly = (type != tWithin);
break;
case tNumber:
U_ASSERT(curAndConstraint != NULL);
curAndConstraint->opNum=getNumberValue(token);
}
else {
- if (curAndConstraint->rangeLow == -1) {
- curAndConstraint->rangeLow=getNumberValue(token);
- }
- else {
- curAndConstraint->rangeHigh=getNumberValue(token);
+ if (curAndConstraint->rangeList == NULL) {
+ // this is for an 'is' rule
+ curAndConstraint->value = getNumberValue(token);
+ } else {
+ // this is for an 'in' or 'within' rule
+ if (curAndConstraint->rangeList->elementAti(rangeLowIdx) == -1) {
+ curAndConstraint->rangeList->setElementAt(getNumberValue(token), rangeLowIdx);
+ curAndConstraint->rangeList->setElementAt(getNumberValue(token), rangeHiIdx);
+ }
+ else {
+ curAndConstraint->rangeList->setElementAt(getNumberValue(token), rangeHiIdx);
+ if (curAndConstraint->rangeList->elementAti(rangeLowIdx) >
+ curAndConstraint->rangeList->elementAti(rangeHiIdx)) {
+ // Range Lower bound > Range Upper bound.
+ // U_UNEXPECTED_TOKEN seems a little funny, but it is consistently
+ // used for all plural rule parse errors.
+ status = U_UNEXPECTED_TOKEN;
+ break;
+ }
+ }
}
}
break;
+ case tComma:
+ // TODO: rule syntax checking is inadequate, can happen with badly formed rules.
+ // Catch cases like "n mod 10, is 1" here instead.
+ if (curAndConstraint == NULL || curAndConstraint->rangeList == NULL) {
+ status = U_UNEXPECTED_TOKEN;
+ break;
+ }
+ U_ASSERT(curAndConstraint->rangeList->size() >= 2);
+ rangeLowIdx = curAndConstraint->rangeList->size();
+ curAndConstraint->rangeList->addElement(-1, status); // range Low
+ rangeHiIdx = curAndConstraint->rangeList->size();
+ curAndConstraint->rangeList->addElement(-1, status); // range Hi
+ break;
case tMod:
U_ASSERT(curAndConstraint != NULL);
curAndConstraint->op=AndConstraint::MOD;
break;
+ case tVariableN:
+ case tVariableI:
+ case tVariableF:
+ case tVariableT:
+ case tVariableV:
+ U_ASSERT(curAndConstraint != NULL);
+ curAndConstraint->digitsType = type;
+ break;
case tKeyword:
- if (ruleChain==NULL) {
- ruleChain = &rules;
+ {
+ RuleChain *newChain = new RuleChain;
+ if (newChain == NULL) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ break;
}
- else {
- while (ruleChain->next!=NULL){
- ruleChain=ruleChain->next;
+ newChain->fKeyword = token;
+ if (prules->mRules == NULL) {
+ prules->mRules = newChain;
+ } else {
+ // The new rule chain goes at the end of the linked list of rule chains,
+ // unless there is an "other" keyword & chain. "other" must remain last.
+ RuleChain *insertAfter = prules->mRules;
+ while (insertAfter->fNext!=NULL &&
+ insertAfter->fNext->fKeyword.compare(PLURAL_KEYWORD_OTHER, 5) != 0 ){
+ insertAfter=insertAfter->fNext;
}
- ruleChain=ruleChain->next=new RuleChain();
- }
- if (ruleChain->ruleHeader != NULL) {
- delete ruleChain->ruleHeader;
+ newChain->fNext = insertAfter->fNext;
+ insertAfter->fNext = newChain;
}
- orNode = ruleChain->ruleHeader = new OrConstraint();
+ OrConstraint *orNode = new OrConstraint();
+ newChain->ruleHeader = orNode;
curAndConstraint = orNode->add();
- ruleChain->keyword = token;
- break;
- default:
+ currentChain = newChain;
+ }
break;
- }
- prevType=type;
- }
-}
-
-int32_t
-PluralRules::getNumberValue(const UnicodeString& token) const {
- int32_t i;
- char digits[128];
-
- i = token.extract(0, token.length(), digits, ARRAY_SIZE(digits), US_INV);
- digits[i]='\0';
-
- return((int32_t)atoi(digits));
-}
-
-
-void
-PluralRules::getNextLocale(const UnicodeString& localeData, int32_t* curIndex, UnicodeString& localeName) {
- int32_t i=*curIndex;
-
- localeName.remove();
- while (i< localeData.length()) {
- if ( (localeData.charAt(i)!= SPACE) && (localeData.charAt(i)!= COMMA) ) {
- break;
- }
- i++;
- }
-
- while (i< localeData.length()) {
- if ( (localeData.charAt(i)== SPACE) || (localeData.charAt(i)== COMMA) ) {
- break;
- }
- localeName+=localeData.charAt(i++);
- }
- *curIndex=i;
-}
-
-
-int32_t
-PluralRules::getRepeatLimit() const {
- if (mRules!=NULL) {
- return mRules->getRepeatLimit();
- }
- else {
- return 0;
- }
-}
-int32_t
-PluralRules::getKeywordIndex(const UnicodeString& keyword,
- UErrorCode& status) const {
- if (U_SUCCESS(status)) {
- int32_t n = 0;
- RuleChain* rc = mRules;
- while (rc != NULL) {
- if (rc->ruleHeader != NULL) {
- if (rc->keyword == keyword) {
- return n;
+ case tInteger:
+ for (;;) {
+ getNextToken(status);
+ if (U_FAILURE(status) || type == tSemiColon || type == tEOF || type == tAt) {
+ break;
}
- ++n;
- }
- rc = rc->next;
- }
- if (0 == keyword.compare(PLURAL_KEYWORD_OTHER, 5)) {
- return n;
- }
- }
- return -1;
-}
-
-typedef struct SampleRecord {
- int32_t ruleIndex;
- double value;
-} SampleRecord;
-
-void
-PluralRules::initSamples(UErrorCode& status) {
- if (U_FAILURE(status)) {
- return;
- }
- Mutex lock(&pluralMutex);
-
- if (mSamples) {
- return;
- }
-
- // Note, the original design let you have multiple rules with the same keyword. But
- // we don't use that in our data and existing functions in this implementation don't
- // fully support it (for example, the returned keywords is a list and not a set).
- //
- // So I don't support this here either. If you ask for samples, or for all values,
- // you will get information about the first rule with that keyword, not all rules with
- // that keyword.
-
- int32_t maxIndex = 0;
- int32_t otherIndex = -1; // the value -1 will indicate we added 'other' at end
- RuleChain* rc = mRules;
- while (rc != NULL) {
- if (rc->ruleHeader != NULL) {
- if (otherIndex == -1 && 0 == rc->keyword.compare(PLURAL_KEYWORD_OTHER, 5)) {
- otherIndex = maxIndex;
+ if (type == tEllipsis) {
+ currentChain->fIntegerSamplesUnbounded = TRUE;
+ continue;
+ }
+ currentChain->fIntegerSamples.append(token);
}
- ++maxIndex;
- }
- rc = rc->next;
- }
- if (otherIndex == -1) {
- ++maxIndex;
- }
-
- LocalMemory<int32_t> newSampleInfo;
- if (NULL == newSampleInfo.allocateInsteadAndCopy(maxIndex)) {
- status = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
-
- const int32_t LIMIT_MASK = 0x1 << 31;
-
- rc = mRules;
- int32_t n = 0;
- while (rc != NULL) {
- if (rc->ruleHeader != NULL) {
- newSampleInfo[n++] = rc->ruleHeader->isLimited() ? LIMIT_MASK : 0;
- }
- rc = rc->next;
- }
- if (otherIndex == -1) {
- newSampleInfo[maxIndex - 1] = 0; // unlimited
- }
-
- MaybeStackArray<SampleRecord, 10> newSamples;
- int32_t sampleCount = 0;
-
- int32_t limit = getRepeatLimit() * MAX_SAMPLES * 2;
- if (limit < 10) {
- limit = 10;
- }
-
- for (int i = 0, keywordsRemaining = maxIndex;
- keywordsRemaining > 0 && i < limit;
- ++i) {
- double val = i / 2.0;
+ break;
- n = 0;
- rc = mRules;
- int32_t found = -1;
- while (rc != NULL) {
- if (rc->ruleHeader != NULL) {
- if (rc->ruleHeader->isFulfilled(val)) {
- found = n;
+ case tDecimal:
+ for (;;) {
+ getNextToken(status);
+ if (U_FAILURE(status) || type == tSemiColon || type == tEOF || type == tAt) {
break;
}
- ++n;
- }
- rc = rc->next;
- }
- if (found == -1) {
- // 'other'. If there is an 'other' rule, the rule set is bad since nothing
- // should leak through, but we don't bother to report that here.
- found = otherIndex == -1 ? maxIndex - 1 : otherIndex;
- }
- if (newSampleInfo[found] == MAX_SAMPLES) { // limit flag not set
- continue;
- }
- newSampleInfo[found] += 1; // won't impact limit flag
-
- if (sampleCount == newSamples.getCapacity()) {
- int32_t newCapacity = sampleCount < 20 ? 128 : sampleCount * 2;
- if (NULL == newSamples.resize(newCapacity, sampleCount)) {
- status = U_MEMORY_ALLOCATION_ERROR;
- return;
+ if (type == tEllipsis) {
+ currentChain->fDecimalSamplesUnbounded = TRUE;
+ continue;
+ }
+ currentChain->fDecimalSamples.append(token);
}
+ break;
+
+ default:
+ break;
}
- newSamples[sampleCount].ruleIndex = found;
- newSamples[sampleCount].value = val;
- ++sampleCount;
-
- if (newSampleInfo[found] == MAX_SAMPLES) { // limit flag not set
- --keywordsRemaining;
- }
- }
-
- // sort the values by index, leaving order otherwise unchanged
- // this is just a selection sort for simplicity
- LocalMemory<double> values;
- if (NULL == values.allocateInsteadAndCopy(sampleCount)) {
- status = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- for (int i = 0, j = 0; i < maxIndex; ++i) {
- for (int k = 0; k < sampleCount; ++k) {
- if (newSamples[k].ruleIndex == i) {
- values[j++] = newSamples[k].value;
- }
+ prevType=type;
+ if (U_FAILURE(status)) {
+ break;
}
}
-
- // convert array of mask/lengths to array of mask/limits
- limit = 0;
- for (int i = 0; i < maxIndex; ++i) {
- int32_t info = newSampleInfo[i];
- int32_t len = info & ~LIMIT_MASK;
- limit += len;
- // if a rule is 'unlimited' but has fewer than MAX_SAMPLES samples,
- // it's not really unlimited, so mark it as limited
- int32_t mask = len < MAX_SAMPLES ? LIMIT_MASK : info & LIMIT_MASK;
- newSampleInfo[i] = limit | mask;
- }
-
- // ok, we've got good data
- mSamples = values.orphan();
- mSampleInfo = newSampleInfo.orphan();
- mSampleInfoCount = maxIndex;
-}
-
-void
-PluralRules::addRules(RuleChain& rules) {
- RuleChain *newRule = new RuleChain(rules);
- this->mRules=newRule;
- newRule->setRepeatLimit();
}
UnicodeString
}
char setKey[256];
- UChar result[256];
u_UCharsToChars(s, setKey, resLen + 1);
// printf("\n PluralRule: %s\n", setKey);
-
LocalUResourceBundlePointer ruleRes(ures_getByKey(rb.getAlias(), "rules", NULL, &errCode));
if(U_FAILURE(errCode)) {
return emptyStr;
}
- resLen=0;
LocalUResourceBundlePointer setRes(ures_getByKey(ruleRes.getAlias(), setKey, NULL, &errCode));
if (U_FAILURE(errCode)) {
return emptyStr;
}
int32_t numberKeys = ures_getSize(setRes.getAlias());
- char *key=NULL;
- int32_t len=0;
- for(int32_t i=0; i<numberKeys; ++i) {
- int32_t keyLen;
- resLen=0;
- s=ures_getNextString(setRes.getAlias(), &resLen, (const char**)&key, &errCode);
- keyLen = (int32_t)uprv_strlen(key);
- u_charsToUChars(key, result+len, keyLen);
- len += keyLen;
- result[len++]=COLON;
- uprv_memcpy(result+len, s, resLen*sizeof(UChar));
- len += resLen;
- result[len++]=SEMI_COLON;
- }
- result[len++]=0;
- u_UCharsToChars(result, setKey, len);
- // printf(" Rule: %s\n", setKey);
-
- return UnicodeString(result);
+ UnicodeString result;
+ const char *key=NULL;
+ for(int32_t i=0; i<numberKeys; ++i) { // Keys are zero, one, few, ...
+ UnicodeString rules = ures_getNextUnicodeString(setRes.getAlias(), &key, &errCode);
+ UnicodeString uKey(key, -1, US_INV);
+ result.append(uKey);
+ result.append(COLON);
+ result.append(rules);
+ result.append(SEMI_COLON);
+ }
+ return result;
+}
+
+
+UnicodeString
+PluralRules::getRules() const {
+ UnicodeString rules;
+ if (mRules != NULL) {
+ mRules->dumpRules(rules);
+ }
+ return rules;
}
+
AndConstraint::AndConstraint() {
op = AndConstraint::NONE;
opNum=-1;
- rangeLow=-1;
- rangeHigh=-1;
- notIn=FALSE;
- integerOnly=FALSE;
+ value = -1;
+ rangeList = NULL;
+ negated = FALSE;
+ integerOnly = FALSE;
+ digitsType = none;
next=NULL;
}
AndConstraint::AndConstraint(const AndConstraint& other) {
this->op = other.op;
this->opNum=other.opNum;
- this->rangeLow=other.rangeLow;
- this->rangeHigh=other.rangeHigh;
+ this->value=other.value;
+ this->rangeList=NULL;
+ if (other.rangeList != NULL) {
+ UErrorCode status = U_ZERO_ERROR;
+ this->rangeList = new UVector32(status);
+ this->rangeList->assign(*other.rangeList, status);
+ }
this->integerOnly=other.integerOnly;
- this->notIn=other.notIn;
+ this->negated=other.negated;
+ this->digitsType = other.digitsType;
if (other.next==NULL) {
this->next=NULL;
}
}
AndConstraint::~AndConstraint() {
+ delete rangeList;
if (next!=NULL) {
delete next;
}
UBool
-AndConstraint::isFulfilled(double number) {
- UBool result=TRUE;
- double value=number;
-
- // arrrrrrgh
- if ((rangeHigh == -1 || integerOnly) && number != uprv_floor(number)) {
- return notIn;
- }
-
- if ( op == MOD ) {
- value = (int32_t)value % opNum;
- }
- if ( rangeHigh == -1 ) {
- if ( rangeLow == -1 ) {
- result = TRUE; // empty rule
- }
- else {
- if ( value == rangeLow ) {
- result = TRUE;
- }
- else {
- result = FALSE;
- }
- }
- }
- else {
- if ((rangeLow <= value) && (value <= rangeHigh)) {
- if (integerOnly) {
- if ( value != (int32_t)value) {
- result = FALSE;
- }
- else {
- result = TRUE;
- }
- }
- else {
- result = TRUE;
- }
- }
- else {
- result = FALSE;
- }
- }
- if (notIn) {
- return !result;
- }
- else {
- return result;
- }
-}
-
-UBool
-AndConstraint::isLimited() {
- return (rangeHigh == -1 || integerOnly) && !notIn && op != MOD;
-}
-
-int32_t
-AndConstraint::updateRepeatLimit(int32_t maxLimit) {
-
- if ( op == MOD ) {
- return uprv_max(opNum, maxLimit);
+AndConstraint::isFulfilled(const FixedDecimal &number) {
+ UBool result = TRUE;
+ if (digitsType == none) {
+ // An empty AndConstraint, created by a rule with a keyword but no following expression.
+ return TRUE;
}
- else {
- if ( rangeHigh == -1 ) {
- return uprv_max(rangeLow, maxLimit);
+ double n = number.get(digitsType); // pulls n | i | v | f value for the number.
+ // Will always be positive.
+ // May be non-integer (n option only)
+ do {
+ if (integerOnly && n != uprv_floor(n)) {
+ result = FALSE;
+ break;
+ }
+
+ if (op == MOD) {
+ n = fmod(n, opNum);
+ }
+ if (rangeList == NULL) {
+ result = value == -1 || // empty rule
+ n == value; // 'is' rule
+ break;
}
- else{
- return uprv_max(rangeHigh, maxLimit);
+ result = FALSE; // 'in' or 'within' rule
+ for (int32_t r=0; r<rangeList->size(); r+=2) {
+ if (rangeList->elementAti(r) <= n && n <= rangeList->elementAti(r+1)) {
+ result = TRUE;
+ break;
+ }
}
+ } while (FALSE);
+
+ if (negated) {
+ result = !result;
}
+ return result;
}
while (curOrConstraint->next!=NULL) {
curOrConstraint = curOrConstraint->next;
}
- curOrConstraint->next = NULL;
+ U_ASSERT(curOrConstraint->childNode == NULL);
curOrConstraint->childNode = new AndConstraint();
}
return curOrConstraint->childNode;
}
UBool
-OrConstraint::isFulfilled(double number) {
+OrConstraint::isFulfilled(const FixedDecimal &number) {
OrConstraint* orRule=this;
UBool result=FALSE;
return result;
}
-UBool
-OrConstraint::isLimited() {
- for (OrConstraint *orc = this; orc != NULL; orc = orc->next) {
- UBool result = FALSE;
- for (AndConstraint *andc = orc->childNode; andc != NULL; andc = andc->next) {
- if (andc->isLimited()) {
- result = TRUE;
- break;
- }
- }
- if (result == FALSE) {
- return FALSE;
- }
- }
- return TRUE;
-}
-RuleChain::RuleChain() {
- ruleHeader=NULL;
- next = NULL;
- repeatLimit=0;
+RuleChain::RuleChain(): fKeyword(), fNext(NULL), ruleHeader(NULL), fDecimalSamples(), fIntegerSamples(),
+ fDecimalSamplesUnbounded(FALSE), fIntegerSamplesUnbounded(FALSE) {
}
-RuleChain::RuleChain(const RuleChain& other) {
- this->repeatLimit = other.repeatLimit;
- this->keyword=other.keyword;
+RuleChain::RuleChain(const RuleChain& other) :
+ fKeyword(other.fKeyword), fNext(NULL), ruleHeader(NULL), fDecimalSamples(other.fDecimalSamples),
+ fIntegerSamples(other.fIntegerSamples), fDecimalSamplesUnbounded(other.fDecimalSamplesUnbounded),
+ fIntegerSamplesUnbounded(other.fIntegerSamplesUnbounded) {
if (other.ruleHeader != NULL) {
this->ruleHeader = new OrConstraint(*(other.ruleHeader));
}
- else {
- this->ruleHeader = NULL;
- }
- if (other.next != NULL ) {
- this->next = new RuleChain(*other.next);
- }
- else
- {
- this->next = NULL;
+ if (other.fNext != NULL ) {
+ this->fNext = new RuleChain(*other.fNext);
}
}
RuleChain::~RuleChain() {
- if (next != NULL) {
- delete next;
- }
- if ( ruleHeader != NULL ) {
- delete ruleHeader;
- }
+ delete fNext;
+ delete ruleHeader;
}
-UnicodeString
-RuleChain::select(double number) const {
- if ( ruleHeader != NULL ) {
- if (ruleHeader->isFulfilled(number)) {
- return keyword;
- }
- }
- if ( next != NULL ) {
- return next->select(number);
- }
- else {
- return UnicodeString(TRUE, PLURAL_KEYWORD_OTHER, 5);
- }
+UnicodeString
+RuleChain::select(const FixedDecimal &number) const {
+ if (!number.isNanOrInfinity) {
+ for (const RuleChain *rules = this; rules != NULL; rules = rules->fNext) {
+ if (rules->ruleHeader->isFulfilled(number)) {
+ return rules->fKeyword;
+ }
+ }
+ }
+ return UnicodeString(TRUE, PLURAL_KEYWORD_OTHER, 5);
+}
+static UnicodeString tokenString(tokenType tok) {
+ UnicodeString s;
+ switch (tok) {
+ case tVariableN:
+ s.append(LOW_N); break;
+ case tVariableI:
+ s.append(LOW_I); break;
+ case tVariableF:
+ s.append(LOW_F); break;
+ case tVariableV:
+ s.append(LOW_V); break;
+ case tVariableT:
+ s.append(LOW_T); break;
+ default:
+ s.append(TILDE);
+ }
+ return s;
}
void
UChar digitString[16];
if ( ruleHeader != NULL ) {
- result += keyword;
+ result += fKeyword;
+ result += COLON;
+ result += SPACE;
OrConstraint* orRule=ruleHeader;
while ( orRule != NULL ) {
AndConstraint* andRule=orRule->childNode;
while ( andRule != NULL ) {
- if ( (andRule->op==AndConstraint::NONE) && (andRule->rangeHigh==-1) ) {
- result += UNICODE_STRING_SIMPLE(" n is ");
- if (andRule->notIn) {
+ if ((andRule->op==AndConstraint::NONE) && (andRule->rangeList==NULL) && (andRule->value == -1)) {
+ // Empty Rules.
+ } else if ( (andRule->op==AndConstraint::NONE) && (andRule->rangeList==NULL) ) {
+ result += tokenString(andRule->digitsType);
+ result += UNICODE_STRING_SIMPLE(" is ");
+ if (andRule->negated) {
result += UNICODE_STRING_SIMPLE("not ");
}
- uprv_itou(digitString,16, andRule->rangeLow,10,0);
+ uprv_itou(digitString,16, andRule->value,10,0);
result += UnicodeString(digitString);
}
else {
+ result += tokenString(andRule->digitsType);
+ result += SPACE;
if (andRule->op==AndConstraint::MOD) {
- result += UNICODE_STRING_SIMPLE(" n mod ");
+ result += UNICODE_STRING_SIMPLE("mod ");
uprv_itou(digitString,16, andRule->opNum,10,0);
result += UnicodeString(digitString);
}
- else {
- result += UNICODE_STRING_SIMPLE(" n ");
- }
- if (andRule->rangeHigh==-1) {
- if (andRule->notIn) {
+ if (andRule->rangeList==NULL) {
+ if (andRule->negated) {
result += UNICODE_STRING_SIMPLE(" is not ");
- uprv_itou(digitString,16, andRule->rangeLow,10,0);
+ uprv_itou(digitString,16, andRule->value,10,0);
result += UnicodeString(digitString);
}
else {
result += UNICODE_STRING_SIMPLE(" is ");
- uprv_itou(digitString,16, andRule->rangeLow,10,0);
+ uprv_itou(digitString,16, andRule->value,10,0);
result += UnicodeString(digitString);
}
}
else {
- if (andRule->notIn) {
+ if (andRule->negated) {
if ( andRule->integerOnly ) {
- result += UNICODE_STRING_SIMPLE(" not in ");
+ result += UNICODE_STRING_SIMPLE(" not in ");
}
else {
- result += UNICODE_STRING_SIMPLE(" not within ");
+ result += UNICODE_STRING_SIMPLE(" not within ");
}
- uprv_itou(digitString,16, andRule->rangeLow,10,0);
- result += UnicodeString(digitString);
- result += UNICODE_STRING_SIMPLE(" .. ");
- uprv_itou(digitString,16, andRule->rangeHigh,10,0);
- result += UnicodeString(digitString);
}
else {
if ( andRule->integerOnly ) {
else {
result += UNICODE_STRING_SIMPLE(" within ");
}
- uprv_itou(digitString,16, andRule->rangeLow,10,0);
+ }
+ for (int32_t r=0; r<andRule->rangeList->size(); r+=2) {
+ int32_t rangeLo = andRule->rangeList->elementAti(r);
+ int32_t rangeHi = andRule->rangeList->elementAti(r+1);
+ uprv_itou(digitString,16, rangeLo, 10, 0);
result += UnicodeString(digitString);
- result += UNICODE_STRING_SIMPLE(" .. ");
- uprv_itou(digitString,16, andRule->rangeHigh,10,0);
+ result += UNICODE_STRING_SIMPLE("..");
+ uprv_itou(digitString,16, rangeHi, 10,0);
+ result += UnicodeString(digitString);
+ if (r+2 < andRule->rangeList->size()) {
+ result += UNICODE_STRING_SIMPLE(", ");
+ }
}
}
}
if ( (andRule=andRule->next) != NULL) {
- result.append(PK_AND, 3);
+ result += UNICODE_STRING_SIMPLE(" and ");
}
}
if ( (orRule = orRule->next) != NULL ) {
- result.append(PK_OR, 2);
+ result += UNICODE_STRING_SIMPLE(" or ");
}
}
}
- if ( next != NULL ) {
- next->dumpRules(result);
+ if ( fNext != NULL ) {
+ result += UNICODE_STRING_SIMPLE("; ");
+ fNext->dumpRules(result);
}
}
-int32_t
-RuleChain::getRepeatLimit () {
- return repeatLimit;
-}
-
-void
-RuleChain::setRepeatLimit () {
- int32_t limit=0;
-
- if ( next != NULL ) {
- next->setRepeatLimit();
- limit = next->repeatLimit;
- }
-
- if ( ruleHeader != NULL ) {
- OrConstraint* orRule=ruleHeader;
- while ( orRule != NULL ) {
- AndConstraint* andRule=orRule->childNode;
- while ( andRule != NULL ) {
- limit = andRule->updateRepeatLimit(limit);
- andRule = andRule->next;
- }
- orRule = orRule->next;
- }
- }
- repeatLimit = limit;
-}
UErrorCode
RuleChain::getKeywords(int32_t capacityOfKeywords, UnicodeString* keywords, int32_t& arraySize) const {
if ( arraySize < capacityOfKeywords-1 ) {
- keywords[arraySize++]=keyword;
+ keywords[arraySize++]=fKeyword;
}
else {
return U_BUFFER_OVERFLOW_ERROR;
}
- if ( next != NULL ) {
- return next->getKeywords(capacityOfKeywords, keywords, arraySize);
+ if ( fNext != NULL ) {
+ return fNext->getKeywords(capacityOfKeywords, keywords, arraySize);
}
else {
return U_ZERO_ERROR;
UBool
RuleChain::isKeyword(const UnicodeString& keywordParam) const {
- if ( keyword == keywordParam ) {
+ if ( fKeyword == keywordParam ) {
return TRUE;
}
- if ( next != NULL ) {
- return next->isKeyword(keywordParam);
+ if ( fNext != NULL ) {
+ return fNext->isKeyword(keywordParam);
}
else {
return FALSE;
}
-RuleParser::RuleParser() {
+PluralRuleParser::PluralRuleParser() :
+ ruleIndex(0), token(), type(none), prevType(none),
+ curAndConstraint(NULL), currentChain(NULL), rangeLowIdx(-1), rangeHiIdx(-1)
+{
+}
+
+PluralRuleParser::~PluralRuleParser() {
}
-RuleParser::~RuleParser() {
+
+int32_t
+PluralRuleParser::getNumberValue(const UnicodeString& token) {
+ int32_t i;
+ char digits[128];
+
+ i = token.extract(0, token.length(), digits, ARRAY_SIZE(digits), US_INV);
+ digits[i]='\0';
+
+ return((int32_t)atoi(digits));
}
+
void
-RuleParser::checkSyntax(tokenType prevType, tokenType curType, UErrorCode &status)
+PluralRuleParser::checkSyntax(UErrorCode &status)
{
if (U_FAILURE(status)) {
return;
}
+ if (!(prevType==none || prevType==tSemiColon)) {
+ type = getKeyType(token, type); // Switch token type from tKeyword if we scanned a reserved word,
+ // and we are not at the start of a rule, where a
+ // keyword is expected.
+ }
+
switch(prevType) {
case none:
case tSemiColon:
- if (curType!=tKeyword) {
+ if (type!=tKeyword && type != tEOF) {
status = U_UNEXPECTED_TOKEN;
}
break;
- case tVariableN :
- if (curType != tIs && curType != tMod && curType != tIn &&
- curType != tNot && curType != tWithin) {
+ case tVariableN:
+ case tVariableI:
+ case tVariableF:
+ case tVariableT:
+ case tVariableV:
+ if (type != tIs && type != tMod && type != tIn &&
+ type != tNot && type != tWithin && type != tEqual && type != tNotEqual) {
status = U_UNEXPECTED_TOKEN;
}
break;
- case tZero:
- case tOne:
- case tTwo:
- case tFew:
- case tMany:
- case tOther:
case tKeyword:
- if (curType != tColon) {
+ if (type != tColon) {
status = U_UNEXPECTED_TOKEN;
}
break;
- case tColon :
- if (curType != tVariableN) {
+ case tColon:
+ if (!(type == tVariableN ||
+ type == tVariableI ||
+ type == tVariableF ||
+ type == tVariableT ||
+ type == tVariableV ||
+ type == tAt)) {
status = U_UNEXPECTED_TOKEN;
}
break;
case tIs:
- if ( curType != tNumber && curType != tNot) {
+ if ( type != tNumber && type != tNot) {
status = U_UNEXPECTED_TOKEN;
}
break;
case tNot:
- if (curType != tNumber && curType != tIn && curType != tWithin) {
+ if (type != tNumber && type != tIn && type != tWithin) {
status = U_UNEXPECTED_TOKEN;
}
break;
case tMod:
- case tDot:
+ case tDot2:
case tIn:
case tWithin:
+ case tEqual:
+ case tNotEqual:
+ if (type != tNumber) {
+ status = U_UNEXPECTED_TOKEN;
+ }
+ break;
case tAnd:
case tOr:
- if (curType != tNumber && curType != tVariableN) {
+ if ( type != tVariableN &&
+ type != tVariableI &&
+ type != tVariableF &&
+ type != tVariableT &&
+ type != tVariableV) {
+ status = U_UNEXPECTED_TOKEN;
+ }
+ break;
+ case tComma:
+ if (type != tNumber) {
status = U_UNEXPECTED_TOKEN;
}
break;
case tNumber:
- if (curType != tDot && curType != tSemiColon && curType != tIs && curType != tNot &&
- curType != tIn && curType != tWithin && curType != tAnd && curType != tOr)
+ if (type != tDot2 && type != tSemiColon && type != tIs && type != tNot &&
+ type != tIn && type != tEqual && type != tNotEqual && type != tWithin &&
+ type != tAnd && type != tOr && type != tComma && type != tAt &&
+ type != tEOF)
{
status = U_UNEXPECTED_TOKEN;
}
+ // TODO: a comma following a number that is not part of a range will be allowed.
+ // It's not the only case of this sort of thing. Parser needs a re-write.
+ break;
+ case tAt:
+ if (type != tDecimal && type != tInteger) {
+ status = U_UNEXPECTED_TOKEN;
+ }
break;
default:
status = U_UNEXPECTED_TOKEN;
}
}
+
+/*
+ * Scan the next token from the input rules.
+ * rules and returned token type are in the parser state variables.
+ */
void
-RuleParser::getNextToken(const UnicodeString& ruleData,
- int32_t *ruleIndex,
- UnicodeString& token,
- tokenType& type,
- UErrorCode &status)
+PluralRuleParser::getNextToken(UErrorCode &status)
{
- int32_t curIndex= *ruleIndex;
- UChar ch;
- tokenType prevType=none;
-
if (U_FAILURE(status)) {
return;
}
- while (curIndex<ruleData.length()) {
- ch = ruleData.charAt(curIndex);
- if ( !inRange(ch, type) ) {
- status = U_ILLEGAL_CHARACTER;
- return;
- }
- switch (type) {
- case tSpace:
- if ( *ruleIndex != curIndex ) { // letter
- token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
- *ruleIndex=curIndex;
- type=prevType;
- getKeyType(token, type, status);
- return;
- }
- else {
- *ruleIndex=*ruleIndex+1;
- }
- break; // consective space
- case tColon:
- case tSemiColon:
- if ( *ruleIndex != curIndex ) {
- token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
- *ruleIndex=curIndex;
- type=prevType;
- getKeyType(token, type, status);
- return;
- }
- else {
- *ruleIndex=curIndex+1;
- return;
- }
- case tLetter:
- if ((type==prevType)||(prevType==none)) {
- prevType=type;
- break;
- }
- break;
- case tNumber:
- if ((type==prevType)||(prevType==none)) {
- prevType=type;
- break;
- }
- else {
- *ruleIndex=curIndex+1;
- return;
- }
- case tDot:
- if (prevType==none) { // first dot
- prevType=type;
- continue;
- }
- else {
- if ( *ruleIndex != curIndex ) {
- token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
- *ruleIndex=curIndex; // letter
- type=prevType;
- getKeyType(token, type, status);
- return;
- }
- else { // two consective dots
- *ruleIndex=curIndex+2;
- return;
- }
- }
- default:
- status = U_UNEXPECTED_TOKEN;
- return;
+
+ UChar ch;
+ while (ruleIndex < ruleSrc->length()) {
+ ch = ruleSrc->charAt(ruleIndex);
+ type = charType(ch);
+ if (type != tSpace) {
+ break;
}
- curIndex++;
+ ++(ruleIndex);
}
- if ( curIndex>=ruleData.length() ) {
- if ( (type == tLetter)||(type == tNumber) ) {
- token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
- getKeyType(token, type, status);
- if (U_FAILURE(status)) {
- return;
- }
- }
- *ruleIndex = ruleData.length();
+ if (ruleIndex >= ruleSrc->length()) {
+ type = tEOF;
+ return;
}
+ int32_t curIndex= ruleIndex;
+
+ switch (type) {
+ case tColon:
+ case tSemiColon:
+ case tComma:
+ case tEllipsis:
+ case tTilde: // scanned '~'
+ case tAt: // scanned '@'
+ case tEqual: // scanned '='
+ case tMod: // scanned '%'
+ // Single character tokens.
+ ++curIndex;
+ break;
+
+ case tNotEqual: // scanned '!'
+ if (ruleSrc->charAt(curIndex+1) == EQUALS) {
+ curIndex += 2;
+ } else {
+ type = none;
+ curIndex += 1;
+ }
+ break;
+
+ case tKeyword:
+ while (type == tKeyword && ++curIndex < ruleSrc->length()) {
+ ch = ruleSrc->charAt(curIndex);
+ type = charType(ch);
+ }
+ type = tKeyword;
+ break;
+
+ case tNumber:
+ while (type == tNumber && ++curIndex < ruleSrc->length()) {
+ ch = ruleSrc->charAt(curIndex);
+ type = charType(ch);
+ }
+ type = tNumber;
+ break;
+
+ case tDot:
+ // We could be looking at either ".." in a range, or "..." at the end of a sample.
+ if (curIndex+1 >= ruleSrc->length() || ruleSrc->charAt(curIndex+1) != DOT) {
+ ++curIndex;
+ break; // Single dot
+ }
+ if (curIndex+2 >= ruleSrc->length() || ruleSrc->charAt(curIndex+2) != DOT) {
+ curIndex += 2;
+ type = tDot2;
+ break; // double dot
+ }
+ type = tEllipsis;
+ curIndex += 3;
+ break; // triple dot
+
+ default:
+ status = U_UNEXPECTED_TOKEN;
+ ++curIndex;
+ break;
+ }
+
+ U_ASSERT(ruleIndex <= ruleSrc->length());
+ U_ASSERT(curIndex <= ruleSrc->length());
+ token=UnicodeString(*ruleSrc, ruleIndex, curIndex-ruleIndex);
+ ruleIndex = curIndex;
}
-UBool
-RuleParser::inRange(UChar ch, tokenType& type) {
- if ((ch>=CAP_A) && (ch<=CAP_Z)) {
- // we assume all characters are in lower case already.
- return FALSE;
- }
- if ((ch>=LOW_A) && (ch<=LOW_Z)) {
- type = tLetter;
- return TRUE;
- }
+tokenType
+PluralRuleParser::charType(UChar ch) {
if ((ch>=U_ZERO) && (ch<=U_NINE)) {
- type = tNumber;
- return TRUE;
+ return tNumber;
+ }
+ if (ch>=LOW_A && ch<=LOW_Z) {
+ return tKeyword;
}
switch (ch) {
case COLON:
- type = tColon;
- return TRUE;
+ return tColon;
case SPACE:
- type = tSpace;
- return TRUE;
+ return tSpace;
case SEMI_COLON:
- type = tSemiColon;
- return TRUE;
+ return tSemiColon;
case DOT:
- type = tDot;
- return TRUE;
+ return tDot;
+ case COMMA:
+ return tComma;
+ case EXCLAMATION:
+ return tNotEqual;
+ case EQUALS:
+ return tEqual;
+ case PERCENT_SIGN:
+ return tMod;
+ case AT:
+ return tAt;
+ case ELLIPSIS:
+ return tEllipsis;
+ case TILDE:
+ return tTilde;
default :
- type = none;
- return FALSE;
+ return none;
}
}
-void
-RuleParser::getKeyType(const UnicodeString& token, tokenType& keyType, UErrorCode &status)
+// Set token type for reserved words in the Plural Rule syntax.
+
+tokenType
+PluralRuleParser::getKeyType(const UnicodeString &token, tokenType keyType)
{
- if (U_FAILURE(status)) {
- return;
+ if (keyType != tKeyword) {
+ return keyType;
}
- if ( keyType==tNumber) {
- }
- else if (0 == token.compare(PK_VAR_N, 1)) {
+
+ if (0 == token.compare(PK_VAR_N, 1)) {
keyType = tVariableN;
- }
- else if (0 == token.compare(PK_IS, 2)) {
+ } else if (0 == token.compare(PK_VAR_I, 1)) {
+ keyType = tVariableI;
+ } else if (0 == token.compare(PK_VAR_F, 1)) {
+ keyType = tVariableF;
+ } else if (0 == token.compare(PK_VAR_T, 1)) {
+ keyType = tVariableT;
+ } else if (0 == token.compare(PK_VAR_V, 1)) {
+ keyType = tVariableV;
+ } else if (0 == token.compare(PK_IS, 2)) {
keyType = tIs;
- }
- else if (0 == token.compare(PK_AND, 3)) {
+ } else if (0 == token.compare(PK_AND, 3)) {
keyType = tAnd;
- }
- else if (0 == token.compare(PK_IN, 2)) {
+ } else if (0 == token.compare(PK_IN, 2)) {
keyType = tIn;
- }
- else if (0 == token.compare(PK_WITHIN, 6)) {
+ } else if (0 == token.compare(PK_WITHIN, 6)) {
keyType = tWithin;
- }
- else if (0 == token.compare(PK_NOT, 3)) {
+ } else if (0 == token.compare(PK_NOT, 3)) {
keyType = tNot;
- }
- else if (0 == token.compare(PK_MOD, 3)) {
+ } else if (0 == token.compare(PK_MOD, 3)) {
keyType = tMod;
- }
- else if (0 == token.compare(PK_OR, 2)) {
+ } else if (0 == token.compare(PK_OR, 2)) {
keyType = tOr;
+ } else if (0 == token.compare(PK_DECIMAL, 7)) {
+ keyType = tDecimal;
+ } else if (0 == token.compare(PK_INTEGER, 7)) {
+ keyType = tInteger;
}
- else if ( isValidKeyword(token) ) {
- keyType = tKeyword;
- }
- else {
- status = U_UNEXPECTED_TOKEN;
- }
+ return keyType;
}
-UBool
-RuleParser::isValidKeyword(const UnicodeString& token) {
- return PatternProps::isIdentifier(token.getBuffer(), token.length());
-}
PluralKeywordEnumeration::PluralKeywordEnumeration(RuleChain *header, UErrorCode& status)
: pos(0), fKeywordNames(status) {
UBool addKeywordOther=TRUE;
RuleChain *node=header;
while(node!=NULL) {
- fKeywordNames.addElement(new UnicodeString(node->keyword), status);
+ fKeywordNames.addElement(new UnicodeString(node->fKeyword), status);
if (U_FAILURE(status)) {
return;
}
- if (0 == node->keyword.compare(PLURAL_KEYWORD_OTHER, 5)) {
+ if (0 == node->fKeyword.compare(PLURAL_KEYWORD_OTHER, 5)) {
addKeywordOther= FALSE;
}
- node=node->next;
+ node=node->fNext;
}
if (addKeywordOther) {
PluralKeywordEnumeration::~PluralKeywordEnumeration() {
}
+
+
+FixedDecimal::FixedDecimal(double n, int32_t v, int64_t f) {
+ init(n, v, f);
+ // check values. TODO make into unit test.
+ //
+ // long visiblePower = (int) Math.pow(10, v);
+ // if (decimalDigits > visiblePower) {
+ // throw new IllegalArgumentException();
+ // }
+ // double fraction = intValue + (decimalDigits / (double) visiblePower);
+ // if (fraction != source) {
+ // double diff = Math.abs(fraction - source)/(Math.abs(fraction) + Math.abs(source));
+ // if (diff > 0.00000001d) {
+ // throw new IllegalArgumentException();
+ // }
+ // }
+}
+
+FixedDecimal::FixedDecimal(double n, int32_t v) {
+ // Ugly, but for samples we don't care.
+ init(n, v, getFractionalDigits(n, v));
+}
+
+FixedDecimal::FixedDecimal(double n) {
+ init(n);
+}
+
+FixedDecimal::FixedDecimal() {
+ init(0, 0, 0);
+}
+
+
+// Create a FixedDecimal from a UnicodeString containing a number.
+// Inefficient, but only used for samples, so simplicity trumps efficiency.
+
+FixedDecimal::FixedDecimal(const UnicodeString &num, UErrorCode &status) {
+ CharString cs;
+ cs.appendInvariantChars(num, status);
+ DigitList dl;
+ dl.set(cs.toStringPiece(), status);
+ if (U_FAILURE(status)) {
+ init(0, 0, 0);
+ return;
+ }
+ int32_t decimalPoint = num.indexOf(DOT);
+ double n = dl.getDouble();
+ if (decimalPoint == -1) {
+ init(n, 0, 0);
+ } else {
+ int32_t v = num.length() - decimalPoint - 1;
+ init(n, v, getFractionalDigits(n, v));
+ }
+}
+
+
+FixedDecimal::FixedDecimal(const FixedDecimal &other) {
+ source = other.source;
+ visibleDecimalDigitCount = other.visibleDecimalDigitCount;
+ decimalDigits = other.decimalDigits;
+ decimalDigitsWithoutTrailingZeros = other.decimalDigitsWithoutTrailingZeros;
+ intValue = other.intValue;
+ hasIntegerValue = other.hasIntegerValue;
+ isNegative = other.isNegative;
+ isNanOrInfinity = other.isNanOrInfinity;
+}
+
+
+void FixedDecimal::init(double n) {
+ int32_t numFractionDigits = decimals(n);
+ init(n, numFractionDigits, getFractionalDigits(n, numFractionDigits));
+}
+
+
+void FixedDecimal::init(double n, int32_t v, int64_t f) {
+ isNegative = n < 0.0;
+ source = fabs(n);
+ isNanOrInfinity = uprv_isNaN(source) || uprv_isPositiveInfinity(source);
+ if (isNanOrInfinity) {
+ v = 0;
+ f = 0;
+ intValue = 0;
+ hasIntegerValue = FALSE;
+ } else {
+ intValue = (int64_t)source;
+ hasIntegerValue = (source == intValue);
+ }
+
+ visibleDecimalDigitCount = v;
+ decimalDigits = f;
+ if (f == 0) {
+ decimalDigitsWithoutTrailingZeros = 0;
+ } else {
+ int64_t fdwtz = f;
+ while ((fdwtz%10) == 0) {
+ fdwtz /= 10;
+ }
+ decimalDigitsWithoutTrailingZeros = fdwtz;
+ }
+}
+
+
+// Fast path only exact initialization. Return true if successful.
+// Note: Do not multiply by 10 each time through loop, rounding cruft can build
+// up that makes the check for an integer result fail.
+// A single multiply of the original number works more reliably.
+static int32_t p10[] = {1, 10, 100, 1000, 10000};
+UBool FixedDecimal::quickInit(double n) {
+ UBool success = FALSE;
+ n = fabs(n);
+ int32_t numFractionDigits;
+ for (numFractionDigits = 0; numFractionDigits <= 3; numFractionDigits++) {
+ double scaledN = n * p10[numFractionDigits];
+ if (scaledN == floor(scaledN)) {
+ success = TRUE;
+ break;
+ }
+ }
+ if (success) {
+ init(n, numFractionDigits, getFractionalDigits(n, numFractionDigits));
+ }
+ return success;
+}
+
+
+
+int32_t FixedDecimal::decimals(double n) {
+ // Count the number of decimal digits in the fraction part of the number, excluding trailing zeros.
+ // fastpath the common cases, integers or fractions with 3 or fewer digits
+ n = fabs(n);
+ for (int ndigits=0; ndigits<=3; ndigits++) {
+ double scaledN = n * p10[ndigits];
+ if (scaledN == floor(scaledN)) {
+ return ndigits;
+ }
+ }
+
+ // Slow path, convert with sprintf, parse converted output.
+ char buf[30] = {0};
+ sprintf(buf, "%1.15e", n);
+ // formatted number looks like this: 1.234567890123457e-01
+ int exponent = atoi(buf+18);
+ int numFractionDigits = 15;
+ for (int i=16; ; --i) {
+ if (buf[i] != '0') {
+ break;
+ }
+ --numFractionDigits;
+ }
+ numFractionDigits -= exponent; // Fraction part of fixed point representation.
+ return numFractionDigits;
+}
+
+
+// Get the fraction digits of a double, represented as an integer.
+// v is the number of visible fraction digits in the displayed form of the number.
+// Example: n = 1001.234, v = 6, result = 234000
+// TODO: need to think through how this is used in the plural rule context.
+// This function can easily encounter integer overflow,
+// and can easily return noise digits when the precision of a double is exceeded.
+
+int64_t FixedDecimal::getFractionalDigits(double n, int32_t v) {
+ if (v == 0 || n == floor(n) || uprv_isNaN(n) || uprv_isPositiveInfinity(n)) {
+ return 0;
+ }
+ n = fabs(n);
+ double fract = n - floor(n);
+ switch (v) {
+ case 1: return (int64_t)(fract*10.0 + 0.5);
+ case 2: return (int64_t)(fract*100.0 + 0.5);
+ case 3: return (int64_t)(fract*1000.0 + 0.5);
+ default:
+ double scaled = floor(fract * pow(10.0, (double)v) + 0.5);
+ if (scaled > U_INT64_MAX) {
+ return U_INT64_MAX;
+ } else {
+ return (int64_t)scaled;
+ }
+ }
+}
+
+
+void FixedDecimal::adjustForMinFractionDigits(int32_t minFractionDigits) {
+ int32_t numTrailingFractionZeros = minFractionDigits - visibleDecimalDigitCount;
+ if (numTrailingFractionZeros > 0) {
+ for (int32_t i=0; i<numTrailingFractionZeros; i++) {
+ // Do not let the decimalDigits value overflow if there are many trailing zeros.
+ // Limit the value to 18 digits, the most that a 64 bit int can fully represent.
+ if (decimalDigits >= 100000000000000000LL) {
+ break;
+ }
+ decimalDigits *= 10;
+ }
+ visibleDecimalDigitCount += numTrailingFractionZeros;
+ }
+}
+
+
+double FixedDecimal::get(tokenType operand) const {
+ switch(operand) {
+ case tVariableN: return source;
+ case tVariableI: return (double)intValue;
+ case tVariableF: return (double)decimalDigits;
+ case tVariableT: return (double)decimalDigitsWithoutTrailingZeros;
+ case tVariableV: return visibleDecimalDigitCount;
+ default:
+ U_ASSERT(FALSE); // unexpected.
+ return source;
+ }
+}
+
+int32_t FixedDecimal::getVisibleFractionDigitCount() const {
+ return visibleDecimalDigitCount;
+}
+
+
+
+PluralAvailableLocalesEnumeration::PluralAvailableLocalesEnumeration(UErrorCode &status) {
+ fLocales = NULL;
+ fRes = NULL;
+ fOpenStatus = status;
+ if (U_FAILURE(status)) {
+ return;
+ }
+ fOpenStatus = U_ZERO_ERROR;
+ LocalUResourceBundlePointer rb(ures_openDirect(NULL, "plurals", &fOpenStatus));
+ fLocales = ures_getByKey(rb.getAlias(), "locales", NULL, &fOpenStatus);
+}
+
+PluralAvailableLocalesEnumeration::~PluralAvailableLocalesEnumeration() {
+ ures_close(fLocales);
+ ures_close(fRes);
+ fLocales = NULL;
+ fRes = NULL;
+}
+
+const char *PluralAvailableLocalesEnumeration::next(int32_t *resultLength, UErrorCode &status) {
+ if (U_FAILURE(status)) {
+ return NULL;
+ }
+ if (U_FAILURE(fOpenStatus)) {
+ status = fOpenStatus;
+ return NULL;
+ }
+ fRes = ures_getNextResource(fLocales, fRes, &status);
+ if (fRes == NULL || U_FAILURE(status)) {
+ if (status == U_INDEX_OUTOFBOUNDS_ERROR) {
+ status = U_ZERO_ERROR;
+ }
+ return NULL;
+ }
+ const char *result = ures_getKey(fRes);
+ if (resultLength != NULL) {
+ *resultLength = uprv_strlen(result);
+ }
+ return result;
+}
+
+
+void PluralAvailableLocalesEnumeration::reset(UErrorCode &status) {
+ if (U_FAILURE(status)) {
+ return;
+ }
+ if (U_FAILURE(fOpenStatus)) {
+ status = fOpenStatus;
+ return;
+ }
+ ures_resetIterator(fLocales);
+}
+
+int32_t PluralAvailableLocalesEnumeration::count(UErrorCode &status) const {
+ if (U_FAILURE(status)) {
+ return 0;
+ }
+ if (U_FAILURE(fOpenStatus)) {
+ status = fOpenStatus;
+ return 0;
+ }
+ return ures_getSize(fLocales);
+}
+
U_NAMESPACE_END