2 *******************************************************************************
3 * Copyright (C) 2007-2014, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 *******************************************************************************
13 #include "unicode/utypes.h"
14 #include "unicode/localpointer.h"
15 #include "unicode/plurrule.h"
16 #include "unicode/upluralrules.h"
17 #include "unicode/ures.h"
25 #include "patternprops.h"
26 #include "plurrule_impl.h"
32 #include "sharedpluralrules.h"
35 #if !UCONFIG_NO_FORMATTING
// Process-wide LRU cache of shared plural rules. It stays NULL until
// pluralRulesCacheInit() creates it, and plurrules_cleanup() deletes it and
// sets it back to NULL.
37 static icu::LRUCache
*gPluralRulesCache
= NULL
;
// Mutex locked around lookups in gPluralRulesCache
// (see getSharedPluralRulesFromCache()).
38 static UMutex gPluralRulesCacheMutex
= U_MUTEX_INITIALIZER
;
// Init-once guard passed to umtx_initOnce() so that pluralRulesCacheInit()
// creates the cache a single time; plurrules_cleanup() resets it.
39 static icu::UInitOnce gPluralRulesCacheInitOnce
= U_INITONCE_INITIALIZER
;
42 static UBool U_CALLCONV
plurrules_cleanup(void) {
43 gPluralRulesCacheInitOnce
.reset();
44 if (gPluralRulesCache
) {
45 delete gPluralRulesCache
;
46 gPluralRulesCache
= NULL
;
// Number of elements in a statically sized array, as an int32_t.
// Only meaningful for true arrays (not pointers). Both the argument and the
// whole expansion are parenthesized so the macro composes safely inside
// larger expressions.
#define ARRAY_SIZE(array) ((int32_t)(sizeof(array) / sizeof((array)[0])))
// NUL-terminated UChar strings used by the plural rule code. They are spelled
// out one character constant at a time (LOW_*, COLON, SPACE), which are
// declared outside this file -- presumably in plurrule_impl.h.

// The keyword "other" (5 characters).
56 static const UChar PLURAL_KEYWORD_OTHER
[]={LOW_O
,LOW_T
,LOW_H
,LOW_E
,LOW_R
,0};
// The default rule text "other: n", used when a locale has no specific rules.
57 static const UChar PLURAL_DEFAULT_RULE
[]={LOW_O
,LOW_T
,LOW_H
,LOW_E
,LOW_R
,COLON
,SPACE
,LOW_N
,0};
// Reserved word "in".
58 static const UChar PK_IN
[]={LOW_I
,LOW_N
,0};
// Reserved word "not".
59 static const UChar PK_NOT
[]={LOW_N
,LOW_O
,LOW_T
,0};
// Reserved word "is".
60 static const UChar PK_IS
[]={LOW_I
,LOW_S
,0};
// Reserved word "mod".
61 static const UChar PK_MOD
[]={LOW_M
,LOW_O
,LOW_D
,0};
// Reserved word "and".
62 static const UChar PK_AND
[]={LOW_A
,LOW_N
,LOW_D
,0};
// Reserved word "or".
63 static const UChar PK_OR
[]={LOW_O
,LOW_R
,0};
// Operand variable "n".
64 static const UChar PK_VAR_N
[]={LOW_N
,0};
// Operand variable "i".
65 static const UChar PK_VAR_I
[]={LOW_I
,0};
// Operand variable "f".
66 static const UChar PK_VAR_F
[]={LOW_F
,0};
// Operand variable "t".
67 static const UChar PK_VAR_T
[]={LOW_T
,0};
// Operand variable "v".
68 static const UChar PK_VAR_V
[]={LOW_V
,0};
// Reserved word "within".
69 static const UChar PK_WITHIN
[]={LOW_W
,LOW_I
,LOW_T
,LOW_H
,LOW_I
,LOW_N
,0};
// Reserved word "decimal" (sample-list marker).
70 static const UChar PK_DECIMAL
[]={LOW_D
,LOW_E
,LOW_C
,LOW_I
,LOW_M
,LOW_A
,LOW_L
,0};
// Reserved word "integer" (sample-list marker).
71 static const UChar PK_INTEGER
[]={LOW_I
,LOW_N
,LOW_T
,LOW_E
,LOW_G
,LOW_E
,LOW_R
,0};
73 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralRules
)
74 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralKeywordEnumeration
)
76 PluralRules::PluralRules(UErrorCode
& /*status*/)
82 PluralRules::PluralRules(const PluralRules
& other
)
89 PluralRules::~PluralRules() {
93 SharedPluralRules::~SharedPluralRules() {
98 PluralRules::clone() const {
99 return new PluralRules(*this);
103 PluralRules::operator=(const PluralRules
& other
) {
104 if (this != &other
) {
106 if (other
.mRules
==NULL
) {
110 mRules
= new RuleChain(*other
.mRules
);
117 StringEnumeration
* PluralRules::getAvailableLocales(UErrorCode
&status
) {
118 StringEnumeration
*result
= new PluralAvailableLocalesEnumeration(status
);
119 if (result
== NULL
&& U_SUCCESS(status
)) {
120 status
= U_MEMORY_ALLOCATION_ERROR
;
122 if (U_FAILURE(status
)) {
130 PluralRules
* U_EXPORT2
131 PluralRules::createRules(const UnicodeString
& description
, UErrorCode
& status
) {
132 if (U_FAILURE(status
)) {
136 PluralRuleParser parser
;
137 PluralRules
*newRules
= new PluralRules(status
);
138 if (U_SUCCESS(status
) && newRules
== NULL
) {
139 status
= U_MEMORY_ALLOCATION_ERROR
;
141 parser
.parse(description
, newRules
, status
);
142 if (U_FAILURE(status
)) {
150 PluralRules
* U_EXPORT2
151 PluralRules::createDefaultRules(UErrorCode
& status
) {
152 return createRules(UnicodeString(TRUE
, PLURAL_DEFAULT_RULE
, -1), status
);
155 /******************************************************************************/
156 /* Create PluralRules cache */
158 static SharedObject
*U_CALLCONV
createSharedPluralRules(
159 const char *localeId
, UErrorCode
&status
) {
160 if (U_FAILURE(status
)) {
163 PluralRules
*pr
= PluralRules::internalForLocale(
164 localeId
, UPLURAL_TYPE_CARDINAL
, status
);
165 if (U_FAILURE(status
)) {
168 SharedObject
*result
= new SharedPluralRules(pr
);
169 if (result
== NULL
) {
170 status
= U_MEMORY_ALLOCATION_ERROR
;
177 static void U_CALLCONV
pluralRulesCacheInit(UErrorCode
&status
) {
178 U_ASSERT(gPluralRulesCache
== NULL
);
179 ucln_i18n_registerCleanup(UCLN_I18N_PLURAL_RULE
, plurrules_cleanup
);
180 gPluralRulesCache
= new SimpleLRUCache(100, &createSharedPluralRules
, status
);
181 if (U_FAILURE(status
)) {
182 delete gPluralRulesCache
;
183 gPluralRulesCache
= NULL
;
187 static void getSharedPluralRulesFromCache(
189 const SharedPluralRules
*&ptr
,
190 UErrorCode
&status
) {
191 umtx_initOnce(gPluralRulesCacheInitOnce
, &pluralRulesCacheInit
, status
);
192 if (U_FAILURE(status
)) {
195 Mutex
lock(&gPluralRulesCacheMutex
);
196 gPluralRulesCache
->get(locale
, ptr
, status
);
202 /* end plural rules cache */
203 /******************************************************************************/
205 const SharedPluralRules
* U_EXPORT2
206 PluralRules::createSharedInstance(
207 const Locale
& locale
, UPluralType type
, UErrorCode
& status
) {
208 if (U_FAILURE(status
)) {
211 if (type
!= UPLURAL_TYPE_CARDINAL
) {
212 status
= U_UNSUPPORTED_ERROR
;
215 const SharedPluralRules
*result
= NULL
;
216 getSharedPluralRulesFromCache(locale
.getName(), result
, status
);
220 PluralRules
* U_EXPORT2
221 PluralRules::forLocale(const Locale
& locale
, UErrorCode
& status
) {
222 return forLocale(locale
, UPLURAL_TYPE_CARDINAL
, status
);
225 PluralRules
* U_EXPORT2
226 PluralRules::forLocale(const Locale
& locale
, UPluralType type
, UErrorCode
& status
) {
227 if (type
!= UPLURAL_TYPE_CARDINAL
) {
228 return internalForLocale(locale
, type
, status
);
230 const SharedPluralRules
*shared
= createSharedInstance(
231 locale
, type
, status
);
232 if (U_FAILURE(status
)) {
235 PluralRules
*result
= (*shared
)->clone();
237 if (result
== NULL
) {
238 status
= U_MEMORY_ALLOCATION_ERROR
;
243 PluralRules
* U_EXPORT2
244 PluralRules::internalForLocale(const Locale
& locale
, UPluralType type
, UErrorCode
& status
) {
245 if (U_FAILURE(status
)) {
248 if (type
>= UPLURAL_TYPE_COUNT
) {
249 status
= U_ILLEGAL_ARGUMENT_ERROR
;
252 PluralRules
*newObj
= new PluralRules(status
);
253 if (newObj
==NULL
|| U_FAILURE(status
)) {
257 UnicodeString locRule
= newObj
->getRuleFromResource(locale
, type
, status
);
258 // TODO: which errors, if any, should be returned?
259 if (locRule
.length() == 0) {
260 // Locales with no specific rules (all numbers have the "other" category)
261 // will return a U_MISSING_RESOURCE_ERROR at this point. This is not an error; the default rule is used instead.
263 locRule
= UnicodeString(PLURAL_DEFAULT_RULE
);
264 status
= U_ZERO_ERROR
;
266 PluralRuleParser parser
;
267 parser
.parse(locRule
, newObj
, status
);
268 // TODO: should rule parse errors be returned, or
269 // should we silently use default rules?
270 // Original impl used default rules.
271 // Ask the question to ICU Core.
277 PluralRules::select(int32_t number
) const {
278 return select(FixedDecimal(number
));
282 PluralRules::select(double number
) const {
283 return select(FixedDecimal(number
));
287 PluralRules::select(const FixedDecimal
&number
) const {
288 if (mRules
== NULL
) {
289 return UnicodeString(TRUE
, PLURAL_DEFAULT_RULE
, -1);
292 return mRules
->select(number
);
297 PluralRules::getKeywords(UErrorCode
& status
) const {
298 if (U_FAILURE(status
)) return NULL
;
299 StringEnumeration
* nameEnumerator
= new PluralKeywordEnumeration(mRules
, status
);
300 if (U_FAILURE(status
)) {
301 delete nameEnumerator
;
305 return nameEnumerator
;
309 PluralRules::getUniqueKeywordValue(const UnicodeString
& /* keyword */) {
311 return UPLRULES_NO_UNIQUE_VALUE
;
315 PluralRules::getAllKeywordValues(const UnicodeString
& /* keyword */, double * /* dest */,
316 int32_t /* destCapacity */, UErrorCode
& error
) {
317 error
= U_UNSUPPORTED_ERROR
;
/**
 * Returns a power of ten by which d can be multiplied to obtain a value with
 * no fractional part: 1.0 if d is already integral, otherwise 10, 100, ...
 *
 * @param d the value to examine.
 * @return the scale factor; 1.0 for NaN, for which no such factor exists.
 */
static double scaleForInt(double d) {
    // NaN never compares equal to its own floor, so the loop below would spin
    // forever. (An infinity equals its floor and needs no special case.)
    if (d != d) {
        return 1.0;
    }
    double scale = 1.0;
    while (d != floor(d)) {
        d = d * 10.0;
        scale = scale * 10.0;
    }
    return scale;
}
332 getSamplesFromString(const UnicodeString
&samples
, double *dest
,
333 int32_t destCapacity
, UErrorCode
& status
) {
334 int32_t sampleCount
= 0;
335 int32_t sampleStartIdx
= 0;
336 int32_t sampleEndIdx
= 0;
338 //std::string ss; // TODO: debugging.
339 // std::cout << "PluralRules::getSamples(), samples = \"" << samples.toUTF8String(ss) << "\"\n";
340 for (sampleCount
= 0; sampleCount
< destCapacity
&& sampleStartIdx
< samples
.length(); ) {
341 sampleEndIdx
= samples
.indexOf(COMMA
, sampleStartIdx
);
342 if (sampleEndIdx
== -1) {
343 sampleEndIdx
= samples
.length();
345 const UnicodeString
&sampleRange
= samples
.tempSubStringBetween(sampleStartIdx
, sampleEndIdx
);
347 // std::cout << "PluralRules::getSamples(), samplesRange = \"" << sampleRange.toUTF8String(ss) << "\"\n";
348 int32_t tildeIndex
= sampleRange
.indexOf(TILDE
);
349 if (tildeIndex
< 0) {
350 FixedDecimal
fixed(sampleRange
, status
);
351 double sampleValue
= fixed
.source
;
352 if (fixed
.visibleDecimalDigitCount
== 0 || sampleValue
!= floor(sampleValue
)) {
353 dest
[sampleCount
++] = sampleValue
;
357 FixedDecimal
fixedLo(sampleRange
.tempSubStringBetween(0, tildeIndex
), status
);
358 FixedDecimal
fixedHi(sampleRange
.tempSubStringBetween(tildeIndex
+1), status
);
359 double rangeLo
= fixedLo
.source
;
360 double rangeHi
= fixedHi
.source
;
361 if (U_FAILURE(status
)) {
364 if (rangeHi
< rangeLo
) {
365 status
= U_INVALID_FORMAT_ERROR
;
369 // For ranges of samples with fraction decimal digits, scale the number up so that we
370 // are adding one in the units place. Avoids roundoffs from repetitive adds of tenths.
372 double scale
= scaleForInt(rangeLo
);
373 double t
= scaleForInt(rangeHi
);
379 for (double n
=rangeLo
; n
<=rangeHi
; n
+=1) {
380 // Hack Alert: don't return any decimal samples with integer values that
381 // originated from a format with trailing decimals.
382 // This API is returning doubles, which can't distinguish having displayed
383 // zeros to the right of the decimal.
384 // This results in test failures with values mapping back to a different keyword.
385 double sampleValue
= n
/scale
;
386 if (!(sampleValue
== floor(sampleValue
) && fixedLo
.visibleDecimalDigitCount
> 0)) {
387 dest
[sampleCount
++] = sampleValue
;
389 if (sampleCount
>= destCapacity
) {
394 sampleStartIdx
= sampleEndIdx
+ 1;
401 PluralRules::getSamples(const UnicodeString
&keyword
, double *dest
,
402 int32_t destCapacity
, UErrorCode
& status
) {
403 RuleChain
*rc
= rulesForKeyword(keyword
);
404 if (rc
== NULL
|| destCapacity
== 0 || U_FAILURE(status
)) {
407 int32_t numSamples
= getSamplesFromString(rc
->fIntegerSamples
, dest
, destCapacity
, status
);
408 if (numSamples
== 0) {
409 numSamples
= getSamplesFromString(rc
->fDecimalSamples
, dest
, destCapacity
, status
);
415 RuleChain
*PluralRules::rulesForKeyword(const UnicodeString
&keyword
) const {
417 for (rc
= mRules
; rc
!= NULL
; rc
= rc
->fNext
) {
418 if (rc
->fKeyword
== keyword
) {
427 PluralRules::isKeyword(const UnicodeString
& keyword
) const {
428 if (0 == keyword
.compare(PLURAL_KEYWORD_OTHER
, 5)) {
431 return rulesForKeyword(keyword
) != NULL
;
435 PluralRules::getKeywordOther() const {
436 return UnicodeString(TRUE
, PLURAL_KEYWORD_OTHER
, 5);
440 PluralRules::operator==(const PluralRules
& other
) const {
441 const UnicodeString
*ptrKeyword
;
442 UErrorCode status
= U_ZERO_ERROR
;
444 if ( this == &other
) {
447 LocalPointer
<StringEnumeration
> myKeywordList(getKeywords(status
));
448 LocalPointer
<StringEnumeration
> otherKeywordList(other
.getKeywords(status
));
449 if (U_FAILURE(status
)) {
453 if (myKeywordList
->count(status
)!=otherKeywordList
->count(status
)) {
456 myKeywordList
->reset(status
);
457 while ((ptrKeyword
=myKeywordList
->snext(status
))!=NULL
) {
458 if (!other
.isKeyword(*ptrKeyword
)) {
462 otherKeywordList
->reset(status
);
463 while ((ptrKeyword
=otherKeywordList
->snext(status
))!=NULL
) {
464 if (!this->isKeyword(*ptrKeyword
)) {
468 if (U_FAILURE(status
)) {
477 PluralRuleParser::parse(const UnicodeString
& ruleData
, PluralRules
*prules
, UErrorCode
&status
)
479 if (U_FAILURE(status
)) {
482 U_ASSERT(ruleIndex
== 0); // Parsers are good for a single use only!
485 while (ruleIndex
< ruleSrc
->length()) {
486 getNextToken(status
);
487 if (U_FAILURE(status
)) {
491 if (U_FAILURE(status
)) {
496 U_ASSERT(curAndConstraint
!= NULL
);
497 curAndConstraint
= curAndConstraint
->add();
501 U_ASSERT(currentChain
!= NULL
);
502 OrConstraint
*orNode
=currentChain
->ruleHeader
;
503 while (orNode
->next
!= NULL
) {
504 orNode
= orNode
->next
;
506 orNode
->next
= new OrConstraint();
509 curAndConstraint
= orNode
->add();
513 U_ASSERT(curAndConstraint
!= NULL
);
514 U_ASSERT(curAndConstraint
->value
== -1);
515 U_ASSERT(curAndConstraint
->rangeList
== NULL
);
518 U_ASSERT(curAndConstraint
!= NULL
);
519 curAndConstraint
->negated
=TRUE
;
523 curAndConstraint
->negated
=TRUE
;
527 U_ASSERT(curAndConstraint
!= NULL
);
528 curAndConstraint
->rangeList
= new UVector32(status
);
529 curAndConstraint
->rangeList
->addElement(-1, status
); // range Low
530 curAndConstraint
->rangeList
->addElement(-1, status
); // range Hi
533 curAndConstraint
->value
=PLURAL_RANGE_HIGH
;
534 curAndConstraint
->integerOnly
= (type
!= tWithin
);
537 U_ASSERT(curAndConstraint
!= NULL
);
538 if ( (curAndConstraint
->op
==AndConstraint::MOD
)&&
539 (curAndConstraint
->opNum
== -1 ) ) {
540 curAndConstraint
->opNum
=getNumberValue(token
);
543 if (curAndConstraint
->rangeList
== NULL
) {
544 // this is for an 'is' rule
545 curAndConstraint
->value
= getNumberValue(token
);
547 // this is for an 'in' or 'within' rule
548 if (curAndConstraint
->rangeList
->elementAti(rangeLowIdx
) == -1) {
549 curAndConstraint
->rangeList
->setElementAt(getNumberValue(token
), rangeLowIdx
);
550 curAndConstraint
->rangeList
->setElementAt(getNumberValue(token
), rangeHiIdx
);
553 curAndConstraint
->rangeList
->setElementAt(getNumberValue(token
), rangeHiIdx
);
554 if (curAndConstraint
->rangeList
->elementAti(rangeLowIdx
) >
555 curAndConstraint
->rangeList
->elementAti(rangeHiIdx
)) {
556 // Range Lower bound > Range Upper bound.
557 // U_UNEXPECTED_TOKEN seems a little funny, but it is consistently
558 // used for all plural rule parse errors.
559 status
= U_UNEXPECTED_TOKEN
;
567 // TODO: rule syntax checking is inadequate, can happen with badly formed rules.
568 // Catch cases like "n mod 10, is 1" here instead.
569 if (curAndConstraint
== NULL
|| curAndConstraint
->rangeList
== NULL
) {
570 status
= U_UNEXPECTED_TOKEN
;
573 U_ASSERT(curAndConstraint
->rangeList
->size() >= 2);
574 rangeLowIdx
= curAndConstraint
->rangeList
->size();
575 curAndConstraint
->rangeList
->addElement(-1, status
); // range Low
576 rangeHiIdx
= curAndConstraint
->rangeList
->size();
577 curAndConstraint
->rangeList
->addElement(-1, status
); // range Hi
580 U_ASSERT(curAndConstraint
!= NULL
);
581 curAndConstraint
->op
=AndConstraint::MOD
;
588 U_ASSERT(curAndConstraint
!= NULL
);
589 curAndConstraint
->digitsType
= type
;
593 RuleChain
*newChain
= new RuleChain
;
594 if (newChain
== NULL
) {
595 status
= U_MEMORY_ALLOCATION_ERROR
;
598 newChain
->fKeyword
= token
;
599 if (prules
->mRules
== NULL
) {
600 prules
->mRules
= newChain
;
602 // The new rule chain goes at the end of the linked list of rule chains,
603 // unless there is an "other" keyword & chain. "other" must remain last.
604 RuleChain
*insertAfter
= prules
->mRules
;
605 while (insertAfter
->fNext
!=NULL
&&
606 insertAfter
->fNext
->fKeyword
.compare(PLURAL_KEYWORD_OTHER
, 5) != 0 ){
607 insertAfter
=insertAfter
->fNext
;
609 newChain
->fNext
= insertAfter
->fNext
;
610 insertAfter
->fNext
= newChain
;
612 OrConstraint
*orNode
= new OrConstraint();
613 newChain
->ruleHeader
= orNode
;
614 curAndConstraint
= orNode
->add();
615 currentChain
= newChain
;
621 getNextToken(status
);
622 if (U_FAILURE(status
) || type
== tSemiColon
|| type
== tEOF
|| type
== tAt
) {
625 if (type
== tEllipsis
) {
626 currentChain
->fIntegerSamplesUnbounded
= TRUE
;
629 currentChain
->fIntegerSamples
.append(token
);
635 getNextToken(status
);
636 if (U_FAILURE(status
) || type
== tSemiColon
|| type
== tEOF
|| type
== tAt
) {
639 if (type
== tEllipsis
) {
640 currentChain
->fDecimalSamplesUnbounded
= TRUE
;
643 currentChain
->fDecimalSamples
.append(token
);
651 if (U_FAILURE(status
)) {
658 PluralRules::getRuleFromResource(const Locale
& locale
, UPluralType type
, UErrorCode
& errCode
) {
659 UnicodeString emptyStr
;
661 if (U_FAILURE(errCode
)) {
664 LocalUResourceBundlePointer
rb(ures_openDirect(NULL
, "plurals", &errCode
));
665 if(U_FAILURE(errCode
)) {
670 case UPLURAL_TYPE_CARDINAL
:
673 case UPLURAL_TYPE_ORDINAL
:
674 typeKey
= "locales_ordinals";
677 // Must not occur: The caller should have checked for valid types.
678 errCode
= U_ILLEGAL_ARGUMENT_ERROR
;
681 LocalUResourceBundlePointer
locRes(ures_getByKey(rb
.getAlias(), typeKey
, NULL
, &errCode
));
682 if(U_FAILURE(errCode
)) {
686 const char *curLocaleName
=locale
.getName();
687 const UChar
* s
= ures_getStringByKey(locRes
.getAlias(), curLocaleName
, &resLen
, &errCode
);
690 // Check parent locales.
691 UErrorCode status
= U_ZERO_ERROR
;
692 char parentLocaleName
[ULOC_FULLNAME_CAPACITY
];
693 const char *curLocaleName
=locale
.getName();
694 uprv_strcpy(parentLocaleName
, curLocaleName
);
696 while (uloc_getParent(parentLocaleName
, parentLocaleName
,
697 ULOC_FULLNAME_CAPACITY
, &status
) > 0) {
699 s
= ures_getStringByKey(locRes
.getAlias(), parentLocaleName
, &resLen
, &status
);
701 errCode
= U_ZERO_ERROR
;
704 status
= U_ZERO_ERROR
;
712 u_UCharsToChars(s
, setKey
, resLen
+ 1);
713 // printf("\n PluralRule: %s\n", setKey);
715 LocalUResourceBundlePointer
ruleRes(ures_getByKey(rb
.getAlias(), "rules", NULL
, &errCode
));
716 if(U_FAILURE(errCode
)) {
719 LocalUResourceBundlePointer
setRes(ures_getByKey(ruleRes
.getAlias(), setKey
, NULL
, &errCode
));
720 if (U_FAILURE(errCode
)) {
724 int32_t numberKeys
= ures_getSize(setRes
.getAlias());
725 UnicodeString result
;
726 const char *key
=NULL
;
727 for(int32_t i
=0; i
<numberKeys
; ++i
) { // Keys are zero, one, few, ...
728 UnicodeString rules
= ures_getNextUnicodeString(setRes
.getAlias(), &key
, &errCode
);
729 UnicodeString
uKey(key
, -1, US_INV
);
731 result
.append(COLON
);
732 result
.append(rules
);
733 result
.append(SEMI_COLON
);
740 PluralRules::getRules() const {
742 if (mRules
!= NULL
) {
743 mRules
->dumpRules(rules
);
749 AndConstraint::AndConstraint() {
750 op
= AndConstraint::NONE
;
761 AndConstraint::AndConstraint(const AndConstraint
& other
) {
763 this->opNum
=other
.opNum
;
764 this->value
=other
.value
;
765 this->rangeList
=NULL
;
766 if (other
.rangeList
!= NULL
) {
767 UErrorCode status
= U_ZERO_ERROR
;
768 this->rangeList
= new UVector32(status
);
769 this->rangeList
->assign(*other
.rangeList
, status
);
771 this->integerOnly
=other
.integerOnly
;
772 this->negated
=other
.negated
;
773 this->digitsType
= other
.digitsType
;
774 if (other
.next
==NULL
) {
778 this->next
= new AndConstraint(*other
.next
);
782 AndConstraint::~AndConstraint() {
791 AndConstraint::isFulfilled(const FixedDecimal
&number
) {
793 if (digitsType
== none
) {
794 // An empty AndConstraint, created by a rule with a keyword but no following expression.
797 double n
= number
.get(digitsType
); // pulls n | i | v | f value for the number.
798 // Will always be positive.
799 // May be non-integer (n option only)
801 if (integerOnly
&& n
!= uprv_floor(n
)) {
809 if (rangeList
== NULL
) {
810 result
= value
== -1 || // empty rule
811 n
== value
; // 'is' rule
814 result
= FALSE
; // 'in' or 'within' rule
815 for (int32_t r
=0; r
<rangeList
->size(); r
+=2) {
816 if (rangeList
->elementAti(r
) <= n
&& n
<= rangeList
->elementAti(r
+1)) {
833 this->next
= new AndConstraint();
837 OrConstraint::OrConstraint() {
842 OrConstraint::OrConstraint(const OrConstraint
& other
) {
843 if ( other
.childNode
== NULL
) {
844 this->childNode
= NULL
;
847 this->childNode
= new AndConstraint(*(other
.childNode
));
849 if (other
.next
== NULL
) {
853 this->next
= new OrConstraint(*(other
.next
));
857 OrConstraint::~OrConstraint() {
858 if (childNode
!=NULL
) {
869 OrConstraint
*curOrConstraint
=this;
871 while (curOrConstraint
->next
!=NULL
) {
872 curOrConstraint
= curOrConstraint
->next
;
874 U_ASSERT(curOrConstraint
->childNode
== NULL
);
875 curOrConstraint
->childNode
= new AndConstraint();
877 return curOrConstraint
->childNode
;
881 OrConstraint::isFulfilled(const FixedDecimal
&number
) {
882 OrConstraint
* orRule
=this;
885 while (orRule
!=NULL
&& !result
) {
887 AndConstraint
* andRule
= orRule
->childNode
;
888 while (andRule
!=NULL
&& result
) {
889 result
= andRule
->isFulfilled(number
);
890 andRule
=andRule
->next
;
892 orRule
= orRule
->next
;
899 RuleChain::RuleChain(): fKeyword(), fNext(NULL
), ruleHeader(NULL
), fDecimalSamples(), fIntegerSamples(),
900 fDecimalSamplesUnbounded(FALSE
), fIntegerSamplesUnbounded(FALSE
) {
903 RuleChain::RuleChain(const RuleChain
& other
) :
904 fKeyword(other
.fKeyword
), fNext(NULL
), ruleHeader(NULL
), fDecimalSamples(other
.fDecimalSamples
),
905 fIntegerSamples(other
.fIntegerSamples
), fDecimalSamplesUnbounded(other
.fDecimalSamplesUnbounded
),
906 fIntegerSamplesUnbounded(other
.fIntegerSamplesUnbounded
) {
907 if (other
.ruleHeader
!= NULL
) {
908 this->ruleHeader
= new OrConstraint(*(other
.ruleHeader
));
910 if (other
.fNext
!= NULL
) {
911 this->fNext
= new RuleChain(*other
.fNext
);
915 RuleChain::~RuleChain() {
922 RuleChain::select(const FixedDecimal
&number
) const {
923 if (!number
.isNanOrInfinity
) {
924 for (const RuleChain
*rules
= this; rules
!= NULL
; rules
= rules
->fNext
) {
925 if (rules
->ruleHeader
->isFulfilled(number
)) {
926 return rules
->fKeyword
;
930 return UnicodeString(TRUE
, PLURAL_KEYWORD_OTHER
, 5);
933 static UnicodeString
tokenString(tokenType tok
) {
937 s
.append(LOW_N
); break;
939 s
.append(LOW_I
); break;
941 s
.append(LOW_F
); break;
943 s
.append(LOW_V
); break;
945 s
.append(LOW_T
); break;
953 RuleChain::dumpRules(UnicodeString
& result
) {
954 UChar digitString
[16];
956 if ( ruleHeader
!= NULL
) {
960 OrConstraint
* orRule
=ruleHeader
;
961 while ( orRule
!= NULL
) {
962 AndConstraint
* andRule
=orRule
->childNode
;
963 while ( andRule
!= NULL
) {
964 if ((andRule
->op
==AndConstraint::NONE
) && (andRule
->rangeList
==NULL
) && (andRule
->value
== -1)) {
966 } else if ( (andRule
->op
==AndConstraint::NONE
) && (andRule
->rangeList
==NULL
) ) {
967 result
+= tokenString(andRule
->digitsType
);
968 result
+= UNICODE_STRING_SIMPLE(" is ");
969 if (andRule
->negated
) {
970 result
+= UNICODE_STRING_SIMPLE("not ");
972 uprv_itou(digitString
,16, andRule
->value
,10,0);
973 result
+= UnicodeString(digitString
);
976 result
+= tokenString(andRule
->digitsType
);
978 if (andRule
->op
==AndConstraint::MOD
) {
979 result
+= UNICODE_STRING_SIMPLE("mod ");
980 uprv_itou(digitString
,16, andRule
->opNum
,10,0);
981 result
+= UnicodeString(digitString
);
983 if (andRule
->rangeList
==NULL
) {
984 if (andRule
->negated
) {
985 result
+= UNICODE_STRING_SIMPLE(" is not ");
986 uprv_itou(digitString
,16, andRule
->value
,10,0);
987 result
+= UnicodeString(digitString
);
990 result
+= UNICODE_STRING_SIMPLE(" is ");
991 uprv_itou(digitString
,16, andRule
->value
,10,0);
992 result
+= UnicodeString(digitString
);
996 if (andRule
->negated
) {
997 if ( andRule
->integerOnly
) {
998 result
+= UNICODE_STRING_SIMPLE(" not in ");
1001 result
+= UNICODE_STRING_SIMPLE(" not within ");
1005 if ( andRule
->integerOnly
) {
1006 result
+= UNICODE_STRING_SIMPLE(" in ");
1009 result
+= UNICODE_STRING_SIMPLE(" within ");
1012 for (int32_t r
=0; r
<andRule
->rangeList
->size(); r
+=2) {
1013 int32_t rangeLo
= andRule
->rangeList
->elementAti(r
);
1014 int32_t rangeHi
= andRule
->rangeList
->elementAti(r
+1);
1015 uprv_itou(digitString
,16, rangeLo
, 10, 0);
1016 result
+= UnicodeString(digitString
);
1017 result
+= UNICODE_STRING_SIMPLE("..");
1018 uprv_itou(digitString
,16, rangeHi
, 10,0);
1019 result
+= UnicodeString(digitString
);
1020 if (r
+2 < andRule
->rangeList
->size()) {
1021 result
+= UNICODE_STRING_SIMPLE(", ");
1026 if ( (andRule
=andRule
->next
) != NULL
) {
1027 result
+= UNICODE_STRING_SIMPLE(" and ");
1030 if ( (orRule
= orRule
->next
) != NULL
) {
1031 result
+= UNICODE_STRING_SIMPLE(" or ");
1035 if ( fNext
!= NULL
) {
1036 result
+= UNICODE_STRING_SIMPLE("; ");
1037 fNext
->dumpRules(result
);
1043 RuleChain::getKeywords(int32_t capacityOfKeywords
, UnicodeString
* keywords
, int32_t& arraySize
) const {
1044 if ( arraySize
< capacityOfKeywords
-1 ) {
1045 keywords
[arraySize
++]=fKeyword
;
1048 return U_BUFFER_OVERFLOW_ERROR
;
1051 if ( fNext
!= NULL
) {
1052 return fNext
->getKeywords(capacityOfKeywords
, keywords
, arraySize
);
1055 return U_ZERO_ERROR
;
1060 RuleChain::isKeyword(const UnicodeString
& keywordParam
) const {
1061 if ( fKeyword
== keywordParam
) {
1065 if ( fNext
!= NULL
) {
1066 return fNext
->isKeyword(keywordParam
);
1074 PluralRuleParser::PluralRuleParser() :
1075 ruleIndex(0), token(), type(none
), prevType(none
),
1076 curAndConstraint(NULL
), currentChain(NULL
), rangeLowIdx(-1), rangeHiIdx(-1)
1080 PluralRuleParser::~PluralRuleParser() {
1085 PluralRuleParser::getNumberValue(const UnicodeString
& token
) {
1089 i
= token
.extract(0, token
.length(), digits
, ARRAY_SIZE(digits
), US_INV
);
1092 return((int32_t)atoi(digits
));
1097 PluralRuleParser::checkSyntax(UErrorCode
&status
)
1099 if (U_FAILURE(status
)) {
1102 if (!(prevType
==none
|| prevType
==tSemiColon
)) {
1103 type
= getKeyType(token
, type
); // Switch token type from tKeyword if we scanned a reserved word,
1104 // and we are not at the start of a rule, where a
1105 // keyword is expected.
1111 if (type
!=tKeyword
&& type
!= tEOF
) {
1112 status
= U_UNEXPECTED_TOKEN
;
1120 if (type
!= tIs
&& type
!= tMod
&& type
!= tIn
&&
1121 type
!= tNot
&& type
!= tWithin
&& type
!= tEqual
&& type
!= tNotEqual
) {
1122 status
= U_UNEXPECTED_TOKEN
;
1126 if (type
!= tColon
) {
1127 status
= U_UNEXPECTED_TOKEN
;
1131 if (!(type
== tVariableN
||
1132 type
== tVariableI
||
1133 type
== tVariableF
||
1134 type
== tVariableT
||
1135 type
== tVariableV
||
1137 status
= U_UNEXPECTED_TOKEN
;
1141 if ( type
!= tNumber
&& type
!= tNot
) {
1142 status
= U_UNEXPECTED_TOKEN
;
1146 if (type
!= tNumber
&& type
!= tIn
&& type
!= tWithin
) {
1147 status
= U_UNEXPECTED_TOKEN
;
1156 if (type
!= tNumber
) {
1157 status
= U_UNEXPECTED_TOKEN
;
1162 if ( type
!= tVariableN
&&
1163 type
!= tVariableI
&&
1164 type
!= tVariableF
&&
1165 type
!= tVariableT
&&
1166 type
!= tVariableV
) {
1167 status
= U_UNEXPECTED_TOKEN
;
1171 if (type
!= tNumber
) {
1172 status
= U_UNEXPECTED_TOKEN
;
1176 if (type
!= tDot2
&& type
!= tSemiColon
&& type
!= tIs
&& type
!= tNot
&&
1177 type
!= tIn
&& type
!= tEqual
&& type
!= tNotEqual
&& type
!= tWithin
&&
1178 type
!= tAnd
&& type
!= tOr
&& type
!= tComma
&& type
!= tAt
&&
1181 status
= U_UNEXPECTED_TOKEN
;
1183 // TODO: a comma following a number that is not part of a range will be allowed.
1184 // It's not the only case of this sort of thing. Parser needs a re-write.
1187 if (type
!= tDecimal
&& type
!= tInteger
) {
1188 status
= U_UNEXPECTED_TOKEN
;
1192 status
= U_UNEXPECTED_TOKEN
;
1199 * Scan the next token from the input rules.
1200 * The rule source and the scanned token's type are held in the parser state variables.
1203 PluralRuleParser::getNextToken(UErrorCode
&status
)
1205 if (U_FAILURE(status
)) {
1210 while (ruleIndex
< ruleSrc
->length()) {
1211 ch
= ruleSrc
->charAt(ruleIndex
);
1212 type
= charType(ch
);
1213 if (type
!= tSpace
) {
1218 if (ruleIndex
>= ruleSrc
->length()) {
1222 int32_t curIndex
= ruleIndex
;
1229 case tTilde
: // scanned '~'
1230 case tAt
: // scanned '@'
1231 case tEqual
: // scanned '='
1232 case tMod
: // scanned '%'
1233 // Single character tokens.
1237 case tNotEqual
: // scanned '!'
1238 if (ruleSrc
->charAt(curIndex
+1) == EQUALS
) {
1247 while (type
== tKeyword
&& ++curIndex
< ruleSrc
->length()) {
1248 ch
= ruleSrc
->charAt(curIndex
);
1249 type
= charType(ch
);
1255 while (type
== tNumber
&& ++curIndex
< ruleSrc
->length()) {
1256 ch
= ruleSrc
->charAt(curIndex
);
1257 type
= charType(ch
);
1263 // We could be looking at either ".." in a range, or "..." at the end of a sample.
1264 if (curIndex
+1 >= ruleSrc
->length() || ruleSrc
->charAt(curIndex
+1) != DOT
) {
1266 break; // Single dot
1268 if (curIndex
+2 >= ruleSrc
->length() || ruleSrc
->charAt(curIndex
+2) != DOT
) {
1271 break; // double dot
1275 break; // triple dot
1278 status
= U_UNEXPECTED_TOKEN
;
1283 U_ASSERT(ruleIndex
<= ruleSrc
->length());
1284 U_ASSERT(curIndex
<= ruleSrc
->length());
1285 token
=UnicodeString(*ruleSrc
, ruleIndex
, curIndex
-ruleIndex
);
1286 ruleIndex
= curIndex
;
1290 PluralRuleParser::charType(UChar ch
) {
1291 if ((ch
>=U_ZERO
) && (ch
<=U_NINE
)) {
1294 if (ch
>=LOW_A
&& ch
<=LOW_Z
) {
1326 // Set token type for reserved words in the Plural Rule syntax.
1329 PluralRuleParser::getKeyType(const UnicodeString
&token
, tokenType keyType
)
1331 if (keyType
!= tKeyword
) {
1335 if (0 == token
.compare(PK_VAR_N
, 1)) {
1336 keyType
= tVariableN
;
1337 } else if (0 == token
.compare(PK_VAR_I
, 1)) {
1338 keyType
= tVariableI
;
1339 } else if (0 == token
.compare(PK_VAR_F
, 1)) {
1340 keyType
= tVariableF
;
1341 } else if (0 == token
.compare(PK_VAR_T
, 1)) {
1342 keyType
= tVariableT
;
1343 } else if (0 == token
.compare(PK_VAR_V
, 1)) {
1344 keyType
= tVariableV
;
1345 } else if (0 == token
.compare(PK_IS
, 2)) {
1347 } else if (0 == token
.compare(PK_AND
, 3)) {
1349 } else if (0 == token
.compare(PK_IN
, 2)) {
1351 } else if (0 == token
.compare(PK_WITHIN
, 6)) {
1353 } else if (0 == token
.compare(PK_NOT
, 3)) {
1355 } else if (0 == token
.compare(PK_MOD
, 3)) {
1357 } else if (0 == token
.compare(PK_OR
, 2)) {
1359 } else if (0 == token
.compare(PK_DECIMAL
, 7)) {
1361 } else if (0 == token
.compare(PK_INTEGER
, 7)) {
1368 PluralKeywordEnumeration::PluralKeywordEnumeration(RuleChain
*header
, UErrorCode
& status
)
1369 : pos(0), fKeywordNames(status
) {
1370 if (U_FAILURE(status
)) {
1373 fKeywordNames
.setDeleter(uprv_deleteUObject
);
1374 UBool addKeywordOther
=TRUE
;
1375 RuleChain
*node
=header
;
1377 fKeywordNames
.addElement(new UnicodeString(node
->fKeyword
), status
);
1378 if (U_FAILURE(status
)) {
1381 if (0 == node
->fKeyword
.compare(PLURAL_KEYWORD_OTHER
, 5)) {
1382 addKeywordOther
= FALSE
;
1387 if (addKeywordOther
) {
1388 fKeywordNames
.addElement(new UnicodeString(PLURAL_KEYWORD_OTHER
), status
);
1392 const UnicodeString
*
1393 PluralKeywordEnumeration::snext(UErrorCode
& status
) {
1394 if (U_SUCCESS(status
) && pos
< fKeywordNames
.size()) {
1395 return (const UnicodeString
*)fKeywordNames
.elementAt(pos
++);
1401 PluralKeywordEnumeration::reset(UErrorCode
& /*status*/) {
1406 PluralKeywordEnumeration::count(UErrorCode
& /*status*/) const {
1407 return fKeywordNames
.size();
1410 PluralKeywordEnumeration::~PluralKeywordEnumeration() {
1415 FixedDecimal::FixedDecimal(double n
, int32_t v
, int64_t f
) {
1417 // check values. TODO make into unit test.
1419 // long visiblePower = (int) Math.pow(10, v);
1420 // if (decimalDigits > visiblePower) {
1421 // throw new IllegalArgumentException();
1423 // double fraction = intValue + (decimalDigits / (double) visiblePower);
1424 // if (fraction != source) {
1425 // double diff = Math.abs(fraction - source)/(Math.abs(fraction) + Math.abs(source));
1426 // if (diff > 0.00000001d) {
1427 // throw new IllegalArgumentException();
1432 FixedDecimal::FixedDecimal(double n
, int32_t v
) {
1433 // Ugly, but for samples we don't care.
1434 init(n
, v
, getFractionalDigits(n
, v
));
1437 FixedDecimal::FixedDecimal(double n
) {
1441 FixedDecimal::FixedDecimal() {
1446 // Create a FixedDecimal from a UnicodeString containing a number.
1447 // Inefficient, but only used for samples, so simplicity trumps efficiency.
// Steps visible here: copy num into cs as invariant chars, parse cs into dl,
// take the double value from dl, and derive the visible fraction digit count
// from the position of the decimal point inside num.
// NOTE(review): the declarations of cs and dl, and the bodies of the failure
// branch and of the "no decimal point" branch, are not visible in this
// listing -- confirm against the full file.
1449 FixedDecimal::FixedDecimal(const UnicodeString
&num
, UErrorCode
&status
) {
1451 cs
.appendInvariantChars(num
, status
);
1453 dl
.set(cs
.toStringPiece(), status
);
// Taken when either the conversion to cs or the parse into dl reported an error.
1454 if (U_FAILURE(status
)) {
1458 int32_t decimalPoint
= num
.indexOf(DOT
);
1459 double n
= dl
.getDouble();
// -1 means num contains no DOT.
1460 if (decimalPoint
== -1) {
// v = number of characters that follow the decimal point in num.
1463 int32_t v
= num
.length() - decimalPoint
- 1;
1464 init(n
, v
, getFractionalDigits(n
, v
));
1469 FixedDecimal::FixedDecimal(const FixedDecimal
&other
) {
1470 source
= other
.source
;
1471 visibleDecimalDigitCount
= other
.visibleDecimalDigitCount
;
1472 decimalDigits
= other
.decimalDigits
;
1473 decimalDigitsWithoutTrailingZeros
= other
.decimalDigitsWithoutTrailingZeros
;
1474 intValue
= other
.intValue
;
1475 hasIntegerValue
= other
.hasIntegerValue
;
1476 isNegative
= other
.isNegative
;
1477 isNanOrInfinity
= other
.isNanOrInfinity
;
1481 void FixedDecimal::init(double n
) {
1482 int32_t numFractionDigits
= decimals(n
);
1483 init(n
, numFractionDigits
, getFractionalDigits(n
, numFractionDigits
));
// Core initializer. From the visible statements it records the sign of n,
// classifies NaN/infinity, derives the integer part, stores v as the visible
// fraction digit count and computes the fraction digits without trailing
// zeros.
// NOTE(review): several statements are missing from this listing, among them
// the assignment of `source`, the NaN/infinity branch contents other than
// hasIntegerValue, the use of f, and the declaration and update of `fdwtz`
// -- confirm against the full file.
1487 void FixedDecimal::init(double n
, int32_t v
, int64_t f
) {
1488 isNegative
= n
< 0.0;
1490 isNanOrInfinity
= uprv_isNaN(source
) || uprv_isPositiveInfinity(source
);
1491 if (isNanOrInfinity
) {
1495 hasIntegerValue
= FALSE
;
// Truncating cast; hasIntegerValue is true only when the cast lost nothing.
1497 intValue
= (int64_t)source
;
1498 hasIntegerValue
= (source
== intValue
);
1501 visibleDecimalDigitCount
= v
;
1504 decimalDigitsWithoutTrailingZeros
= 0;
// Loops while fdwtz is divisible by 10 (the loop body is not visible here).
1507 while ((fdwtz%10
) == 0) {
1510 decimalDigitsWithoutTrailingZeros
= fdwtz
;
1515 // Fast path only exact initialization. Return true if successful.
1516 // Note: Do not multiply by 10 each time through loop, rounding cruft can build
1517 // up that makes the check for an integer result fail.
1518 // A single multiply of the original number works more reliably.
// Powers of ten indexed by exponent. The loops in quickInit() and decimals()
// only use indices 0..3.
1519 static int32_t p10
[] = {1, 10, 100, 1000, 10000};
// NOTE(review): the statements that set `success`, leave the loop, guard the
// init() call and return the result are not visible in this listing
// -- confirm against the full file.
1520 UBool
FixedDecimal::quickInit(double n
) {
1521 UBool success
= FALSE
;
1523 int32_t numFractionDigits
;
// Try 0..3 fraction digits, testing whether n scaled by the matching power of
// ten is an exact integer.
1524 for (numFractionDigits
= 0; numFractionDigits
<= 3; numFractionDigits
++) {
1525 double scaledN
= n
* p10
[numFractionDigits
];
1526 if (scaledN
== floor(scaledN
)) {
1532 init(n
, numFractionDigits
, getFractionalDigits(n
, numFractionDigits
));
// NOTE(review): the fast-path return, the declaration of `buf` and the exit
// from the trailing-zero loop are not visible in this listing -- confirm
// against the full file.
1539 int32_t FixedDecimal::decimals(double n
) {
1540 // Count the number of decimal digits in the fraction part of the number, excluding trailing zeros.
1541 // fastpath the common cases, integers or fractions with 3 or fewer digits
1543 for (int ndigits
=0; ndigits
<=3; ndigits
++) {
1544 double scaledN
= n
* p10
[ndigits
];
1545 if (scaledN
== floor(scaledN
)) {
1550 // Slow path, convert with sprintf, parse converted output.
1552 sprintf(buf
, "%1.15e", n
);
1553 // formatted number looks like this: 1.234567890123457e-01
// In that layout the exponent, sign included, starts at offset 18.
1554 int exponent
= atoi(buf
+18);
1555 int numFractionDigits
= 15;
// Walk backwards from the last mantissa digit (offset 16); each '0' seen
// reduces the digit count by one.
1556 for (int i
=16; ; --i
) {
1557 if (buf
[i
] != '0') {
1560 --numFractionDigits
;
1562 numFractionDigits
-= exponent
; // Fraction part of fixed point representation.
1563 return numFractionDigits
;
1567 // Get the fraction digits of a double, represented as an integer.
1568 // v is the number of visible fraction digits in the displayed form of the number.
1569 // Example: n = 1001.234, v = 6, result = 234000
1570 // TODO: need to think through how this is used in the plural rule context.
1571 // This function can easily encounter integer overflow,
1572 // and can easily return noise digits when the precision of a double is exceeded.
// NOTE(review): the value returned by the early exit, the switch header and
// the return taken when `scaled` exceeds U_INT64_MAX are not visible in this
// listing -- confirm against the full file.
1574 int64_t FixedDecimal::getFractionalDigits(double n
, int32_t v
) {
// Early exit: no fraction digits requested, integral n, NaN or +infinity.
1575 if (v
== 0 || n
== floor(n
) || uprv_isNaN(n
) || uprv_isPositiveInfinity(n
)) {
1579 double fract
= n
- floor(n
);
// Small digit counts: scale by a literal power of ten, add 0.5, truncate.
1581 case 1: return (int64_t)(fract
*10.0 + 0.5);
1582 case 2: return (int64_t)(fract
*100.0 + 0.5);
1583 case 3: return (int64_t)(fract
*1000.0 + 0.5);
// Other digit counts: same rounding, with the scale computed by pow(); the
// result is checked against U_INT64_MAX before the cast.
1585 double scaled
= floor(fract
* pow(10.0, (double)v
) + 0.5);
1586 if (scaled
> U_INT64_MAX
) {
1589 return (int64_t)scaled
;
// Pads the fraction with trailing zeros when fewer than minFractionDigits
// are currently visible: decimalDigits is multiplied by 10 per added zero and
// visibleDecimalDigitCount grows by the number of zeros requested.
// NOTE(review): the statement inside the overflow guard is not visible in
// this listing (presumably `break`) -- confirm against the full file.
1595 void FixedDecimal::adjustForMinFractionDigits(int32_t minFractionDigits
) {
1596 int32_t numTrailingFractionZeros
= minFractionDigits
- visibleDecimalDigitCount
;
1597 if (numTrailingFractionZeros
> 0) {
1598 for (int32_t i
=0; i
<numTrailingFractionZeros
; i
++) {
1599 // Do not let the decimalDigits value overflow if there are many trailing zeros.
1600 // Limit the value to 18 digits, the most that a 64 bit int can fully represent.
1601 if (decimalDigits
>= 100000000000000000LL) {
1604 decimalDigits
*= 10;
1606 visibleDecimalDigitCount
+= numTrailingFractionZeros
;
// Returns, as a double, the field selected by `operand`:
//   tVariableN -> source
//   tVariableI -> intValue
//   tVariableF -> decimalDigits
//   tVariableT -> decimalDigitsWithoutTrailingZeros
//   tVariableV -> visibleDecimalDigitCount
// Any other operand trips U_ASSERT.
// NOTE(review): the switch header and the value returned after the assert
// are not visible in this listing -- confirm against the full file.
1611 double FixedDecimal::get(tokenType operand
) const {
1613 case tVariableN
: return source
;
1614 case tVariableI
: return (double)intValue
;
1615 case tVariableF
: return (double)decimalDigits
;
1616 case tVariableT
: return (double)decimalDigitsWithoutTrailingZeros
;
1617 case tVariableV
: return visibleDecimalDigitCount
;
1619 U_ASSERT(FALSE
); // unexpected.
1624 int32_t FixedDecimal::getVisibleFractionDigitCount() const {
1625 return visibleDecimalDigitCount
;
// Opens the "plurals" resource bundle and stores its "locales" entry in
// fLocales. The incoming status is first copied into fOpenStatus; when it is
// not a failure, fOpenStatus is cleared and then receives the outcome of the
// two resource calls, so that next(), reset() and count() can report it.
// NOTE(review): the statements before the fOpenStatus assignment and the body
// of the failure branch are not visible in this listing -- confirm against
// the full file.
1630 PluralAvailableLocalesEnumeration::PluralAvailableLocalesEnumeration(UErrorCode
&status
) {
1633 fOpenStatus
= status
;
1634 if (U_FAILURE(status
)) {
1637 fOpenStatus
= U_ZERO_ERROR
;
// rb is a LocalUResourceBundlePointer local to this constructor; only the
// "locales" sub-resource is kept, in fLocales.
1638 LocalUResourceBundlePointer
rb(ures_openDirect(NULL
, "plurals", &fOpenStatus
));
1639 fLocales
= ures_getByKey(rb
.getAlias(), "locales", NULL
, &fOpenStatus
);
// Closes the fLocales resource bundle.
// NOTE(review): further statements may be missing from this listing; next()
// assigns fRes, and no matching ures_close(fRes) is visible here -- confirm
// against the full file.
1642 PluralAvailableLocalesEnumeration::~PluralAvailableLocalesEnumeration() {
1643 ures_close(fLocales
);
// Advances to the next entry of fLocales and obtains its key; when
// resultLength is non-NULL it receives the key's length.
// A failure already present in `status` stops the call, and a failure
// recorded in fOpenStatus is copied into `status`.
// NOTE(review): none of the return statements are visible in this listing
// (presumably NULL on the early exits and `result` at the end) -- confirm
// against the full file.
1649 const char *PluralAvailableLocalesEnumeration::next(int32_t *resultLength
, UErrorCode
&status
) {
1650 if (U_FAILURE(status
)) {
1653 if (U_FAILURE(fOpenStatus
)) {
1654 status
= fOpenStatus
;
// The previous fRes is handed back to ures_getNextResource and replaced by
// its result.
1657 fRes
= ures_getNextResource(fLocales
, fRes
, &status
);
1658 if (fRes
== NULL
|| U_FAILURE(status
)) {
// U_INDEX_OUTOFBOUNDS_ERROR is cleared rather than reported (presumably it
// just marks the end of the list).
1659 if (status
== U_INDEX_OUTOFBOUNDS_ERROR
) {
1660 status
= U_ZERO_ERROR
;
1664 const char *result
= ures_getKey(fRes
);
1665 if (resultLength
!= NULL
) {
1666 *resultLength
= uprv_strlen(result
);
// Rewinds iteration over fLocales via ures_resetIterator. A failure already
// present in `status` is checked first, and a failure recorded in fOpenStatus
// is copied into `status`.
// NOTE(review): the bodies of the two guards end in statements that are not
// visible in this listing (presumably `return`) -- confirm against the full
// file.
1672 void PluralAvailableLocalesEnumeration::reset(UErrorCode
&status
) {
1673 if (U_FAILURE(status
)) {
1676 if (U_FAILURE(fOpenStatus
)) {
1677 status
= fOpenStatus
;
1680 ures_resetIterator(fLocales
);
// Returns the size of the fLocales resource as reported by ures_getSize.
// A failure already present in `status` is checked first, and a failure
// recorded in fOpenStatus is copied into `status`.
// NOTE(review): the values returned from the two guards are not visible in
// this listing (presumably 0) -- confirm against the full file.
1683 int32_t PluralAvailableLocalesEnumeration::count(UErrorCode
&status
) const {
1684 if (U_FAILURE(status
)) {
1687 if (U_FAILURE(fOpenStatus
)) {
1688 status
= fOpenStatus
;
1691 return ures_getSize(fLocales
);
1697 #endif /* #if !UCONFIG_NO_FORMATTING */