2 *******************************************************************************
3 * Copyright (C) 2007-2014, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 *******************************************************************************
13 #include "unicode/utypes.h"
14 #include "unicode/localpointer.h"
15 #include "unicode/plurrule.h"
16 #include "unicode/upluralrules.h"
17 #include "unicode/ures.h"
25 #include "patternprops.h"
26 #include "plurrule_impl.h"
32 #include "sharedpluralrules.h"
33 #include "unifiedcache.h"
35 #if !UCONFIG_NO_FORMATTING
39 #define ARRAY_SIZE(array) (int32_t)(sizeof array / sizeof array[0])
41 static const UChar PLURAL_KEYWORD_OTHER
[]={LOW_O
,LOW_T
,LOW_H
,LOW_E
,LOW_R
,0};
42 static const UChar PLURAL_DEFAULT_RULE
[]={LOW_O
,LOW_T
,LOW_H
,LOW_E
,LOW_R
,COLON
,SPACE
,LOW_N
,0};
43 static const UChar PK_IN
[]={LOW_I
,LOW_N
,0};
44 static const UChar PK_NOT
[]={LOW_N
,LOW_O
,LOW_T
,0};
45 static const UChar PK_IS
[]={LOW_I
,LOW_S
,0};
46 static const UChar PK_MOD
[]={LOW_M
,LOW_O
,LOW_D
,0};
47 static const UChar PK_AND
[]={LOW_A
,LOW_N
,LOW_D
,0};
48 static const UChar PK_OR
[]={LOW_O
,LOW_R
,0};
49 static const UChar PK_VAR_N
[]={LOW_N
,0};
50 static const UChar PK_VAR_I
[]={LOW_I
,0};
51 static const UChar PK_VAR_F
[]={LOW_F
,0};
52 static const UChar PK_VAR_T
[]={LOW_T
,0};
53 static const UChar PK_VAR_V
[]={LOW_V
,0};
54 static const UChar PK_WITHIN
[]={LOW_W
,LOW_I
,LOW_T
,LOW_H
,LOW_I
,LOW_N
,0};
55 static const UChar PK_DECIMAL
[]={LOW_D
,LOW_E
,LOW_C
,LOW_I
,LOW_M
,LOW_A
,LOW_L
,0};
56 static const UChar PK_INTEGER
[]={LOW_I
,LOW_N
,LOW_T
,LOW_E
,LOW_G
,LOW_E
,LOW_R
,0};
58 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralRules
)
59 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralKeywordEnumeration
)
61 PluralRules::PluralRules(UErrorCode
& /*status*/)
67 PluralRules::PluralRules(const PluralRules
& other
)
74 PluralRules::~PluralRules() {
78 SharedPluralRules::~SharedPluralRules() {
83 PluralRules::clone() const {
84 return new PluralRules(*this);
88 PluralRules::operator=(const PluralRules
& other
) {
91 if (other
.mRules
==NULL
) {
95 mRules
= new RuleChain(*other
.mRules
);
102 StringEnumeration
* PluralRules::getAvailableLocales(UErrorCode
&status
) {
103 StringEnumeration
*result
= new PluralAvailableLocalesEnumeration(status
);
104 if (result
== NULL
&& U_SUCCESS(status
)) {
105 status
= U_MEMORY_ALLOCATION_ERROR
;
107 if (U_FAILURE(status
)) {
115 PluralRules
* U_EXPORT2
116 PluralRules::createRules(const UnicodeString
& description
, UErrorCode
& status
) {
117 if (U_FAILURE(status
)) {
121 PluralRuleParser parser
;
122 PluralRules
*newRules
= new PluralRules(status
);
123 if (U_SUCCESS(status
) && newRules
== NULL
) {
124 status
= U_MEMORY_ALLOCATION_ERROR
;
126 parser
.parse(description
, newRules
, status
);
127 if (U_FAILURE(status
)) {
135 PluralRules
* U_EXPORT2
136 PluralRules::createDefaultRules(UErrorCode
& status
) {
137 return createRules(UnicodeString(TRUE
, PLURAL_DEFAULT_RULE
, -1), status
);
140 /******************************************************************************/
141 /* Create PluralRules cache */
143 template<> U_I18N_API
144 const SharedPluralRules
*LocaleCacheKey
<SharedPluralRules
>::createObject(
145 const void * /*unused*/, UErrorCode
&status
) const {
146 const char *localeId
= fLoc
.getName();
147 PluralRules
*pr
= PluralRules::internalForLocale(
148 localeId
, UPLURAL_TYPE_CARDINAL
, status
);
149 if (U_FAILURE(status
)) {
152 SharedPluralRules
*result
= new SharedPluralRules(pr
);
153 if (result
== NULL
) {
154 status
= U_MEMORY_ALLOCATION_ERROR
;
162 /* end plural rules cache */
163 /******************************************************************************/
165 const SharedPluralRules
* U_EXPORT2
166 PluralRules::createSharedInstance(
167 const Locale
& locale
, UPluralType type
, UErrorCode
& status
) {
168 if (U_FAILURE(status
)) {
171 if (type
!= UPLURAL_TYPE_CARDINAL
) {
172 status
= U_UNSUPPORTED_ERROR
;
175 const SharedPluralRules
*result
= NULL
;
176 UnifiedCache::getByLocale(locale
, result
, status
);
180 PluralRules
* U_EXPORT2
181 PluralRules::forLocale(const Locale
& locale
, UErrorCode
& status
) {
182 return forLocale(locale
, UPLURAL_TYPE_CARDINAL
, status
);
185 PluralRules
* U_EXPORT2
186 PluralRules::forLocale(const Locale
& locale
, UPluralType type
, UErrorCode
& status
) {
187 if (type
!= UPLURAL_TYPE_CARDINAL
) {
188 return internalForLocale(locale
, type
, status
);
190 const SharedPluralRules
*shared
= createSharedInstance(
191 locale
, type
, status
);
192 if (U_FAILURE(status
)) {
195 PluralRules
*result
= (*shared
)->clone();
197 if (result
== NULL
) {
198 status
= U_MEMORY_ALLOCATION_ERROR
;
203 PluralRules
* U_EXPORT2
204 PluralRules::internalForLocale(const Locale
& locale
, UPluralType type
, UErrorCode
& status
) {
205 if (U_FAILURE(status
)) {
208 if (type
>= UPLURAL_TYPE_COUNT
) {
209 status
= U_ILLEGAL_ARGUMENT_ERROR
;
212 PluralRules
*newObj
= new PluralRules(status
);
213 if (newObj
==NULL
|| U_FAILURE(status
)) {
217 UnicodeString locRule
= newObj
->getRuleFromResource(locale
, type
, status
);
218 // TODO: which errors, if any, should be returned?
219 if (locRule
.length() == 0) {
220 // Locales with no specific rules (all numbers have the "other" category
221 // will return a U_MISSING_RESOURCE_ERROR at this point. This is not
223 locRule
= UnicodeString(PLURAL_DEFAULT_RULE
);
224 status
= U_ZERO_ERROR
;
226 PluralRuleParser parser
;
227 parser
.parse(locRule
, newObj
, status
);
228 // TODO: should rule parse errors be returned, or
229 // should we silently use default rules?
230 // Original impl used default rules.
231 // Ask the question to ICU Core.
237 PluralRules::select(int32_t number
) const {
238 return select(FixedDecimal(number
));
242 PluralRules::select(double number
) const {
243 return select(FixedDecimal(number
));
247 PluralRules::select(const FixedDecimal
&number
) const {
248 if (mRules
== NULL
) {
249 return UnicodeString(TRUE
, PLURAL_DEFAULT_RULE
, -1);
252 return mRules
->select(number
);
257 PluralRules::getKeywords(UErrorCode
& status
) const {
258 if (U_FAILURE(status
)) return NULL
;
259 StringEnumeration
* nameEnumerator
= new PluralKeywordEnumeration(mRules
, status
);
260 if (U_FAILURE(status
)) {
261 delete nameEnumerator
;
265 return nameEnumerator
;
269 PluralRules::getUniqueKeywordValue(const UnicodeString
& /* keyword */) {
271 return UPLRULES_NO_UNIQUE_VALUE
;
275 PluralRules::getAllKeywordValues(const UnicodeString
& /* keyword */, double * /* dest */,
276 int32_t /* destCapacity */, UErrorCode
& error
) {
277 error
= U_UNSUPPORTED_ERROR
;
/**
 * Returns the power of ten by which d must be multiplied to become a whole
 * number: 1.0 if d is already integral, 10.0 for one fraction digit, and so on.
 *
 * @param d  The value to examine.
 * @return   The scale factor; 1.0 for NaN.
 */
static double scaleForInt(double d) {
    double scale = 1.0;
    if (d != d) {
        // NaN never equals its own floor and stays NaN when multiplied, so
        // the loop below would spin forever. Treat it as needing no scaling.
        return scale;
    }
    while (d != floor(d)) {
        d = d * 10.0;
        scale = scale * 10.0;
    }
    return scale;
}
292 getSamplesFromString(const UnicodeString
&samples
, double *dest
,
293 int32_t destCapacity
, UErrorCode
& status
) {
294 int32_t sampleCount
= 0;
295 int32_t sampleStartIdx
= 0;
296 int32_t sampleEndIdx
= 0;
298 //std::string ss; // TODO: debugging.
299 // std::cout << "PluralRules::getSamples(), samples = \"" << samples.toUTF8String(ss) << "\"\n";
300 for (sampleCount
= 0; sampleCount
< destCapacity
&& sampleStartIdx
< samples
.length(); ) {
301 sampleEndIdx
= samples
.indexOf(COMMA
, sampleStartIdx
);
302 if (sampleEndIdx
== -1) {
303 sampleEndIdx
= samples
.length();
305 const UnicodeString
&sampleRange
= samples
.tempSubStringBetween(sampleStartIdx
, sampleEndIdx
);
307 // std::cout << "PluralRules::getSamples(), samplesRange = \"" << sampleRange.toUTF8String(ss) << "\"\n";
308 int32_t tildeIndex
= sampleRange
.indexOf(TILDE
);
309 if (tildeIndex
< 0) {
310 FixedDecimal
fixed(sampleRange
, status
);
311 double sampleValue
= fixed
.source
;
312 if (fixed
.visibleDecimalDigitCount
== 0 || sampleValue
!= floor(sampleValue
)) {
313 dest
[sampleCount
++] = sampleValue
;
317 FixedDecimal
fixedLo(sampleRange
.tempSubStringBetween(0, tildeIndex
), status
);
318 FixedDecimal
fixedHi(sampleRange
.tempSubStringBetween(tildeIndex
+1), status
);
319 double rangeLo
= fixedLo
.source
;
320 double rangeHi
= fixedHi
.source
;
321 if (U_FAILURE(status
)) {
324 if (rangeHi
< rangeLo
) {
325 status
= U_INVALID_FORMAT_ERROR
;
329 // For ranges of samples with fraction decimal digits, scale the number up so that we
330 // are adding one in the units place. Avoids roundoffs from repetitive adds of tenths.
332 double scale
= scaleForInt(rangeLo
);
333 double t
= scaleForInt(rangeHi
);
339 for (double n
=rangeLo
; n
<=rangeHi
; n
+=1) {
340 // Hack Alert: don't return any decimal samples with integer values that
341 // originated from a format with trailing decimals.
342 // This API is returning doubles, which can't distinguish having displayed
343 // zeros to the right of the decimal.
344 // This results in test failures with values mapping back to a different keyword.
345 double sampleValue
= n
/scale
;
346 if (!(sampleValue
== floor(sampleValue
) && fixedLo
.visibleDecimalDigitCount
> 0)) {
347 dest
[sampleCount
++] = sampleValue
;
349 if (sampleCount
>= destCapacity
) {
354 sampleStartIdx
= sampleEndIdx
+ 1;
361 PluralRules::getSamples(const UnicodeString
&keyword
, double *dest
,
362 int32_t destCapacity
, UErrorCode
& status
) {
363 RuleChain
*rc
= rulesForKeyword(keyword
);
364 if (rc
== NULL
|| destCapacity
== 0 || U_FAILURE(status
)) {
367 int32_t numSamples
= getSamplesFromString(rc
->fIntegerSamples
, dest
, destCapacity
, status
);
368 if (numSamples
== 0) {
369 numSamples
= getSamplesFromString(rc
->fDecimalSamples
, dest
, destCapacity
, status
);
375 RuleChain
*PluralRules::rulesForKeyword(const UnicodeString
&keyword
) const {
377 for (rc
= mRules
; rc
!= NULL
; rc
= rc
->fNext
) {
378 if (rc
->fKeyword
== keyword
) {
387 PluralRules::isKeyword(const UnicodeString
& keyword
) const {
388 if (0 == keyword
.compare(PLURAL_KEYWORD_OTHER
, 5)) {
391 return rulesForKeyword(keyword
) != NULL
;
395 PluralRules::getKeywordOther() const {
396 return UnicodeString(TRUE
, PLURAL_KEYWORD_OTHER
, 5);
400 PluralRules::operator==(const PluralRules
& other
) const {
401 const UnicodeString
*ptrKeyword
;
402 UErrorCode status
= U_ZERO_ERROR
;
404 if ( this == &other
) {
407 LocalPointer
<StringEnumeration
> myKeywordList(getKeywords(status
));
408 LocalPointer
<StringEnumeration
> otherKeywordList(other
.getKeywords(status
));
409 if (U_FAILURE(status
)) {
413 if (myKeywordList
->count(status
)!=otherKeywordList
->count(status
)) {
416 myKeywordList
->reset(status
);
417 while ((ptrKeyword
=myKeywordList
->snext(status
))!=NULL
) {
418 if (!other
.isKeyword(*ptrKeyword
)) {
422 otherKeywordList
->reset(status
);
423 while ((ptrKeyword
=otherKeywordList
->snext(status
))!=NULL
) {
424 if (!this->isKeyword(*ptrKeyword
)) {
428 if (U_FAILURE(status
)) {
437 PluralRuleParser::parse(const UnicodeString
& ruleData
, PluralRules
*prules
, UErrorCode
&status
)
439 if (U_FAILURE(status
)) {
442 U_ASSERT(ruleIndex
== 0); // Parsers are good for a single use only!
445 while (ruleIndex
< ruleSrc
->length()) {
446 getNextToken(status
);
447 if (U_FAILURE(status
)) {
451 if (U_FAILURE(status
)) {
456 U_ASSERT(curAndConstraint
!= NULL
);
457 curAndConstraint
= curAndConstraint
->add();
461 U_ASSERT(currentChain
!= NULL
);
462 OrConstraint
*orNode
=currentChain
->ruleHeader
;
463 while (orNode
->next
!= NULL
) {
464 orNode
= orNode
->next
;
466 orNode
->next
= new OrConstraint();
469 curAndConstraint
= orNode
->add();
473 U_ASSERT(curAndConstraint
!= NULL
);
474 U_ASSERT(curAndConstraint
->value
== -1);
475 U_ASSERT(curAndConstraint
->rangeList
== NULL
);
478 U_ASSERT(curAndConstraint
!= NULL
);
479 curAndConstraint
->negated
=TRUE
;
483 curAndConstraint
->negated
=TRUE
;
487 U_ASSERT(curAndConstraint
!= NULL
);
488 curAndConstraint
->rangeList
= new UVector32(status
);
489 curAndConstraint
->rangeList
->addElement(-1, status
); // range Low
490 curAndConstraint
->rangeList
->addElement(-1, status
); // range Hi
493 curAndConstraint
->value
=PLURAL_RANGE_HIGH
;
494 curAndConstraint
->integerOnly
= (type
!= tWithin
);
497 U_ASSERT(curAndConstraint
!= NULL
);
498 if ( (curAndConstraint
->op
==AndConstraint::MOD
)&&
499 (curAndConstraint
->opNum
== -1 ) ) {
500 curAndConstraint
->opNum
=getNumberValue(token
);
503 if (curAndConstraint
->rangeList
== NULL
) {
504 // this is for an 'is' rule
505 curAndConstraint
->value
= getNumberValue(token
);
507 // this is for an 'in' or 'within' rule
508 if (curAndConstraint
->rangeList
->elementAti(rangeLowIdx
) == -1) {
509 curAndConstraint
->rangeList
->setElementAt(getNumberValue(token
), rangeLowIdx
);
510 curAndConstraint
->rangeList
->setElementAt(getNumberValue(token
), rangeHiIdx
);
513 curAndConstraint
->rangeList
->setElementAt(getNumberValue(token
), rangeHiIdx
);
514 if (curAndConstraint
->rangeList
->elementAti(rangeLowIdx
) >
515 curAndConstraint
->rangeList
->elementAti(rangeHiIdx
)) {
516 // Range Lower bound > Range Upper bound.
517 // U_UNEXPECTED_TOKEN seems a little funny, but it is consistently
518 // used for all plural rule parse errors.
519 status
= U_UNEXPECTED_TOKEN
;
527 // TODO: rule syntax checking is inadequate, can happen with badly formed rules.
528 // Catch cases like "n mod 10, is 1" here instead.
529 if (curAndConstraint
== NULL
|| curAndConstraint
->rangeList
== NULL
) {
530 status
= U_UNEXPECTED_TOKEN
;
533 U_ASSERT(curAndConstraint
->rangeList
->size() >= 2);
534 rangeLowIdx
= curAndConstraint
->rangeList
->size();
535 curAndConstraint
->rangeList
->addElement(-1, status
); // range Low
536 rangeHiIdx
= curAndConstraint
->rangeList
->size();
537 curAndConstraint
->rangeList
->addElement(-1, status
); // range Hi
540 U_ASSERT(curAndConstraint
!= NULL
);
541 curAndConstraint
->op
=AndConstraint::MOD
;
548 U_ASSERT(curAndConstraint
!= NULL
);
549 curAndConstraint
->digitsType
= type
;
553 RuleChain
*newChain
= new RuleChain
;
554 if (newChain
== NULL
) {
555 status
= U_MEMORY_ALLOCATION_ERROR
;
558 newChain
->fKeyword
= token
;
559 if (prules
->mRules
== NULL
) {
560 prules
->mRules
= newChain
;
562 // The new rule chain goes at the end of the linked list of rule chains,
563 // unless there is an "other" keyword & chain. "other" must remain last.
564 RuleChain
*insertAfter
= prules
->mRules
;
565 while (insertAfter
->fNext
!=NULL
&&
566 insertAfter
->fNext
->fKeyword
.compare(PLURAL_KEYWORD_OTHER
, 5) != 0 ){
567 insertAfter
=insertAfter
->fNext
;
569 newChain
->fNext
= insertAfter
->fNext
;
570 insertAfter
->fNext
= newChain
;
572 OrConstraint
*orNode
= new OrConstraint();
573 newChain
->ruleHeader
= orNode
;
574 curAndConstraint
= orNode
->add();
575 currentChain
= newChain
;
581 getNextToken(status
);
582 if (U_FAILURE(status
) || type
== tSemiColon
|| type
== tEOF
|| type
== tAt
) {
585 if (type
== tEllipsis
) {
586 currentChain
->fIntegerSamplesUnbounded
= TRUE
;
589 currentChain
->fIntegerSamples
.append(token
);
595 getNextToken(status
);
596 if (U_FAILURE(status
) || type
== tSemiColon
|| type
== tEOF
|| type
== tAt
) {
599 if (type
== tEllipsis
) {
600 currentChain
->fDecimalSamplesUnbounded
= TRUE
;
603 currentChain
->fDecimalSamples
.append(token
);
611 if (U_FAILURE(status
)) {
618 PluralRules::getRuleFromResource(const Locale
& locale
, UPluralType type
, UErrorCode
& errCode
) {
619 UnicodeString emptyStr
;
621 if (U_FAILURE(errCode
)) {
624 LocalUResourceBundlePointer
rb(ures_openDirect(NULL
, "plurals", &errCode
));
625 if(U_FAILURE(errCode
)) {
630 case UPLURAL_TYPE_CARDINAL
:
633 case UPLURAL_TYPE_ORDINAL
:
634 typeKey
= "locales_ordinals";
637 // Must not occur: The caller should have checked for valid types.
638 errCode
= U_ILLEGAL_ARGUMENT_ERROR
;
641 LocalUResourceBundlePointer
locRes(ures_getByKey(rb
.getAlias(), typeKey
, NULL
, &errCode
));
642 if(U_FAILURE(errCode
)) {
646 const char *curLocaleName
=locale
.getName();
647 const UChar
* s
= ures_getStringByKey(locRes
.getAlias(), curLocaleName
, &resLen
, &errCode
);
650 // Check parent locales.
651 UErrorCode status
= U_ZERO_ERROR
;
652 char parentLocaleName
[ULOC_FULLNAME_CAPACITY
];
653 const char *curLocaleName
=locale
.getName();
654 uprv_strcpy(parentLocaleName
, curLocaleName
);
656 while (uloc_getParent(parentLocaleName
, parentLocaleName
,
657 ULOC_FULLNAME_CAPACITY
, &status
) > 0) {
659 s
= ures_getStringByKey(locRes
.getAlias(), parentLocaleName
, &resLen
, &status
);
661 errCode
= U_ZERO_ERROR
;
664 status
= U_ZERO_ERROR
;
672 u_UCharsToChars(s
, setKey
, resLen
+ 1);
673 // printf("\n PluralRule: %s\n", setKey);
675 LocalUResourceBundlePointer
ruleRes(ures_getByKey(rb
.getAlias(), "rules", NULL
, &errCode
));
676 if(U_FAILURE(errCode
)) {
679 LocalUResourceBundlePointer
setRes(ures_getByKey(ruleRes
.getAlias(), setKey
, NULL
, &errCode
));
680 if (U_FAILURE(errCode
)) {
684 int32_t numberKeys
= ures_getSize(setRes
.getAlias());
685 UnicodeString result
;
686 const char *key
=NULL
;
687 for(int32_t i
=0; i
<numberKeys
; ++i
) { // Keys are zero, one, few, ...
688 UnicodeString rules
= ures_getNextUnicodeString(setRes
.getAlias(), &key
, &errCode
);
689 UnicodeString
uKey(key
, -1, US_INV
);
691 result
.append(COLON
);
692 result
.append(rules
);
693 result
.append(SEMI_COLON
);
700 PluralRules::getRules() const {
702 if (mRules
!= NULL
) {
703 mRules
->dumpRules(rules
);
709 AndConstraint::AndConstraint() {
710 op
= AndConstraint::NONE
;
721 AndConstraint::AndConstraint(const AndConstraint
& other
) {
723 this->opNum
=other
.opNum
;
724 this->value
=other
.value
;
725 this->rangeList
=NULL
;
726 if (other
.rangeList
!= NULL
) {
727 UErrorCode status
= U_ZERO_ERROR
;
728 this->rangeList
= new UVector32(status
);
729 this->rangeList
->assign(*other
.rangeList
, status
);
731 this->integerOnly
=other
.integerOnly
;
732 this->negated
=other
.negated
;
733 this->digitsType
= other
.digitsType
;
734 if (other
.next
==NULL
) {
738 this->next
= new AndConstraint(*other
.next
);
742 AndConstraint::~AndConstraint() {
751 AndConstraint::isFulfilled(const FixedDecimal
&number
) {
753 if (digitsType
== none
) {
754 // An empty AndConstraint, created by a rule with a keyword but no following expression.
757 double n
= number
.get(digitsType
); // pulls n | i | v | f value for the number.
758 // Will always be positive.
759 // May be non-integer (n option only)
761 if (integerOnly
&& n
!= uprv_floor(n
)) {
769 if (rangeList
== NULL
) {
770 result
= value
== -1 || // empty rule
771 n
== value
; // 'is' rule
774 result
= FALSE
; // 'in' or 'within' rule
775 for (int32_t r
=0; r
<rangeList
->size(); r
+=2) {
776 if (rangeList
->elementAti(r
) <= n
&& n
<= rangeList
->elementAti(r
+1)) {
793 this->next
= new AndConstraint();
797 OrConstraint::OrConstraint() {
802 OrConstraint::OrConstraint(const OrConstraint
& other
) {
803 if ( other
.childNode
== NULL
) {
804 this->childNode
= NULL
;
807 this->childNode
= new AndConstraint(*(other
.childNode
));
809 if (other
.next
== NULL
) {
813 this->next
= new OrConstraint(*(other
.next
));
817 OrConstraint::~OrConstraint() {
818 if (childNode
!=NULL
) {
829 OrConstraint
*curOrConstraint
=this;
831 while (curOrConstraint
->next
!=NULL
) {
832 curOrConstraint
= curOrConstraint
->next
;
834 U_ASSERT(curOrConstraint
->childNode
== NULL
);
835 curOrConstraint
->childNode
= new AndConstraint();
837 return curOrConstraint
->childNode
;
841 OrConstraint::isFulfilled(const FixedDecimal
&number
) {
842 OrConstraint
* orRule
=this;
845 while (orRule
!=NULL
&& !result
) {
847 AndConstraint
* andRule
= orRule
->childNode
;
848 while (andRule
!=NULL
&& result
) {
849 result
= andRule
->isFulfilled(number
);
850 andRule
=andRule
->next
;
852 orRule
= orRule
->next
;
859 RuleChain::RuleChain(): fKeyword(), fNext(NULL
), ruleHeader(NULL
), fDecimalSamples(), fIntegerSamples(),
860 fDecimalSamplesUnbounded(FALSE
), fIntegerSamplesUnbounded(FALSE
) {
863 RuleChain::RuleChain(const RuleChain
& other
) :
864 fKeyword(other
.fKeyword
), fNext(NULL
), ruleHeader(NULL
), fDecimalSamples(other
.fDecimalSamples
),
865 fIntegerSamples(other
.fIntegerSamples
), fDecimalSamplesUnbounded(other
.fDecimalSamplesUnbounded
),
866 fIntegerSamplesUnbounded(other
.fIntegerSamplesUnbounded
) {
867 if (other
.ruleHeader
!= NULL
) {
868 this->ruleHeader
= new OrConstraint(*(other
.ruleHeader
));
870 if (other
.fNext
!= NULL
) {
871 this->fNext
= new RuleChain(*other
.fNext
);
875 RuleChain::~RuleChain() {
882 RuleChain::select(const FixedDecimal
&number
) const {
883 if (!number
.isNanOrInfinity
) {
884 for (const RuleChain
*rules
= this; rules
!= NULL
; rules
= rules
->fNext
) {
885 if (rules
->ruleHeader
->isFulfilled(number
)) {
886 return rules
->fKeyword
;
890 return UnicodeString(TRUE
, PLURAL_KEYWORD_OTHER
, 5);
893 static UnicodeString
tokenString(tokenType tok
) {
897 s
.append(LOW_N
); break;
899 s
.append(LOW_I
); break;
901 s
.append(LOW_F
); break;
903 s
.append(LOW_V
); break;
905 s
.append(LOW_T
); break;
913 RuleChain::dumpRules(UnicodeString
& result
) {
914 UChar digitString
[16];
916 if ( ruleHeader
!= NULL
) {
920 OrConstraint
* orRule
=ruleHeader
;
921 while ( orRule
!= NULL
) {
922 AndConstraint
* andRule
=orRule
->childNode
;
923 while ( andRule
!= NULL
) {
924 if ((andRule
->op
==AndConstraint::NONE
) && (andRule
->rangeList
==NULL
) && (andRule
->value
== -1)) {
926 } else if ( (andRule
->op
==AndConstraint::NONE
) && (andRule
->rangeList
==NULL
) ) {
927 result
+= tokenString(andRule
->digitsType
);
928 result
+= UNICODE_STRING_SIMPLE(" is ");
929 if (andRule
->negated
) {
930 result
+= UNICODE_STRING_SIMPLE("not ");
932 uprv_itou(digitString
,16, andRule
->value
,10,0);
933 result
+= UnicodeString(digitString
);
936 result
+= tokenString(andRule
->digitsType
);
938 if (andRule
->op
==AndConstraint::MOD
) {
939 result
+= UNICODE_STRING_SIMPLE("mod ");
940 uprv_itou(digitString
,16, andRule
->opNum
,10,0);
941 result
+= UnicodeString(digitString
);
943 if (andRule
->rangeList
==NULL
) {
944 if (andRule
->negated
) {
945 result
+= UNICODE_STRING_SIMPLE(" is not ");
946 uprv_itou(digitString
,16, andRule
->value
,10,0);
947 result
+= UnicodeString(digitString
);
950 result
+= UNICODE_STRING_SIMPLE(" is ");
951 uprv_itou(digitString
,16, andRule
->value
,10,0);
952 result
+= UnicodeString(digitString
);
956 if (andRule
->negated
) {
957 if ( andRule
->integerOnly
) {
958 result
+= UNICODE_STRING_SIMPLE(" not in ");
961 result
+= UNICODE_STRING_SIMPLE(" not within ");
965 if ( andRule
->integerOnly
) {
966 result
+= UNICODE_STRING_SIMPLE(" in ");
969 result
+= UNICODE_STRING_SIMPLE(" within ");
972 for (int32_t r
=0; r
<andRule
->rangeList
->size(); r
+=2) {
973 int32_t rangeLo
= andRule
->rangeList
->elementAti(r
);
974 int32_t rangeHi
= andRule
->rangeList
->elementAti(r
+1);
975 uprv_itou(digitString
,16, rangeLo
, 10, 0);
976 result
+= UnicodeString(digitString
);
977 result
+= UNICODE_STRING_SIMPLE("..");
978 uprv_itou(digitString
,16, rangeHi
, 10,0);
979 result
+= UnicodeString(digitString
);
980 if (r
+2 < andRule
->rangeList
->size()) {
981 result
+= UNICODE_STRING_SIMPLE(", ");
986 if ( (andRule
=andRule
->next
) != NULL
) {
987 result
+= UNICODE_STRING_SIMPLE(" and ");
990 if ( (orRule
= orRule
->next
) != NULL
) {
991 result
+= UNICODE_STRING_SIMPLE(" or ");
995 if ( fNext
!= NULL
) {
996 result
+= UNICODE_STRING_SIMPLE("; ");
997 fNext
->dumpRules(result
);
1003 RuleChain::getKeywords(int32_t capacityOfKeywords
, UnicodeString
* keywords
, int32_t& arraySize
) const {
1004 if ( arraySize
< capacityOfKeywords
-1 ) {
1005 keywords
[arraySize
++]=fKeyword
;
1008 return U_BUFFER_OVERFLOW_ERROR
;
1011 if ( fNext
!= NULL
) {
1012 return fNext
->getKeywords(capacityOfKeywords
, keywords
, arraySize
);
1015 return U_ZERO_ERROR
;
1020 RuleChain::isKeyword(const UnicodeString
& keywordParam
) const {
1021 if ( fKeyword
== keywordParam
) {
1025 if ( fNext
!= NULL
) {
1026 return fNext
->isKeyword(keywordParam
);
1034 PluralRuleParser::PluralRuleParser() :
1035 ruleIndex(0), token(), type(none
), prevType(none
),
1036 curAndConstraint(NULL
), currentChain(NULL
), rangeLowIdx(-1), rangeHiIdx(-1)
1040 PluralRuleParser::~PluralRuleParser() {
1045 PluralRuleParser::getNumberValue(const UnicodeString
& token
) {
1049 i
= token
.extract(0, token
.length(), digits
, ARRAY_SIZE(digits
), US_INV
);
1052 return((int32_t)atoi(digits
));
1057 PluralRuleParser::checkSyntax(UErrorCode
&status
)
1059 if (U_FAILURE(status
)) {
1062 if (!(prevType
==none
|| prevType
==tSemiColon
)) {
1063 type
= getKeyType(token
, type
); // Switch token type from tKeyword if we scanned a reserved word,
1064 // and we are not at the start of a rule, where a
1065 // keyword is expected.
1071 if (type
!=tKeyword
&& type
!= tEOF
) {
1072 status
= U_UNEXPECTED_TOKEN
;
1080 if (type
!= tIs
&& type
!= tMod
&& type
!= tIn
&&
1081 type
!= tNot
&& type
!= tWithin
&& type
!= tEqual
&& type
!= tNotEqual
) {
1082 status
= U_UNEXPECTED_TOKEN
;
1086 if (type
!= tColon
) {
1087 status
= U_UNEXPECTED_TOKEN
;
1091 if (!(type
== tVariableN
||
1092 type
== tVariableI
||
1093 type
== tVariableF
||
1094 type
== tVariableT
||
1095 type
== tVariableV
||
1097 status
= U_UNEXPECTED_TOKEN
;
1101 if ( type
!= tNumber
&& type
!= tNot
) {
1102 status
= U_UNEXPECTED_TOKEN
;
1106 if (type
!= tNumber
&& type
!= tIn
&& type
!= tWithin
) {
1107 status
= U_UNEXPECTED_TOKEN
;
1116 if (type
!= tNumber
) {
1117 status
= U_UNEXPECTED_TOKEN
;
1122 if ( type
!= tVariableN
&&
1123 type
!= tVariableI
&&
1124 type
!= tVariableF
&&
1125 type
!= tVariableT
&&
1126 type
!= tVariableV
) {
1127 status
= U_UNEXPECTED_TOKEN
;
1131 if (type
!= tNumber
) {
1132 status
= U_UNEXPECTED_TOKEN
;
1136 if (type
!= tDot2
&& type
!= tSemiColon
&& type
!= tIs
&& type
!= tNot
&&
1137 type
!= tIn
&& type
!= tEqual
&& type
!= tNotEqual
&& type
!= tWithin
&&
1138 type
!= tAnd
&& type
!= tOr
&& type
!= tComma
&& type
!= tAt
&&
1141 status
= U_UNEXPECTED_TOKEN
;
1143 // TODO: a comma following a number that is not part of a range will be allowed.
1144 // It's not the only case of this sort of thing. Parser needs a re-write.
1147 if (type
!= tDecimal
&& type
!= tInteger
) {
1148 status
= U_UNEXPECTED_TOKEN
;
1152 status
= U_UNEXPECTED_TOKEN
;
1159 * Scan the next token from the input rules.
1160 * rules and returned token type are in the parser state variables.
1163 PluralRuleParser::getNextToken(UErrorCode
&status
)
1165 if (U_FAILURE(status
)) {
1170 while (ruleIndex
< ruleSrc
->length()) {
1171 ch
= ruleSrc
->charAt(ruleIndex
);
1172 type
= charType(ch
);
1173 if (type
!= tSpace
) {
1178 if (ruleIndex
>= ruleSrc
->length()) {
1182 int32_t curIndex
= ruleIndex
;
1189 case tTilde
: // scanned '~'
1190 case tAt
: // scanned '@'
1191 case tEqual
: // scanned '='
1192 case tMod
: // scanned '%'
1193 // Single character tokens.
1197 case tNotEqual
: // scanned '!'
1198 if (ruleSrc
->charAt(curIndex
+1) == EQUALS
) {
1207 while (type
== tKeyword
&& ++curIndex
< ruleSrc
->length()) {
1208 ch
= ruleSrc
->charAt(curIndex
);
1209 type
= charType(ch
);
1215 while (type
== tNumber
&& ++curIndex
< ruleSrc
->length()) {
1216 ch
= ruleSrc
->charAt(curIndex
);
1217 type
= charType(ch
);
1223 // We could be looking at either ".." in a range, or "..." at the end of a sample.
1224 if (curIndex
+1 >= ruleSrc
->length() || ruleSrc
->charAt(curIndex
+1) != DOT
) {
1226 break; // Single dot
1228 if (curIndex
+2 >= ruleSrc
->length() || ruleSrc
->charAt(curIndex
+2) != DOT
) {
1231 break; // double dot
1235 break; // triple dot
1238 status
= U_UNEXPECTED_TOKEN
;
1243 U_ASSERT(ruleIndex
<= ruleSrc
->length());
1244 U_ASSERT(curIndex
<= ruleSrc
->length());
1245 token
=UnicodeString(*ruleSrc
, ruleIndex
, curIndex
-ruleIndex
);
1246 ruleIndex
= curIndex
;
1250 PluralRuleParser::charType(UChar ch
) {
1251 if ((ch
>=U_ZERO
) && (ch
<=U_NINE
)) {
1254 if (ch
>=LOW_A
&& ch
<=LOW_Z
) {
1286 // Set token type for reserved words in the Plural Rule syntax.
1289 PluralRuleParser::getKeyType(const UnicodeString
&token
, tokenType keyType
)
1291 if (keyType
!= tKeyword
) {
1295 if (0 == token
.compare(PK_VAR_N
, 1)) {
1296 keyType
= tVariableN
;
1297 } else if (0 == token
.compare(PK_VAR_I
, 1)) {
1298 keyType
= tVariableI
;
1299 } else if (0 == token
.compare(PK_VAR_F
, 1)) {
1300 keyType
= tVariableF
;
1301 } else if (0 == token
.compare(PK_VAR_T
, 1)) {
1302 keyType
= tVariableT
;
1303 } else if (0 == token
.compare(PK_VAR_V
, 1)) {
1304 keyType
= tVariableV
;
1305 } else if (0 == token
.compare(PK_IS
, 2)) {
1307 } else if (0 == token
.compare(PK_AND
, 3)) {
1309 } else if (0 == token
.compare(PK_IN
, 2)) {
1311 } else if (0 == token
.compare(PK_WITHIN
, 6)) {
1313 } else if (0 == token
.compare(PK_NOT
, 3)) {
1315 } else if (0 == token
.compare(PK_MOD
, 3)) {
1317 } else if (0 == token
.compare(PK_OR
, 2)) {
1319 } else if (0 == token
.compare(PK_DECIMAL
, 7)) {
1321 } else if (0 == token
.compare(PK_INTEGER
, 7)) {
1328 PluralKeywordEnumeration::PluralKeywordEnumeration(RuleChain
*header
, UErrorCode
& status
)
1329 : pos(0), fKeywordNames(status
) {
1330 if (U_FAILURE(status
)) {
1333 fKeywordNames
.setDeleter(uprv_deleteUObject
);
1334 UBool addKeywordOther
=TRUE
;
1335 RuleChain
*node
=header
;
1337 fKeywordNames
.addElement(new UnicodeString(node
->fKeyword
), status
);
1338 if (U_FAILURE(status
)) {
1341 if (0 == node
->fKeyword
.compare(PLURAL_KEYWORD_OTHER
, 5)) {
1342 addKeywordOther
= FALSE
;
1347 if (addKeywordOther
) {
1348 fKeywordNames
.addElement(new UnicodeString(PLURAL_KEYWORD_OTHER
), status
);
1352 const UnicodeString
*
1353 PluralKeywordEnumeration::snext(UErrorCode
& status
) {
1354 if (U_SUCCESS(status
) && pos
< fKeywordNames
.size()) {
1355 return (const UnicodeString
*)fKeywordNames
.elementAt(pos
++);
1361 PluralKeywordEnumeration::reset(UErrorCode
& /*status*/) {
1366 PluralKeywordEnumeration::count(UErrorCode
& /*status*/) const {
1367 return fKeywordNames
.size();
1370 PluralKeywordEnumeration::~PluralKeywordEnumeration() {
1375 FixedDecimal::FixedDecimal(double n
, int32_t v
, int64_t f
) {
1377 // check values. TODO make into unit test.
1379 // long visiblePower = (int) Math.pow(10, v);
1380 // if (decimalDigits > visiblePower) {
1381 // throw new IllegalArgumentException();
1383 // double fraction = intValue + (decimalDigits / (double) visiblePower);
1384 // if (fraction != source) {
1385 // double diff = Math.abs(fraction - source)/(Math.abs(fraction) + Math.abs(source));
1386 // if (diff > 0.00000001d) {
1387 // throw new IllegalArgumentException();
1392 FixedDecimal::FixedDecimal(double n
, int32_t v
) {
1393 // Ugly, but for samples we don't care.
1394 init(n
, v
, getFractionalDigits(n
, v
));
1397 FixedDecimal::FixedDecimal(double n
) {
1401 FixedDecimal::FixedDecimal() {
1406 // Create a FixedDecimal from a UnicodeString containing a number.
1407 // Inefficient, but only used for samples, so simplicity trumps efficiency.
1409 FixedDecimal::FixedDecimal(const UnicodeString
&num
, UErrorCode
&status
) {
1411 cs
.appendInvariantChars(num
, status
);
1413 dl
.set(cs
.toStringPiece(), status
);
1414 if (U_FAILURE(status
)) {
1418 int32_t decimalPoint
= num
.indexOf(DOT
);
1419 double n
= dl
.getDouble();
1420 if (decimalPoint
== -1) {
1423 int32_t v
= num
.length() - decimalPoint
- 1;
1424 init(n
, v
, getFractionalDigits(n
, v
));
1429 FixedDecimal::FixedDecimal(const FixedDecimal
&other
) {
1430 source
= other
.source
;
1431 visibleDecimalDigitCount
= other
.visibleDecimalDigitCount
;
1432 decimalDigits
= other
.decimalDigits
;
1433 decimalDigitsWithoutTrailingZeros
= other
.decimalDigitsWithoutTrailingZeros
;
1434 intValue
= other
.intValue
;
1435 hasIntegerValue
= other
.hasIntegerValue
;
1436 isNegative
= other
.isNegative
;
1437 isNanOrInfinity
= other
.isNanOrInfinity
;
1441 void FixedDecimal::init(double n
) {
1442 int32_t numFractionDigits
= decimals(n
);
1443 init(n
, numFractionDigits
, getFractionalDigits(n
, numFractionDigits
));
// Core initializer: fills in the fields of this FixedDecimal.
//
// n - the numeric value.
// v - count of visible fraction digits (stored in visibleDecimalDigitCount).
// f - the fraction digits expressed as an integer.
1447 void FixedDecimal::init(double n
, int32_t v
, int64_t f
) {
// Record the sign of the input.
1448 isNegative
= n
< 0.0;
// Classify the value. Note that the test is made on the member `source`,
// not on the parameter n.
1450 isNanOrInfinity
= uprv_isNaN(source
) || uprv_isPositiveInfinity(source
);
1451 if (isNanOrInfinity
) {
// A NaN or infinite value never counts as having an integer value.
1455 hasIntegerValue
= FALSE
;
// Finite case: the cast to int64_t truncates, keeping only the integer part.
1457 intValue
= (int64_t)source
;
// True exactly when the truncation above lost nothing.
1458 hasIntegerValue
= (source
== intValue
);
1461 visibleDecimalDigitCount
= v
;
1464 decimalDigitsWithoutTrailingZeros
= 0;
// Loop for as long as the lowest decimal digit of fdwtz is zero.
1467 while ((fdwtz%10
) == 0) {
// Store the value fdwtz holds once the loop has finished.
1470 decimalDigitsWithoutTrailingZeros
= fdwtz
;
1475 // Fast path only exact initialization. Return true if successful.
1476 // Note: Do not multiply by 10 each time through loop, rounding cruft can build
1477 // up that makes the check for an integer result fail.
1478 // A single multiply of the original number works more reliably.
// Powers of ten indexed by exponent: p10[k] == 10^k for k in 0..4.
// Read-only lookup table, hence const.
static const int32_t p10[] = {1, 10, 100, 1000, 10000};
// Tries to initialize this object from n by testing whether n has at most
// 0, 1, 2 or 3 fraction digits. Returns a UBool.
1480 UBool
FixedDecimal::quickInit(double n
) {
// Starts out as FALSE.
1481 UBool success
= FALSE
;
// Declared outside the loop because the init() call after it reads the value.
1483 int32_t numFractionDigits
;
// Candidate digit counts are tried in ascending order: 0, 1, 2, 3.
1484 for (numFractionDigits
= 0; numFractionDigits
<= 3; numFractionDigits
++) {
// One multiplication of the original n by 10^numFractionDigits (table lookup),
// never a running product.
1485 double scaledN
= n
* p10
[numFractionDigits
];
// The scaled value has no fractional part, so n needs no more than
// numFractionDigits fraction digits.
1486 if (scaledN
== floor(scaledN
)) {
// Full initialization, with numFractionDigits as the fraction digit count.
1492 init(n
, numFractionDigits
, getFractionalDigits(n
, numFractionDigits
));
// Returns the number of fraction digits of n as an int32_t.
1499 int32_t FixedDecimal::decimals(double n
) {
1500 // Count the number of decimal digits in the fraction part of the number, excluding trailing zeros.
1501 // fastpath the common cases, integers or fractions with 3 or fewer digits
1503 for (int ndigits
=0; ndigits
<=3; ndigits
++) {
// One multiplication of n by 10^ndigits, taken from the p10 table.
1504 double scaledN
= n
* p10
[ndigits
];
// No fractional part is left after scaling by 10^ndigits.
1505 if (scaledN
== floor(scaledN
)) {
1510 // Slow path, convert with sprintf, parse converted output.
// "%1.15e" yields one leading digit, '.', 15 fraction digits, 'e', and a
// signed exponent. In buf that is: [0] digit, [1] '.', [2..16] fraction
// digits, [17] 'e', [18...] exponent.
// NOTE(review): these fixed offsets assume buf has no leading '-' — confirm
// that n is non-negative at this point.
1512 sprintf(buf
, "%1.15e", n
);
1513 // formatted number looks like this: 1.234567890123457e-01
// Offset 18 is where the signed exponent begins.
1514 int exponent
= atoi(buf
+18);
// Start from the 15 fraction digits that were printed.
1515 int numFractionDigits
= 15;
// Scan backwards from index 16, the last printed fraction digit. The for
// statement has no condition of its own.
1516 for (int i
=16; ; --i
) {
// Looks for a character other than '0'.
1517 if (buf
[i
] != '0') {
// One fewer digit to count.
1520 --numFractionDigits
;
1522 numFractionDigits
-= exponent
; // Fraction part of fixed point representation.
1523 return numFractionDigits
;
1527 // Get the fraction digits of a double, represented as an integer.
1528 // v is the number of visible fraction digits in the displayed form of the number.
1529 // Example: n = 1001.234, v = 6, result = 234000
1530 // TODO: need to think through how this is used in the plural rule context.
1531 // This function can easily encounter integer overflow,
1532 // and can easily return noise digits when the precision of a double is exceeded.
//
// n - the value whose fraction digits are wanted.
// v - how many fraction digits to produce.
// Returns the digits as an int64_t.
1534 int64_t FixedDecimal::getFractionalDigits(double n
, int32_t v
) {
// Special cases tested up front: no digits requested, n already integral,
// n is NaN, or n is positive infinity.
1535 if (v
== 0 || n
== floor(n
) || uprv_isNaN(n
) || uprv_isPositiveInfinity(n
)) {
// Fractional part of n.
1539 double fract
= n
- floor(n
);
// v of 1, 2 or 3: scale by a literal power of ten. Adding 0.5 before the
// truncating cast rounds to the nearest integer.
1541 case 1: return (int64_t)(fract
*10.0 + 0.5);
1542 case 2: return (int64_t)(fract
*100.0 + 0.5);
1543 case 3: return (int64_t)(fract
*1000.0 + 0.5);
// General form: scale by 10^v through pow(), rounded with floor(x + 0.5).
1545 double scaled
= floor(fract
* pow(10.0, (double)v
) + 0.5);
// Range check against the largest int64_t, made before the cast below.
1546 if (scaled
> U_INT64_MAX
) {
1549 return (int64_t)scaled
;
// Extends the visible fraction digits to minFractionDigits by appending
// trailing zeros. The adjustments below are applied only when
// minFractionDigits exceeds the current visibleDecimalDigitCount.
1555 void FixedDecimal::adjustForMinFractionDigits(int32_t minFractionDigits
) {
// Number of zeros that would have to be appended; zero or negative means
// there are already enough digits.
1556 int32_t numTrailingFractionZeros
= minFractionDigits
- visibleDecimalDigitCount
;
1557 if (numTrailingFractionZeros
> 0) {
// One iteration per zero to append.
1558 for (int32_t i
=0; i
<numTrailingFractionZeros
; i
++) {
1559 // Do not let the decimalDigits value overflow if there are many trailing zeros.
1560 // Limit the value to 18 digits, the most that a 64 bit int can fully represent.
// The literal is 10^17, the smallest value with 18 decimal digits.
1561 if (decimalDigits
>= 100000000000000000LL) {
// Appending a zero to the fraction is a multiplication by ten.
1564 decimalDigits
*= 10;
// The digit count grows by the full number of requested zeros.
1566 visibleDecimalDigitCount
+= numTrailingFractionZeros
;
// Returns, as a double, the field of this FixedDecimal that corresponds to
// the given operand token:
//   tVariableN -> source
//   tVariableI -> intValue
//   tVariableF -> decimalDigits
//   tVariableT -> decimalDigitsWithoutTrailingZeros
//   tVariableV -> visibleDecimalDigitCount
1571 double FixedDecimal::get(tokenType operand
) const {
1573 case tVariableN
: return source
;
// The integer-typed fields are converted to double on return.
1574 case tVariableI
: return (double)intValue
;
1575 case tVariableF
: return (double)decimalDigits
;
1576 case tVariableT
: return (double)decimalDigitsWithoutTrailingZeros
;
1577 case tVariableV
: return visibleDecimalDigitCount
;
// Reaching this assertion means the operand token was not one of those above.
1579 U_ASSERT(FALSE
); // unexpected.
1584 int32_t FixedDecimal::getVisibleFractionDigitCount() const {
1585 return visibleDecimalDigitCount
;
// Opens the "locales" table of the "plurals" resource bundle.
//
// status - ICU error code supplied by the caller. It is copied into
//          fOpenStatus and tested, but the resource calls below report their
//          errors into fOpenStatus, not into status.
1590 PluralAvailableLocalesEnumeration::PluralAvailableLocalesEnumeration(UErrorCode
&status
) {
// Remember the status the caller came in with.
1593 fOpenStatus
= status
;
1594 if (U_FAILURE(status
)) {
// Start the open sequence from a clean error code.
1597 fOpenStatus
= U_ZERO_ERROR
;
// Open the "plurals" bundle directly; rb holds it for this scope.
1598 LocalUResourceBundlePointer
rb(ures_openDirect(NULL
, "plurals", &fOpenStatus
));
// Keep the "locales" sub-resource in fLocales.
1599 fLocales
= ures_getByKey(rb
.getAlias(), "locales", NULL
, &fOpenStatus
);
// Destructor: closes the resource bundle held in fLocales.
1602 PluralAvailableLocalesEnumeration::~PluralAvailableLocalesEnumeration() {
1603 ures_close(fLocales
);
// Advances the enumeration and yields the key of the next resource inside
// fLocales.
//
// resultLength - optional output; when non-NULL it receives the length of the
//                key string.
// status       - ICU error code, both input and output.
1609 const char *PluralAvailableLocalesEnumeration::next(int32_t *resultLength
, UErrorCode
&status
) {
// The caller's status is checked first.
1610 if (U_FAILURE(status
)) {
// A failure recorded while opening the bundle is handed to the caller.
1613 if (U_FAILURE(fOpenStatus
)) {
1614 status
= fOpenStatus
;
// Step to the next item; fRes is passed in and receives the result.
1617 fRes
= ures_getNextResource(fLocales
, fRes
, &status
);
// No resource came back, or an error was reported.
1618 if (fRes
== NULL
|| U_FAILURE(status
)) {
// An out-of-bounds index is cleared back to U_ZERO_ERROR — presumably
// because it only marks the end of the iteration.
1619 if (status
== U_INDEX_OUTOFBOUNDS_ERROR
) {
1620 status
= U_ZERO_ERROR
;
// The key of the current resource is the string handed back.
1624 const char *result
= ures_getKey(fRes
);
// The length is reported only when the caller asked for it.
1625 if (resultLength
!= NULL
) {
1626 *resultLength
= uprv_strlen(result
);
// Resets the iterator over fLocales.
//
// status - ICU error code; it is set to fOpenStatus when opening the bundle
//          had failed.
1632 void PluralAvailableLocalesEnumeration::reset(UErrorCode
&status
) {
// The caller's status is checked first.
1633 if (U_FAILURE(status
)) {
// A failure recorded while opening the bundle is handed to the caller.
1636 if (U_FAILURE(fOpenStatus
)) {
1637 status
= fOpenStatus
;
1640 ures_resetIterator(fLocales
);
// Reports the size of fLocales, as given by ures_getSize().
//
// status - ICU error code; it is set to fOpenStatus when opening the bundle
//          had failed.
1643 int32_t PluralAvailableLocalesEnumeration::count(UErrorCode
&status
) const {
// The caller's status is checked first.
1644 if (U_FAILURE(status
)) {
// A failure recorded while opening the bundle is handed to the caller.
1647 if (U_FAILURE(fOpenStatus
)) {
1648 status
= fOpenStatus
;
1651 return ures_getSize(fLocales
);
1657 #endif /* #if !UCONFIG_NO_FORMATTING */