2 *******************************************************************************
3 * Copyright (C) 2007-2016, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 *******************************************************************************
13 #include "unicode/utypes.h"
14 #include "unicode/localpointer.h"
15 #include "unicode/plurrule.h"
16 #include "unicode/upluralrules.h"
17 #include "unicode/ures.h"
25 #include "patternprops.h"
26 #include "plurrule_impl.h"
32 #include "sharedpluralrules.h"
33 #include "unifiedcache.h"
34 #include "digitinterval.h"
35 #include "visibledigits.h"
38 #if !UCONFIG_NO_FORMATTING
// File-local, zero-terminated UChar strings used by the plural-rule code in this file.
// Each array is spelled one code unit at a time with the LOW_*/COLON/SPACE constants
// (defined outside this file; presumably the matching ASCII letters and punctuation).

// The keyword "other". Callers in this file compare/alias it with an explicit length of 5.
42 static const UChar PLURAL_KEYWORD_OTHER
[]={LOW_O
,LOW_T
,LOW_H
,LOW_E
,LOW_R
,0};
// The fallback rule description ("other: n"); parsed by createDefaultRules() and used
// when a locale has no rule of its own.
43 static const UChar PLURAL_DEFAULT_RULE
[]={LOW_O
,LOW_T
,LOW_H
,LOW_E
,LOW_R
,COLON
,SPACE
,LOW_N
,0};
// PK_*: reserved words of the rule syntax. PluralRuleParser::getKeyType() compares a
// scanned token against each of these, passing the word's length explicitly.
44 static const UChar PK_IN
[]={LOW_I
,LOW_N
,0};
45 static const UChar PK_NOT
[]={LOW_N
,LOW_O
,LOW_T
,0};
46 static const UChar PK_IS
[]={LOW_I
,LOW_S
,0};
47 static const UChar PK_MOD
[]={LOW_M
,LOW_O
,LOW_D
,0};
48 static const UChar PK_AND
[]={LOW_A
,LOW_N
,LOW_D
,0};
49 static const UChar PK_OR
[]={LOW_O
,LOW_R
,0};
// PK_VAR_*: the single-letter operand names (n, i, f, t, v) accepted in a rule.
50 static const UChar PK_VAR_N
[]={LOW_N
,0};
51 static const UChar PK_VAR_I
[]={LOW_I
,0};
52 static const UChar PK_VAR_F
[]={LOW_F
,0};
53 static const UChar PK_VAR_T
[]={LOW_T
,0};
54 static const UChar PK_VAR_V
[]={LOW_V
,0};
55 static const UChar PK_WITHIN
[]={LOW_W
,LOW_I
,LOW_T
,LOW_H
,LOW_I
,LOW_N
,0};
// "decimal" / "integer": the two sample-list introducers recognised by getKeyType().
56 static const UChar PK_DECIMAL
[]={LOW_D
,LOW_E
,LOW_C
,LOW_I
,LOW_M
,LOW_A
,LOW_L
,0};
57 static const UChar PK_INTEGER
[]={LOW_I
,LOW_N
,LOW_T
,LOW_E
,LOW_G
,LOW_E
,LOW_R
,0};
// Instantiate ICU's RTTI boilerplate for the two UObject-derived classes implemented in
// this file. The macro itself is defined outside this file (presumably it supplies the
// class-ID accessors -- confirm against the UObject headers).
59 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralRules
)
60 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralKeywordEnumeration
)
62 PluralRules::PluralRules(UErrorCode
& /*status*/)
68 PluralRules::PluralRules(const PluralRules
& other
)
75 PluralRules::~PluralRules() {
79 SharedPluralRules::~SharedPluralRules() {
// Returns a newly heap-allocated copy of this object, produced by the copy constructor.
// The result of `new` is returned as-is (it is not checked here).
84 PluralRules::clone() const {
85 return new PluralRules(*this);
89 PluralRules::operator=(const PluralRules
& other
) {
92 if (other
.mRules
==NULL
) {
96 mRules
= new RuleChain(*other
.mRules
);
// Creates a PluralAvailableLocalesEnumeration and hands it back as a StringEnumeration.
// @param status  in/out error code; also passed to the enumeration's constructor.
103 StringEnumeration
* PluralRules::getAvailableLocales(UErrorCode
&status
) {
104 StringEnumeration
*result
= new PluralAvailableLocalesEnumeration(status
);
// Only report an allocation failure when no other error has been recorded already.
105 if (result
== NULL
&& U_SUCCESS(status
)) {
106 status
= U_MEMORY_ALLOCATION_ERROR
;
108 if (U_FAILURE(status
)) {
// Builds a PluralRules object from a textual rule description.
// @param description  rule text handed to PluralRuleParser::parse().
// @param status       in/out error code; nothing is attempted if it already holds a failure.
116 PluralRules
* U_EXPORT2
117 PluralRules::createRules(const UnicodeString
& description
, UErrorCode
& status
) {
118 if (U_FAILURE(status
)) {
// A fresh parser per call: the parser asserts that it is used only once.
122 PluralRuleParser parser
;
123 PluralRules
*newRules
= new PluralRules(status
);
124 if (U_SUCCESS(status
) && newRules
== NULL
) {
125 status
= U_MEMORY_ALLOCATION_ERROR
;
// parse() checks status on entry, so presumably a NULL newRules is never dereferenced
// after the allocation failure recorded above -- TODO confirm.
127 parser
.parse(description
, newRules
, status
);
128 if (U_FAILURE(status
)) {
// Convenience factory: parses the built-in PLURAL_DEFAULT_RULE text through createRules().
// The UnicodeString is built with the (TRUE, text, -1) constructor form, i.e. directly
// over the static zero-terminated array.
136 PluralRules
* U_EXPORT2
137 PluralRules::createDefaultRules(UErrorCode
& status
) {
138 return createRules(UnicodeString(TRUE
, PLURAL_DEFAULT_RULE
, -1), status
);
141 /******************************************************************************/
142 /* Create PluralRules cache */
// Specialisation used by the cache to create the SharedPluralRules value for a locale key.
// It builds the cardinal rules for the key's locale (fLoc) and wraps them in a
// SharedPluralRules object.
// @param status  in/out error code; set to U_MEMORY_ALLOCATION_ERROR if the wrapper
//                cannot be allocated.
144 template<> U_I18N_API
145 const SharedPluralRules
*LocaleCacheKey
<SharedPluralRules
>::createObject(
146 const void * /*unused*/, UErrorCode
&status
) const {
147 const char *localeId
= fLoc
.getName();
// internalForLocale() takes a const Locale&; the C string is converted implicitly.
148 PluralRules
*pr
= PluralRules::internalForLocale(
149 localeId
, UPLURAL_TYPE_CARDINAL
, status
);
150 if (U_FAILURE(status
)) {
153 SharedPluralRules
*result
= new SharedPluralRules(pr
);
154 if (result
== NULL
) {
155 status
= U_MEMORY_ALLOCATION_ERROR
;
163 /* end plural rules cache */
164 /******************************************************************************/
// Fetches the cached, shared rules object for a locale.
// Only UPLURAL_TYPE_CARDINAL is supported here; any other type sets U_UNSUPPORTED_ERROR.
// @param locale  locale used as the cache key.
// @param type    plural type; must be UPLURAL_TYPE_CARDINAL.
// @param status  in/out error code.
166 const SharedPluralRules
* U_EXPORT2
167 PluralRules::createSharedInstance(
168 const Locale
& locale
, UPluralType type
, UErrorCode
& status
) {
169 if (U_FAILURE(status
)) {
172 if (type
!= UPLURAL_TYPE_CARDINAL
) {
173 status
= U_UNSUPPORTED_ERROR
;
// `result` is filled in by the cache lookup.
176 const SharedPluralRules
*result
= NULL
;
177 UnifiedCache::getByLocale(locale
, result
, status
);
// Two-argument overload: forwards to the three-argument forLocale() with
// UPLURAL_TYPE_CARDINAL as the plural type.
181 PluralRules
* U_EXPORT2
182 PluralRules::forLocale(const Locale
& locale
, UErrorCode
& status
) {
183 return forLocale(locale
, UPLURAL_TYPE_CARDINAL
, status
);
// Returns a PluralRules object for the given locale and plural type.
// Non-cardinal types are built directly by internalForLocale(); cardinal rules come from
// the shared cached instance and are cloned for the caller.
// @param status  in/out error code; U_MEMORY_ALLOCATION_ERROR if the clone is NULL.
186 PluralRules
* U_EXPORT2
187 PluralRules::forLocale(const Locale
& locale
, UPluralType type
, UErrorCode
& status
) {
188 if (type
!= UPLURAL_TYPE_CARDINAL
) {
189 return internalForLocale(locale
, type
, status
);
191 const SharedPluralRules
*shared
= createSharedInstance(
192 locale
, type
, status
);
193 if (U_FAILURE(status
)) {
// `(*shared)->` reaches the wrapped PluralRules (presumably via an operator-> on
// SharedPluralRules -- confirm in sharedpluralrules.h).
196 PluralRules
*result
= (*shared
)->clone();
198 if (result
== NULL
) {
199 status
= U_MEMORY_ALLOCATION_ERROR
;
// Builds an uncached PluralRules object for a locale by loading its rule text from
// resource data and parsing it. An empty rule text falls back to PLURAL_DEFAULT_RULE.
// @param locale  locale whose rules are looked up.
// @param type    plural type; values >= UPLURAL_TYPE_COUNT yield U_ILLEGAL_ARGUMENT_ERROR.
// @param status  in/out error code.
204 PluralRules
* U_EXPORT2
205 PluralRules::internalForLocale(const Locale
& locale
, UPluralType type
, UErrorCode
& status
) {
206 if (U_FAILURE(status
)) {
209 if (type
>= UPLURAL_TYPE_COUNT
) {
210 status
= U_ILLEGAL_ARGUMENT_ERROR
;
213 PluralRules
*newObj
= new PluralRules(status
);
// NOTE(review): the condition itself does not set U_MEMORY_ALLOCATION_ERROR when
// newObj is NULL and status is still a success code -- confirm the handling.
214 if (newObj
==NULL
|| U_FAILURE(status
)) {
218 UnicodeString locRule
= newObj
->getRuleFromResource(locale
, type
, status
);
219 // TODO: which errors, if any, should be returned?
220 if (locRule
.length() == 0) {
221 // Locales with no specific rules (all numbers have the "other" category)
222 // will return a U_MISSING_RESOURCE_ERROR at this point. This is not an error:
// substitute the default rule text and reset status to U_ZERO_ERROR.
224 locRule
= UnicodeString(PLURAL_DEFAULT_RULE
);
225 status
= U_ZERO_ERROR
;
227 PluralRuleParser parser
;
228 parser
.parse(locRule
, newObj
, status
);
229 // TODO: should rule parse errors be returned, or
230 // should we silently use default rules?
231 // Original impl used default rules.
232 // Ask the question to ICU Core.
// Integer overload: wraps the number in a FixedDecimal and defers to the FixedDecimal overload.
238 PluralRules::select(int32_t number
) const {
239 return select(FixedDecimal(number
));
// Double overload: wraps the number in a FixedDecimal and defers to the FixedDecimal overload.
243 PluralRules::select(double number
) const {
244 return select(FixedDecimal(number
));
// Core selection: delegates to the rule chain when one exists.
// NOTE(review): with no rule chain this returns PLURAL_DEFAULT_RULE -- the rule text
// ("other: n", if LOW_*/COLON/SPACE are the obvious characters) -- rather than the
// keyword PLURAL_KEYWORD_OTHER that RuleChain::select() falls back to. Confirm whether
// the keyword was intended.
248 PluralRules::select(const FixedDecimal
&number
) const {
249 if (mRules
== NULL
) {
250 return UnicodeString(TRUE
, PLURAL_DEFAULT_RULE
, -1);
253 return mRules
->select(number
);
// Overload for VisibleDigitsWithExponent: when an exponent is present the rules are not
// evaluated at all; otherwise selection runs on the mantissa.
// NOTE(review): the exponent case returns PLURAL_DEFAULT_RULE (the rule text), not the
// keyword PLURAL_KEYWORD_OTHER -- confirm whether the keyword was intended.
258 PluralRules::select(const VisibleDigitsWithExponent
&number
) const {
259 if (number
.getExponent() != NULL
) {
260 return UnicodeString(TRUE
, PLURAL_DEFAULT_RULE
, -1);
262 return select(FixedDecimal(number
.getMantissa()));
// Returns an enumeration over the keywords of this rule set, built from the rule chain.
// Returns NULL immediately if status already holds a failure. If constructing the
// enumeration fails, the partially built enumerator is deleted.
// NOTE(review): a NULL result from `new` does not by itself set an error code here.
268 PluralRules::getKeywords(UErrorCode
& status
) const {
269 if (U_FAILURE(status
)) return NULL
;
270 StringEnumeration
* nameEnumerator
= new PluralKeywordEnumeration(mRules
, status
);
271 if (U_FAILURE(status
)) {
272 delete nameEnumerator
;
276 return nameEnumerator
;
// Stub: the keyword argument is ignored and UPLRULES_NO_UNIQUE_VALUE is always returned.
280 PluralRules::getUniqueKeywordValue(const UnicodeString
& /* keyword */) {
282 return UPLRULES_NO_UNIQUE_VALUE
;
// Unsupported operation: keyword, dest and destCapacity are ignored and the error code
// is set to U_UNSUPPORTED_ERROR.
286 PluralRules::getAllKeywordValues(const UnicodeString
& /* keyword */, double * /* dest */,
287 int32_t /* destCapacity */, UErrorCode
& error
) {
288 error
= U_UNSUPPORTED_ERROR
;
293 static double scaleForInt(double d
) {
295 while (d
!= floor(d
)) {
297 scale
= scale
* 10.0;
303 getSamplesFromString(const UnicodeString
&samples
, double *dest
,
304 int32_t destCapacity
, UErrorCode
& status
) {
305 int32_t sampleCount
= 0;
306 int32_t sampleStartIdx
= 0;
307 int32_t sampleEndIdx
= 0;
309 //std::string ss; // TODO: debugging.
310 // std::cout << "PluralRules::getSamples(), samples = \"" << samples.toUTF8String(ss) << "\"\n";
311 for (sampleCount
= 0; sampleCount
< destCapacity
&& sampleStartIdx
< samples
.length(); ) {
312 sampleEndIdx
= samples
.indexOf(COMMA
, sampleStartIdx
);
313 if (sampleEndIdx
== -1) {
314 sampleEndIdx
= samples
.length();
316 const UnicodeString
&sampleRange
= samples
.tempSubStringBetween(sampleStartIdx
, sampleEndIdx
);
318 // std::cout << "PluralRules::getSamples(), samplesRange = \"" << sampleRange.toUTF8String(ss) << "\"\n";
319 int32_t tildeIndex
= sampleRange
.indexOf(TILDE
);
320 if (tildeIndex
< 0) {
321 FixedDecimal
fixed(sampleRange
, status
);
322 double sampleValue
= fixed
.source
;
323 if (fixed
.visibleDecimalDigitCount
== 0 || sampleValue
!= floor(sampleValue
)) {
324 dest
[sampleCount
++] = sampleValue
;
328 FixedDecimal
fixedLo(sampleRange
.tempSubStringBetween(0, tildeIndex
), status
);
329 FixedDecimal
fixedHi(sampleRange
.tempSubStringBetween(tildeIndex
+1), status
);
330 double rangeLo
= fixedLo
.source
;
331 double rangeHi
= fixedHi
.source
;
332 if (U_FAILURE(status
)) {
335 if (rangeHi
< rangeLo
) {
336 status
= U_INVALID_FORMAT_ERROR
;
340 // For ranges of samples with fraction decimal digits, scale the number up so that we
341 // are adding one in the units place. Avoids roundoffs from repetitive adds of tenths.
343 double scale
= scaleForInt(rangeLo
);
344 double t
= scaleForInt(rangeHi
);
350 for (double n
=rangeLo
; n
<=rangeHi
; n
+=1) {
351 // Hack Alert: don't return any decimal samples with integer values that
352 // originated from a format with trailing decimals.
353 // This API is returning doubles, which can't distinguish having displayed
354 // zeros to the right of the decimal.
355 // This results in test failures with values mapping back to a different keyword.
356 double sampleValue
= n
/scale
;
357 if (!(sampleValue
== floor(sampleValue
) && fixedLo
.visibleDecimalDigitCount
> 0)) {
358 dest
[sampleCount
++] = sampleValue
;
360 if (sampleCount
>= destCapacity
) {
365 sampleStartIdx
= sampleEndIdx
+ 1;
// Fills `dest` with sample values for the given keyword.
// @param keyword       keyword whose rule chain entry supplies the samples.
// @param dest          output array of doubles.
// @param destCapacity  number of slots available in dest.
// @param status        in/out error code.
// The integer sample list is tried first; the decimal sample list is consulted only
// when the integer list produced no samples.
372 PluralRules::getSamples(const UnicodeString
&keyword
, double *dest
,
373 int32_t destCapacity
, UErrorCode
& status
) {
374 RuleChain
*rc
= rulesForKeyword(keyword
);
// Nothing to do for an unknown keyword, a zero-capacity buffer, or a pending error.
375 if (rc
== NULL
|| destCapacity
== 0 || U_FAILURE(status
)) {
378 int32_t numSamples
= getSamplesFromString(rc
->fIntegerSamples
, dest
, destCapacity
, status
);
379 if (numSamples
== 0) {
380 numSamples
= getSamplesFromString(rc
->fDecimalSamples
, dest
, destCapacity
, status
);
// Linear search of the rule chain (mRules, linked through fNext) for the node whose
// fKeyword equals `keyword`.
386 RuleChain
*PluralRules::rulesForKeyword(const UnicodeString
&keyword
) const {
388 for (rc
= mRules
; rc
!= NULL
; rc
= rc
->fNext
) {
389 if (rc
->fKeyword
== keyword
) {
// Tests whether `keyword` is defined for this rule set. The keyword "other" is
// special-cased before the rule chain is consulted; any other keyword is known exactly
// when rulesForKeyword() finds a chain node for it.
398 PluralRules::isKeyword(const UnicodeString
& keyword
) const {
399 if (0 == keyword
.compare(PLURAL_KEYWORD_OTHER
, 5)) {
402 return rulesForKeyword(keyword
) != NULL
;
// Returns the keyword "other" as a UnicodeString built over the static
// PLURAL_KEYWORD_OTHER array (5 code units, zero-terminated).
406 PluralRules::getKeywordOther() const {
407 return UnicodeString(TRUE
, PLURAL_KEYWORD_OTHER
, 5);
411 PluralRules::operator==(const PluralRules
& other
) const {
412 const UnicodeString
*ptrKeyword
;
413 UErrorCode status
= U_ZERO_ERROR
;
415 if ( this == &other
) {
418 LocalPointer
<StringEnumeration
> myKeywordList(getKeywords(status
));
419 LocalPointer
<StringEnumeration
> otherKeywordList(other
.getKeywords(status
));
420 if (U_FAILURE(status
)) {
424 if (myKeywordList
->count(status
)!=otherKeywordList
->count(status
)) {
427 myKeywordList
->reset(status
);
428 while ((ptrKeyword
=myKeywordList
->snext(status
))!=NULL
) {
429 if (!other
.isKeyword(*ptrKeyword
)) {
433 otherKeywordList
->reset(status
);
434 while ((ptrKeyword
=otherKeywordList
->snext(status
))!=NULL
) {
435 if (!this->isKeyword(*ptrKeyword
)) {
439 if (U_FAILURE(status
)) {
448 PluralRuleParser::parse(const UnicodeString
& ruleData
, PluralRules
*prules
, UErrorCode
&status
)
450 if (U_FAILURE(status
)) {
453 U_ASSERT(ruleIndex
== 0); // Parsers are good for a single use only!
456 while (ruleIndex
< ruleSrc
->length()) {
457 getNextToken(status
);
458 if (U_FAILURE(status
)) {
462 if (U_FAILURE(status
)) {
467 U_ASSERT(curAndConstraint
!= NULL
);
468 curAndConstraint
= curAndConstraint
->add();
472 U_ASSERT(currentChain
!= NULL
);
473 OrConstraint
*orNode
=currentChain
->ruleHeader
;
474 while (orNode
->next
!= NULL
) {
475 orNode
= orNode
->next
;
477 orNode
->next
= new OrConstraint();
480 curAndConstraint
= orNode
->add();
484 U_ASSERT(curAndConstraint
!= NULL
);
485 U_ASSERT(curAndConstraint
->value
== -1);
486 U_ASSERT(curAndConstraint
->rangeList
== NULL
);
489 U_ASSERT(curAndConstraint
!= NULL
);
490 curAndConstraint
->negated
=TRUE
;
494 curAndConstraint
->negated
=TRUE
;
499 U_ASSERT(curAndConstraint
!= NULL
);
500 curAndConstraint
->rangeList
= new UVector32(status
);
501 curAndConstraint
->rangeList
->addElement(-1, status
); // range Low
502 curAndConstraint
->rangeList
->addElement(-1, status
); // range Hi
505 curAndConstraint
->value
=PLURAL_RANGE_HIGH
;
506 curAndConstraint
->integerOnly
= (type
!= tWithin
);
509 U_ASSERT(curAndConstraint
!= NULL
);
510 if ( (curAndConstraint
->op
==AndConstraint::MOD
)&&
511 (curAndConstraint
->opNum
== -1 ) ) {
512 curAndConstraint
->opNum
=getNumberValue(token
);
515 if (curAndConstraint
->rangeList
== NULL
) {
516 // this is for an 'is' rule
517 curAndConstraint
->value
= getNumberValue(token
);
519 // this is for an 'in' or 'within' rule
520 if (curAndConstraint
->rangeList
->elementAti(rangeLowIdx
) == -1) {
521 curAndConstraint
->rangeList
->setElementAt(getNumberValue(token
), rangeLowIdx
);
522 curAndConstraint
->rangeList
->setElementAt(getNumberValue(token
), rangeHiIdx
);
525 curAndConstraint
->rangeList
->setElementAt(getNumberValue(token
), rangeHiIdx
);
526 if (curAndConstraint
->rangeList
->elementAti(rangeLowIdx
) >
527 curAndConstraint
->rangeList
->elementAti(rangeHiIdx
)) {
528 // Range Lower bound > Range Upper bound.
529 // U_UNEXPECTED_TOKEN seems a little funny, but it is consistently
530 // used for all plural rule parse errors.
531 status
= U_UNEXPECTED_TOKEN
;
539 // TODO: rule syntax checking is inadequate, can happen with badly formed rules.
540 // Catch cases like "n mod 10, is 1" here instead.
541 if (curAndConstraint
== NULL
|| curAndConstraint
->rangeList
== NULL
) {
542 status
= U_UNEXPECTED_TOKEN
;
545 U_ASSERT(curAndConstraint
->rangeList
->size() >= 2);
546 rangeLowIdx
= curAndConstraint
->rangeList
->size();
547 curAndConstraint
->rangeList
->addElement(-1, status
); // range Low
548 rangeHiIdx
= curAndConstraint
->rangeList
->size();
549 curAndConstraint
->rangeList
->addElement(-1, status
); // range Hi
552 U_ASSERT(curAndConstraint
!= NULL
);
553 curAndConstraint
->op
=AndConstraint::MOD
;
560 U_ASSERT(curAndConstraint
!= NULL
);
561 curAndConstraint
->digitsType
= type
;
565 RuleChain
*newChain
= new RuleChain
;
566 if (newChain
== NULL
) {
567 status
= U_MEMORY_ALLOCATION_ERROR
;
570 newChain
->fKeyword
= token
;
571 if (prules
->mRules
== NULL
) {
572 prules
->mRules
= newChain
;
574 // The new rule chain goes at the end of the linked list of rule chains,
575 // unless there is an "other" keyword & chain. "other" must remain last.
576 RuleChain
*insertAfter
= prules
->mRules
;
577 while (insertAfter
->fNext
!=NULL
&&
578 insertAfter
->fNext
->fKeyword
.compare(PLURAL_KEYWORD_OTHER
, 5) != 0 ){
579 insertAfter
=insertAfter
->fNext
;
581 newChain
->fNext
= insertAfter
->fNext
;
582 insertAfter
->fNext
= newChain
;
584 OrConstraint
*orNode
= new OrConstraint();
585 newChain
->ruleHeader
= orNode
;
586 curAndConstraint
= orNode
->add();
587 currentChain
= newChain
;
593 getNextToken(status
);
594 if (U_FAILURE(status
) || type
== tSemiColon
|| type
== tEOF
|| type
== tAt
) {
597 if (type
== tEllipsis
) {
598 currentChain
->fIntegerSamplesUnbounded
= TRUE
;
601 currentChain
->fIntegerSamples
.append(token
);
607 getNextToken(status
);
608 if (U_FAILURE(status
) || type
== tSemiColon
|| type
== tEOF
|| type
== tAt
) {
611 if (type
== tEllipsis
) {
612 currentChain
->fDecimalSamplesUnbounded
= TRUE
;
615 currentChain
->fDecimalSamples
.append(token
);
623 if (U_FAILURE(status
)) {
630 PluralRules::getRuleFromResource(const Locale
& locale
, UPluralType type
, UErrorCode
& errCode
) {
631 UnicodeString emptyStr
;
633 if (U_FAILURE(errCode
)) {
636 LocalUResourceBundlePointer
rb(ures_openDirect(NULL
, "plurals", &errCode
));
637 if(U_FAILURE(errCode
)) {
642 case UPLURAL_TYPE_CARDINAL
:
645 case UPLURAL_TYPE_ORDINAL
:
646 typeKey
= "locales_ordinals";
649 // Must not occur: The caller should have checked for valid types.
650 errCode
= U_ILLEGAL_ARGUMENT_ERROR
;
653 LocalUResourceBundlePointer
locRes(ures_getByKey(rb
.getAlias(), typeKey
, NULL
, &errCode
));
654 if(U_FAILURE(errCode
)) {
658 const char *curLocaleName
=locale
.getName();
659 const UChar
* s
= ures_getStringByKey(locRes
.getAlias(), curLocaleName
, &resLen
, &errCode
);
662 // Check parent locales.
663 UErrorCode status
= U_ZERO_ERROR
;
664 char parentLocaleName
[ULOC_FULLNAME_CAPACITY
];
665 const char *curLocaleName
=locale
.getName();
666 uprv_strcpy(parentLocaleName
, curLocaleName
);
668 while (uloc_getParent(parentLocaleName
, parentLocaleName
,
669 ULOC_FULLNAME_CAPACITY
, &status
) > 0) {
671 s
= ures_getStringByKey(locRes
.getAlias(), parentLocaleName
, &resLen
, &status
);
673 errCode
= U_ZERO_ERROR
;
676 status
= U_ZERO_ERROR
;
684 u_UCharsToChars(s
, setKey
, resLen
+ 1);
685 // printf("\n PluralRule: %s\n", setKey);
687 LocalUResourceBundlePointer
ruleRes(ures_getByKey(rb
.getAlias(), "rules", NULL
, &errCode
));
688 if(U_FAILURE(errCode
)) {
691 LocalUResourceBundlePointer
setRes(ures_getByKey(ruleRes
.getAlias(), setKey
, NULL
, &errCode
));
692 if (U_FAILURE(errCode
)) {
696 int32_t numberKeys
= ures_getSize(setRes
.getAlias());
697 UnicodeString result
;
698 const char *key
=NULL
;
699 for(int32_t i
=0; i
<numberKeys
; ++i
) { // Keys are zero, one, few, ...
700 UnicodeString rules
= ures_getNextUnicodeString(setRes
.getAlias(), &key
, &errCode
);
701 UnicodeString
uKey(key
, -1, US_INV
);
703 result
.append(COLON
);
704 result
.append(rules
);
705 result
.append(SEMI_COLON
);
// Produces the textual form of the rules: when a rule chain exists, it is asked to
// append its description to `rules` via RuleChain::dumpRules().
712 PluralRules::getRules() const {
714 if (mRules
!= NULL
) {
715 mRules
->dumpRules(rules
);
721 AndConstraint::AndConstraint() {
722 op
= AndConstraint::NONE
;
// Copy constructor: copies the scalar fields, deep-copies the range list into a new
// UVector32, and recursively copies the `next` constraint.
// NOTE(review): the result of `new UVector32` is dereferenced without a NULL check, and
// the local status from the vector construction/assign is never inspected or reported.
733 AndConstraint::AndConstraint(const AndConstraint
& other
) {
735 this->opNum
=other
.opNum
;
736 this->value
=other
.value
;
737 this->rangeList
=NULL
;
738 if (other
.rangeList
!= NULL
) {
739 UErrorCode status
= U_ZERO_ERROR
;
740 this->rangeList
= new UVector32(status
);
741 this->rangeList
->assign(*other
.rangeList
, status
);
743 this->integerOnly
=other
.integerOnly
;
744 this->negated
=other
.negated
;
745 this->digitsType
= other
.digitsType
;
746 if (other
.next
==NULL
) {
// Recursive copy: one nested constructor call per remaining node in the list.
750 this->next
= new AndConstraint(*other
.next
);
754 AndConstraint::~AndConstraint() {
763 AndConstraint::isFulfilled(const FixedDecimal
&number
) {
765 if (digitsType
== none
) {
766 // An empty AndConstraint, created by a rule with a keyword but no following expression.
769 double n
= number
.get(digitsType
); // pulls n | i | v | f value for the number.
770 // Will always be positive.
771 // May be non-integer (n option only)
773 if (integerOnly
&& n
!= uprv_floor(n
)) {
781 if (rangeList
== NULL
) {
782 result
= value
== -1 || // empty rule
783 n
== value
; // 'is' rule
786 result
= FALSE
; // 'in' or 'within' rule
787 for (int32_t r
=0; r
<rangeList
->size(); r
+=2) {
788 if (rangeList
->elementAti(r
) <= n
&& n
<= rangeList
->elementAti(r
+1)) {
805 this->next
= new AndConstraint();
809 OrConstraint::OrConstraint() {
// Copy constructor: deep-copies the child AndConstraint list (if any) and recursively
// copies the `next` OrConstraint. The results of `new` are stored unchecked.
814 OrConstraint::OrConstraint(const OrConstraint
& other
) {
815 if ( other
.childNode
== NULL
) {
816 this->childNode
= NULL
;
819 this->childNode
= new AndConstraint(*(other
.childNode
));
821 if (other
.next
== NULL
) {
825 this->next
= new OrConstraint(*(other
.next
));
829 OrConstraint::~OrConstraint() {
830 if (childNode
!=NULL
) {
841 OrConstraint
*curOrConstraint
=this;
843 while (curOrConstraint
->next
!=NULL
) {
844 curOrConstraint
= curOrConstraint
->next
;
846 U_ASSERT(curOrConstraint
->childNode
== NULL
);
847 curOrConstraint
->childNode
= new AndConstraint();
849 return curOrConstraint
->childNode
;
853 OrConstraint::isFulfilled(const FixedDecimal
&number
) {
854 OrConstraint
* orRule
=this;
857 while (orRule
!=NULL
&& !result
) {
859 AndConstraint
* andRule
= orRule
->childNode
;
860 while (andRule
!=NULL
&& result
) {
861 result
= andRule
->isFulfilled(number
);
862 andRule
=andRule
->next
;
864 orRule
= orRule
->next
;
// Default constructor: empty keyword and sample strings, no successor, no rule header,
// and both "samples unbounded" flags cleared.
871 RuleChain::RuleChain(): fKeyword(), fNext(NULL
), ruleHeader(NULL
), fDecimalSamples(), fIntegerSamples(),
872 fDecimalSamplesUnbounded(FALSE
), fIntegerSamplesUnbounded(FALSE
) {
// Copy constructor: copies keyword, sample strings and flags in the initializer list,
// then deep-copies the rule header and, recursively, the rest of the chain (fNext).
// The results of `new` are stored unchecked.
875 RuleChain::RuleChain(const RuleChain
& other
) :
876 fKeyword(other
.fKeyword
), fNext(NULL
), ruleHeader(NULL
), fDecimalSamples(other
.fDecimalSamples
),
877 fIntegerSamples(other
.fIntegerSamples
), fDecimalSamplesUnbounded(other
.fDecimalSamplesUnbounded
),
878 fIntegerSamplesUnbounded(other
.fIntegerSamplesUnbounded
) {
879 if (other
.ruleHeader
!= NULL
) {
880 this->ruleHeader
= new OrConstraint(*(other
.ruleHeader
));
882 if (other
.fNext
!= NULL
) {
883 this->fNext
= new RuleChain(*other
.fNext
);
887 RuleChain::~RuleChain() {
// Returns the keyword of the first chain node whose rule is fulfilled by `number`.
// NaN/infinite inputs skip rule evaluation entirely. When nothing matches, the keyword
// "other" is returned.
// NOTE(review): ruleHeader is dereferenced without a NULL check; this assumes every
// chain node has a rule header -- confirm against the parser.
894 RuleChain::select(const FixedDecimal
&number
) const {
895 if (!number
.isNanOrInfinity
) {
896 for (const RuleChain
*rules
= this; rules
!= NULL
; rules
= rules
->fNext
) {
897 if (rules
->ruleHeader
->isFulfilled(number
)) {
898 return rules
->fKeyword
;
902 return UnicodeString(TRUE
, PLURAL_KEYWORD_OTHER
, 5);
905 static UnicodeString
tokenString(tokenType tok
) {
909 s
.append(LOW_N
); break;
911 s
.append(LOW_I
); break;
913 s
.append(LOW_F
); break;
915 s
.append(LOW_V
); break;
917 s
.append(LOW_T
); break;
925 RuleChain::dumpRules(UnicodeString
& result
) {
926 UChar digitString
[16];
928 if ( ruleHeader
!= NULL
) {
932 OrConstraint
* orRule
=ruleHeader
;
933 while ( orRule
!= NULL
) {
934 AndConstraint
* andRule
=orRule
->childNode
;
935 while ( andRule
!= NULL
) {
936 if ((andRule
->op
==AndConstraint::NONE
) && (andRule
->rangeList
==NULL
) && (andRule
->value
== -1)) {
938 } else if ( (andRule
->op
==AndConstraint::NONE
) && (andRule
->rangeList
==NULL
) ) {
939 result
+= tokenString(andRule
->digitsType
);
940 result
+= UNICODE_STRING_SIMPLE(" is ");
941 if (andRule
->negated
) {
942 result
+= UNICODE_STRING_SIMPLE("not ");
944 uprv_itou(digitString
,16, andRule
->value
,10,0);
945 result
+= UnicodeString(digitString
);
948 result
+= tokenString(andRule
->digitsType
);
950 if (andRule
->op
==AndConstraint::MOD
) {
951 result
+= UNICODE_STRING_SIMPLE("mod ");
952 uprv_itou(digitString
,16, andRule
->opNum
,10,0);
953 result
+= UnicodeString(digitString
);
955 if (andRule
->rangeList
==NULL
) {
956 if (andRule
->negated
) {
957 result
+= UNICODE_STRING_SIMPLE(" is not ");
958 uprv_itou(digitString
,16, andRule
->value
,10,0);
959 result
+= UnicodeString(digitString
);
962 result
+= UNICODE_STRING_SIMPLE(" is ");
963 uprv_itou(digitString
,16, andRule
->value
,10,0);
964 result
+= UnicodeString(digitString
);
968 if (andRule
->negated
) {
969 if ( andRule
->integerOnly
) {
970 result
+= UNICODE_STRING_SIMPLE(" not in ");
973 result
+= UNICODE_STRING_SIMPLE(" not within ");
977 if ( andRule
->integerOnly
) {
978 result
+= UNICODE_STRING_SIMPLE(" in ");
981 result
+= UNICODE_STRING_SIMPLE(" within ");
984 for (int32_t r
=0; r
<andRule
->rangeList
->size(); r
+=2) {
985 int32_t rangeLo
= andRule
->rangeList
->elementAti(r
);
986 int32_t rangeHi
= andRule
->rangeList
->elementAti(r
+1);
987 uprv_itou(digitString
,16, rangeLo
, 10, 0);
988 result
+= UnicodeString(digitString
);
989 result
+= UNICODE_STRING_SIMPLE("..");
990 uprv_itou(digitString
,16, rangeHi
, 10,0);
991 result
+= UnicodeString(digitString
);
992 if (r
+2 < andRule
->rangeList
->size()) {
993 result
+= UNICODE_STRING_SIMPLE(", ");
998 if ( (andRule
=andRule
->next
) != NULL
) {
999 result
+= UNICODE_STRING_SIMPLE(" and ");
1002 if ( (orRule
= orRule
->next
) != NULL
) {
1003 result
+= UNICODE_STRING_SIMPLE(" or ");
1007 if ( fNext
!= NULL
) {
1008 result
+= UNICODE_STRING_SIMPLE("; ");
1009 fNext
->dumpRules(result
);
// Appends this node's keyword to `keywords` and recurses through fNext.
// @param capacityOfKeywords  number of slots in `keywords`.
// @param keywords            output array.
// @param arraySize           in/out count of entries already stored.
// @return U_ZERO_ERROR at the end of the chain, or U_BUFFER_OVERFLOW_ERROR when the
//         array is considered full.
// NOTE(review): the `capacityOfKeywords-1` bound leaves the final slot unused; confirm
// whether that reservation is intentional.
1015 RuleChain::getKeywords(int32_t capacityOfKeywords
, UnicodeString
* keywords
, int32_t& arraySize
) const {
1016 if ( arraySize
< capacityOfKeywords
-1 ) {
1017 keywords
[arraySize
++]=fKeyword
;
1020 return U_BUFFER_OVERFLOW_ERROR
;
1023 if ( fNext
!= NULL
) {
1024 return fNext
->getKeywords(capacityOfKeywords
, keywords
, arraySize
);
1027 return U_ZERO_ERROR
;
1032 RuleChain::isKeyword(const UnicodeString
& keywordParam
) const {
1033 if ( fKeyword
== keywordParam
) {
1037 if ( fNext
!= NULL
) {
1038 return fNext
->isKeyword(keywordParam
);
// Constructor: starts at rule index 0 with an empty token, token types `none`, no
// current constraint or chain, and both range indices set to -1.
1046 PluralRuleParser::PluralRuleParser() :
1047 ruleIndex(0), token(), type(none
), prevType(none
),
1048 curAndConstraint(NULL
), currentChain(NULL
), rangeLowIdx(-1), rangeHiIdx(-1)
1052 PluralRuleParser::~PluralRuleParser() {
// Converts a numeric token to an int32_t: the token is extracted as invariant
// characters into the local `digits` buffer (bounded by UPRV_LENGTHOF(digits)) and then
// parsed with atoi().
// NOTE(review): atoi() reports neither overflow nor malformed input.
1057 PluralRuleParser::getNumberValue(const UnicodeString
& token
) {
1061 i
= token
.extract(0, token
.length(), digits
, UPRV_LENGTHOF(digits
), US_INV
);
1064 return((int32_t)atoi(digits
));
1069 PluralRuleParser::checkSyntax(UErrorCode
&status
)
1071 if (U_FAILURE(status
)) {
1074 if (!(prevType
==none
|| prevType
==tSemiColon
)) {
1075 type
= getKeyType(token
, type
); // Switch token type from tKeyword if we scanned a reserved word,
1076 // and we are not at the start of a rule, where a
1077 // keyword is expected.
1083 if (type
!=tKeyword
&& type
!= tEOF
) {
1084 status
= U_UNEXPECTED_TOKEN
;
1092 if (type
!= tIs
&& type
!= tMod
&& type
!= tIn
&&
1093 type
!= tNot
&& type
!= tWithin
&& type
!= tEqual
&& type
!= tNotEqual
) {
1094 status
= U_UNEXPECTED_TOKEN
;
1098 if (type
!= tColon
) {
1099 status
= U_UNEXPECTED_TOKEN
;
1103 if (!(type
== tVariableN
||
1104 type
== tVariableI
||
1105 type
== tVariableF
||
1106 type
== tVariableT
||
1107 type
== tVariableV
||
1109 status
= U_UNEXPECTED_TOKEN
;
1113 if ( type
!= tNumber
&& type
!= tNot
) {
1114 status
= U_UNEXPECTED_TOKEN
;
1118 if (type
!= tNumber
&& type
!= tIn
&& type
!= tWithin
) {
1119 status
= U_UNEXPECTED_TOKEN
;
1128 if (type
!= tNumber
) {
1129 status
= U_UNEXPECTED_TOKEN
;
1134 if ( type
!= tVariableN
&&
1135 type
!= tVariableI
&&
1136 type
!= tVariableF
&&
1137 type
!= tVariableT
&&
1138 type
!= tVariableV
) {
1139 status
= U_UNEXPECTED_TOKEN
;
1143 if (type
!= tNumber
) {
1144 status
= U_UNEXPECTED_TOKEN
;
1148 if (type
!= tDot2
&& type
!= tSemiColon
&& type
!= tIs
&& type
!= tNot
&&
1149 type
!= tIn
&& type
!= tEqual
&& type
!= tNotEqual
&& type
!= tWithin
&&
1150 type
!= tAnd
&& type
!= tOr
&& type
!= tComma
&& type
!= tAt
&&
1153 status
= U_UNEXPECTED_TOKEN
;
1155 // TODO: a comma following a number that is not part of a range will be allowed.
1156 // It's not the only case of this sort of thing. Parser needs a re-write.
1159 if (type
!= tDecimal
&& type
!= tInteger
) {
1160 status
= U_UNEXPECTED_TOKEN
;
1164 status
= U_UNEXPECTED_TOKEN
;
1171 * Scan the next token from the input rules.
1172 * rules and returned token type are in the parser state variables.
1175 PluralRuleParser::getNextToken(UErrorCode
&status
)
1177 if (U_FAILURE(status
)) {
1182 while (ruleIndex
< ruleSrc
->length()) {
1183 ch
= ruleSrc
->charAt(ruleIndex
);
1184 type
= charType(ch
);
1185 if (type
!= tSpace
) {
1190 if (ruleIndex
>= ruleSrc
->length()) {
1194 int32_t curIndex
= ruleIndex
;
1201 case tTilde
: // scanned '~'
1202 case tAt
: // scanned '@'
1203 case tEqual
: // scanned '='
1204 case tMod
: // scanned '%'
1205 // Single character tokens.
1209 case tNotEqual
: // scanned '!'
1210 if (ruleSrc
->charAt(curIndex
+1) == EQUALS
) {
1219 while (type
== tKeyword
&& ++curIndex
< ruleSrc
->length()) {
1220 ch
= ruleSrc
->charAt(curIndex
);
1221 type
= charType(ch
);
1227 while (type
== tNumber
&& ++curIndex
< ruleSrc
->length()) {
1228 ch
= ruleSrc
->charAt(curIndex
);
1229 type
= charType(ch
);
1235 // We could be looking at either ".." in a range, or "..." at the end of a sample.
1236 if (curIndex
+1 >= ruleSrc
->length() || ruleSrc
->charAt(curIndex
+1) != DOT
) {
1238 break; // Single dot
1240 if (curIndex
+2 >= ruleSrc
->length() || ruleSrc
->charAt(curIndex
+2) != DOT
) {
1243 break; // double dot
1247 break; // triple dot
1250 status
= U_UNEXPECTED_TOKEN
;
1255 U_ASSERT(ruleIndex
<= ruleSrc
->length());
1256 U_ASSERT(curIndex
<= ruleSrc
->length());
1257 token
=UnicodeString(*ruleSrc
, ruleIndex
, curIndex
-ruleIndex
);
1258 ruleIndex
= curIndex
;
1262 PluralRuleParser::charType(UChar ch
) {
1263 if ((ch
>=U_ZERO
) && (ch
<=U_NINE
)) {
1266 if (ch
>=LOW_A
&& ch
<=LOW_Z
) {
1298 // Set token type for reserved words in the Plural Rule syntax.
1301 PluralRuleParser::getKeyType(const UnicodeString
&token
, tokenType keyType
)
1303 if (keyType
!= tKeyword
) {
1307 if (0 == token
.compare(PK_VAR_N
, 1)) {
1308 keyType
= tVariableN
;
1309 } else if (0 == token
.compare(PK_VAR_I
, 1)) {
1310 keyType
= tVariableI
;
1311 } else if (0 == token
.compare(PK_VAR_F
, 1)) {
1312 keyType
= tVariableF
;
1313 } else if (0 == token
.compare(PK_VAR_T
, 1)) {
1314 keyType
= tVariableT
;
1315 } else if (0 == token
.compare(PK_VAR_V
, 1)) {
1316 keyType
= tVariableV
;
1317 } else if (0 == token
.compare(PK_IS
, 2)) {
1319 } else if (0 == token
.compare(PK_AND
, 3)) {
1321 } else if (0 == token
.compare(PK_IN
, 2)) {
1323 } else if (0 == token
.compare(PK_WITHIN
, 6)) {
1325 } else if (0 == token
.compare(PK_NOT
, 3)) {
1327 } else if (0 == token
.compare(PK_MOD
, 3)) {
1329 } else if (0 == token
.compare(PK_OR
, 2)) {
1331 } else if (0 == token
.compare(PK_DECIMAL
, 7)) {
1333 } else if (0 == token
.compare(PK_INTEGER
, 7)) {
// Builds the keyword list for a rule chain.
// @param header  first node of the rule chain whose keywords are collected.
// @param status  in/out error code; also used to construct fKeywordNames.
// Each keyword is stored as a heap-allocated UnicodeString owned by fKeywordNames
// (a deleter is installed). If no node carries the keyword "other", it is appended
// at the end.
1340 PluralKeywordEnumeration::PluralKeywordEnumeration(RuleChain
*header
, UErrorCode
& status
)
1341 : pos(0), fKeywordNames(status
) {
1342 if (U_FAILURE(status
)) {
1345 fKeywordNames
.setDeleter(uprv_deleteUObject
);
1346 UBool addKeywordOther
=TRUE
;
1347 RuleChain
*node
=header
;
1349 fKeywordNames
.addElement(new UnicodeString(node
->fKeyword
), status
);
1350 if (U_FAILURE(status
)) {
// Remember that the chain already supplies "other" so it is not added twice.
1353 if (0 == node
->fKeyword
.compare(PLURAL_KEYWORD_OTHER
, 5)) {
1354 addKeywordOther
= FALSE
;
1359 if (addKeywordOther
) {
1360 fKeywordNames
.addElement(new UnicodeString(PLURAL_KEYWORD_OTHER
), status
);
// Returns the next stored keyword and advances the position, provided status is a
// success code and entries remain.
1364 const UnicodeString
*
1365 PluralKeywordEnumeration::snext(UErrorCode
& status
) {
1366 if (U_SUCCESS(status
) && pos
< fKeywordNames
.size()) {
1367 return (const UnicodeString
*)fKeywordNames
.elementAt(pos
++);
1373 PluralKeywordEnumeration::reset(UErrorCode
& /*status*/) {
// Number of keywords held by this enumeration; the status argument is unused.
1378 PluralKeywordEnumeration::count(UErrorCode
& /*status*/) const {
1379 return fKeywordNames
.size();
1382 PluralKeywordEnumeration::~PluralKeywordEnumeration() {
// Constructs from a VisibleDigits value: the numeric fields are filled in by
// VisibleDigits::getFixedDecimal(), and the sign and NaN/infinity flags are read from
// the same object.
1385 FixedDecimal::FixedDecimal(const VisibleDigits
&digits
) {
1386 digits
.getFixedDecimal(
1387 source
, intValue
, decimalDigits
,
1388 decimalDigitsWithoutTrailingZeros
,
1389 visibleDecimalDigitCount
, hasIntegerValue
);
1390 isNegative
= digits
.isNegative();
1391 isNanOrInfinity
= digits
.isNaNOrInfinity();
1394 FixedDecimal::FixedDecimal(double n
, int32_t v
, int64_t f
) {
1396 // check values. TODO make into unit test.
1398 // long visiblePower = (int) Math.pow(10, v);
1399 // if (decimalDigits > visiblePower) {
1400 // throw new IllegalArgumentException();
1402 // double fraction = intValue + (decimalDigits / (double) visiblePower);
1403 // if (fraction != source) {
1404 // double diff = Math.abs(fraction - source)/(Math.abs(fraction) + Math.abs(source));
1405 // if (diff > 0.00000001d) {
1406 // throw new IllegalArgumentException();
// Constructs from a value `n` and a visible-fraction-digit count `v`; the fraction
// digits themselves are derived with getFractionalDigits(n, v) and everything is
// passed to the three-argument init().
1411 FixedDecimal::FixedDecimal(double n
, int32_t v
) {
1412 // Ugly, but for samples we don't care.
1413 init(n
, v
, getFractionalDigits(n
, v
));
1416 FixedDecimal::FixedDecimal(double n
) {
1420 FixedDecimal::FixedDecimal() {
1425 // Create a FixedDecimal from a UnicodeString containing a number.
1426 // Inefficient, but only used for samples, so simplicity trumps efficiency.
// @param num     the number's text; it is converted to invariant chars before parsing.
// @param status  in/out error code from the conversion and parse steps.
1428 FixedDecimal::FixedDecimal(const UnicodeString
&num
, UErrorCode
&status
) {
1430 cs
.appendInvariantChars(num
, status
);
1432 dl
.set(cs
.toStringPiece(), status
);
1433 if (U_FAILURE(status
)) {
1437 int32_t decimalPoint
= num
.indexOf(DOT
);
1438 double n
= dl
.getDouble();
1439 if (decimalPoint
== -1) {
// v = number of characters after the decimal point, i.e. the visible fraction digits
// exactly as written (trailing zeros included).
1442 int32_t v
= num
.length() - decimalPoint
- 1;
1443 init(n
, v
, getFractionalDigits(n
, v
));
// Copy constructor: plain member-by-member copy of all eight fields.
1448 FixedDecimal::FixedDecimal(const FixedDecimal
&other
) {
1449 source
= other
.source
;
1450 visibleDecimalDigitCount
= other
.visibleDecimalDigitCount
;
1451 decimalDigits
= other
.decimalDigits
;
1452 decimalDigitsWithoutTrailingZeros
= other
.decimalDigitsWithoutTrailingZeros
;
1453 intValue
= other
.intValue
;
1454 hasIntegerValue
= other
.hasIntegerValue
;
1455 isNegative
= other
.isNegative
;
1456 isNanOrInfinity
= other
.isNanOrInfinity
;
1460 void FixedDecimal::init(double n
) {
1461 int32_t numFractionDigits
= decimals(n
);
1462 init(n
, numFractionDigits
, getFractionalDigits(n
, numFractionDigits
));
1466 void FixedDecimal::init(double n
, int32_t v
, int64_t f
) {
1467 isNegative
= n
< 0.0;
1469 isNanOrInfinity
= uprv_isNaN(source
) || uprv_isPositiveInfinity(source
);
1470 if (isNanOrInfinity
) {
1474 hasIntegerValue
= FALSE
;
1476 intValue
= (int64_t)source
;
1477 hasIntegerValue
= (source
== intValue
);
1480 visibleDecimalDigitCount
= v
;
1483 decimalDigitsWithoutTrailingZeros
= 0;
1486 while ((fdwtz%10
) == 0) {
1489 decimalDigitsWithoutTrailingZeros
= fdwtz
;
1494 // Fast path only exact initialization. Return true if successful.
1495 // Note: Do not multiply by 10 each time through loop, rounding cruft can build
1496 // up that makes the check for an integer result fail.
1497 // A single multiply of the original number works more reliably.
1498 static int32_t p10
[] = {1, 10, 100, 1000, 10000};
1499 UBool
FixedDecimal::quickInit(double n
) {
1500 UBool success
= FALSE
;
1502 int32_t numFractionDigits
;
1503 for (numFractionDigits
= 0; numFractionDigits
<= 3; numFractionDigits
++) {
1504 double scaledN
= n
* p10
[numFractionDigits
];
1505 if (scaledN
== floor(scaledN
)) {
1511 init(n
, numFractionDigits
, getFractionalDigits(n
, numFractionDigits
));
1518 int32_t FixedDecimal::decimals(double n
) {
1519 // Count the number of decimal digits in the fraction part of the number, excluding trailing zeros.
1520 // fastpath the common cases, integers or fractions with 3 or fewer digits
1522 for (int ndigits
=0; ndigits
<=3; ndigits
++) {
1523 double scaledN
= n
* p10
[ndigits
];
1524 if (scaledN
== floor(scaledN
)) {
1529 // Slow path, convert with sprintf, parse converted output.
1531 sprintf(buf
, "%1.15e", n
);
1532 // formatted number looks like this: 1.234567890123457e-01
1533 int exponent
= atoi(buf
+18);
1534 int numFractionDigits
= 15;
1535 for (int i
=16; ; --i
) {
1536 if (buf
[i
] != '0') {
1539 --numFractionDigits
;
1541 numFractionDigits
-= exponent
; // Fraction part of fixed point representation.
1542 return numFractionDigits
;
1546 // Get the fraction digits of a double, represented as an integer.
1547 // v is the number of visible fraction digits in the displayed form of the number.
1548 // Example: n = 1001.234, v = 6, result = 234000
1549 // TODO: need to think through how this is used in the plural rule context.
1550 // This function can easily encounter integer overflow,
1551 // and can easily return noise digits when the precision of a double is exceeded.
1553 int64_t FixedDecimal::getFractionalDigits(double n
, int32_t v
) {
1554 if (v
== 0 || n
== floor(n
) || uprv_isNaN(n
) || uprv_isPositiveInfinity(n
)) {
1558 double fract
= n
- floor(n
);
1560 case 1: return (int64_t)(fract
*10.0 + 0.5);
1561 case 2: return (int64_t)(fract
*100.0 + 0.5);
1562 case 3: return (int64_t)(fract
*1000.0 + 0.5);
1564 double scaled
= floor(fract
* pow(10.0, (double)v
) + 0.5);
1565 if (scaled
> U_INT64_MAX
) {
1568 return (int64_t)scaled
;
1574 void FixedDecimal::adjustForMinFractionDigits(int32_t minFractionDigits
) {
1575 int32_t numTrailingFractionZeros
= minFractionDigits
- visibleDecimalDigitCount
;
1576 if (numTrailingFractionZeros
> 0) {
1577 for (int32_t i
=0; i
<numTrailingFractionZeros
; i
++) {
1578 // Do not let the decimalDigits value overflow if there are many trailing zeros.
1579 // Limit the value to 18 digits, the most that a 64 bit int can fully represent.
1580 if (decimalDigits
>= 100000000000000000LL) {
1583 decimalDigits
*= 10;
1585 visibleDecimalDigitCount
+= numTrailingFractionZeros
;
1590 double FixedDecimal::get(tokenType operand
) const {
1592 case tVariableN
: return source
;
1593 case tVariableI
: return (double)intValue
;
1594 case tVariableF
: return (double)decimalDigits
;
1595 case tVariableT
: return (double)decimalDigitsWithoutTrailingZeros
;
1596 case tVariableV
: return visibleDecimalDigitCount
;
1598 U_ASSERT(FALSE
); // unexpected.
1603 int32_t FixedDecimal::getVisibleFractionDigitCount() const {
1604 return visibleDecimalDigitCount
;
1609 PluralAvailableLocalesEnumeration::PluralAvailableLocalesEnumeration(UErrorCode
&status
) {
1612 fOpenStatus
= status
;
1613 if (U_FAILURE(status
)) {
1616 fOpenStatus
= U_ZERO_ERROR
;
1617 LocalUResourceBundlePointer
rb(ures_openDirect(NULL
, "plurals", &fOpenStatus
));
1618 fLocales
= ures_getByKey(rb
.getAlias(), "locales", NULL
, &fOpenStatus
);
1621 PluralAvailableLocalesEnumeration::~PluralAvailableLocalesEnumeration() {
1622 ures_close(fLocales
);
1628 const char *PluralAvailableLocalesEnumeration::next(int32_t *resultLength
, UErrorCode
&status
) {
1629 if (U_FAILURE(status
)) {
1632 if (U_FAILURE(fOpenStatus
)) {
1633 status
= fOpenStatus
;
1636 fRes
= ures_getNextResource(fLocales
, fRes
, &status
);
1637 if (fRes
== NULL
|| U_FAILURE(status
)) {
1638 if (status
== U_INDEX_OUTOFBOUNDS_ERROR
) {
1639 status
= U_ZERO_ERROR
;
1643 const char *result
= ures_getKey(fRes
);
1644 if (resultLength
!= NULL
) {
1645 *resultLength
= uprv_strlen(result
);
1651 void PluralAvailableLocalesEnumeration::reset(UErrorCode
&status
) {
1652 if (U_FAILURE(status
)) {
1655 if (U_FAILURE(fOpenStatus
)) {
1656 status
= fOpenStatus
;
1659 ures_resetIterator(fLocales
);
1662 int32_t PluralAvailableLocalesEnumeration::count(UErrorCode
&status
) const {
1663 if (U_FAILURE(status
)) {
1666 if (U_FAILURE(fOpenStatus
)) {
1667 status
= fOpenStatus
;
1670 return ures_getSize(fLocales
);
1676 #endif /* #if !UCONFIG_NO_FORMATTING */