]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/plurrule.cpp
ICU-59173.0.1.tar.gz
[apple/icu.git] / icuSources / i18n / plurrule.cpp
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
46f4442e
A
3/*
4*******************************************************************************
2ca993e8 5* Copyright (C) 2007-2016, International Business Machines Corporation and
46f4442e
A
6* others. All Rights Reserved.
7*******************************************************************************
8*
51004dcb 9* File plurrule.cpp
46f4442e
A
10*/
11
57a6839d
A
12#include <math.h>
13#include <stdio.h>
14
46f4442e 15#include "unicode/utypes.h"
4388f060 16#include "unicode/localpointer.h"
46f4442e 17#include "unicode/plurrule.h"
51004dcb 18#include "unicode/upluralrules.h"
4388f060 19#include "unicode/ures.h"
f3c0d7a5
A
20#include "unicode/numfmt.h"
21#include "unicode/decimfmt.h"
57a6839d 22#include "charstr.h"
46f4442e
A
23#include "cmemory.h"
24#include "cstring.h"
57a6839d 25#include "digitlst.h"
46f4442e 26#include "hash.h"
57a6839d 27#include "locutil.h"
46f4442e 28#include "mutex.h"
4388f060 29#include "patternprops.h"
46f4442e
A
30#include "plurrule_impl.h"
31#include "putilimp.h"
32#include "ucln_in.h"
33#include "ustrfmt.h"
4388f060 34#include "uassert.h"
57a6839d
A
35#include "uvectr32.h"
36#include "sharedpluralrules.h"
b331163b 37#include "unifiedcache.h"
2ca993e8
A
38#include "digitinterval.h"
39#include "visibledigits.h"
40
46f4442e
A
41#if !UCONFIG_NO_FORMATTING
42
57a6839d 43U_NAMESPACE_BEGIN
46f4442e 44
46f4442e
A
45static const UChar PLURAL_KEYWORD_OTHER[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,0};
46static const UChar PLURAL_DEFAULT_RULE[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,COLON,SPACE,LOW_N,0};
47static const UChar PK_IN[]={LOW_I,LOW_N,0};
48static const UChar PK_NOT[]={LOW_N,LOW_O,LOW_T,0};
49static const UChar PK_IS[]={LOW_I,LOW_S,0};
50static const UChar PK_MOD[]={LOW_M,LOW_O,LOW_D,0};
51static const UChar PK_AND[]={LOW_A,LOW_N,LOW_D,0};
52static const UChar PK_OR[]={LOW_O,LOW_R,0};
53static const UChar PK_VAR_N[]={LOW_N,0};
57a6839d
A
54static const UChar PK_VAR_I[]={LOW_I,0};
55static const UChar PK_VAR_F[]={LOW_F,0};
56static const UChar PK_VAR_T[]={LOW_T,0};
57static const UChar PK_VAR_V[]={LOW_V,0};
46f4442e 58static const UChar PK_WITHIN[]={LOW_W,LOW_I,LOW_T,LOW_H,LOW_I,LOW_N,0};
57a6839d
A
59static const UChar PK_DECIMAL[]={LOW_D,LOW_E,LOW_C,LOW_I,LOW_M,LOW_A,LOW_L,0};
60static const UChar PK_INTEGER[]={LOW_I,LOW_N,LOW_T,LOW_E,LOW_G,LOW_E,LOW_R,0};
46f4442e
A
61
62UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralRules)
63UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralKeywordEnumeration)
64
57a6839d 65PluralRules::PluralRules(UErrorCode& /*status*/)
46f4442e 66: UObject(),
57a6839d 67 mRules(NULL)
46f4442e 68{
46f4442e
A
69}
70
71PluralRules::PluralRules(const PluralRules& other)
72: UObject(other),
57a6839d 73 mRules(NULL)
46f4442e
A
74{
75 *this=other;
76}
77
78PluralRules::~PluralRules() {
79 delete mRules;
57a6839d
A
80}
81
82SharedPluralRules::~SharedPluralRules() {
83 delete ptr;
46f4442e
A
84}
85
86PluralRules*
87PluralRules::clone() const {
88 return new PluralRules(*this);
89}
90
91PluralRules&
92PluralRules::operator=(const PluralRules& other) {
93 if (this != &other) {
94 delete mRules;
95 if (other.mRules==NULL) {
96 mRules = NULL;
97 }
98 else {
99 mRules = new RuleChain(*other.mRules);
100 }
46f4442e
A
101 }
102
103 return *this;
104}
105
57a6839d
A
106StringEnumeration* PluralRules::getAvailableLocales(UErrorCode &status) {
107 StringEnumeration *result = new PluralAvailableLocalesEnumeration(status);
108 if (result == NULL && U_SUCCESS(status)) {
109 status = U_MEMORY_ALLOCATION_ERROR;
110 }
111 if (U_FAILURE(status)) {
112 delete result;
113 result = NULL;
114 }
115 return result;
116}
117
118
46f4442e
A
119PluralRules* U_EXPORT2
120PluralRules::createRules(const UnicodeString& description, UErrorCode& status) {
729e4ab9
A
121 if (U_FAILURE(status)) {
122 return NULL;
123 }
57a6839d
A
124
125 PluralRuleParser parser;
46f4442e 126 PluralRules *newRules = new PluralRules(status);
57a6839d
A
127 if (U_SUCCESS(status) && newRules == NULL) {
128 status = U_MEMORY_ALLOCATION_ERROR;
46f4442e 129 }
57a6839d 130 parser.parse(description, newRules, status);
46f4442e
A
131 if (U_FAILURE(status)) {
132 delete newRules;
57a6839d 133 newRules = NULL;
46f4442e 134 }
57a6839d 135 return newRules;
46f4442e
A
136}
137
57a6839d 138
46f4442e
A
139PluralRules* U_EXPORT2
140PluralRules::createDefaultRules(UErrorCode& status) {
4388f060 141 return createRules(UnicodeString(TRUE, PLURAL_DEFAULT_RULE, -1), status);
46f4442e
A
142}
143
57a6839d
A
144/******************************************************************************/
145/* Create PluralRules cache */
146
b331163b
A
147template<> U_I18N_API
148const SharedPluralRules *LocaleCacheKey<SharedPluralRules>::createObject(
149 const void * /*unused*/, UErrorCode &status) const {
150 const char *localeId = fLoc.getName();
57a6839d
A
151 PluralRules *pr = PluralRules::internalForLocale(
152 localeId, UPLURAL_TYPE_CARDINAL, status);
153 if (U_FAILURE(status)) {
154 return NULL;
155 }
b331163b 156 SharedPluralRules *result = new SharedPluralRules(pr);
57a6839d
A
157 if (result == NULL) {
158 status = U_MEMORY_ALLOCATION_ERROR;
159 delete pr;
160 return NULL;
161 }
b331163b 162 result->addRef();
57a6839d
A
163 return result;
164}
165
57a6839d
A
166/* end plural rules cache */
167/******************************************************************************/
168
169const SharedPluralRules* U_EXPORT2
170PluralRules::createSharedInstance(
171 const Locale& locale, UPluralType type, UErrorCode& status) {
172 if (U_FAILURE(status)) {
173 return NULL;
174 }
175 if (type != UPLURAL_TYPE_CARDINAL) {
176 status = U_UNSUPPORTED_ERROR;
177 return NULL;
178 }
179 const SharedPluralRules *result = NULL;
b331163b 180 UnifiedCache::getByLocale(locale, result, status);
57a6839d
A
181 return result;
182}
183
46f4442e
A
184PluralRules* U_EXPORT2
185PluralRules::forLocale(const Locale& locale, UErrorCode& status) {
51004dcb
A
186 return forLocale(locale, UPLURAL_TYPE_CARDINAL, status);
187}
188
189PluralRules* U_EXPORT2
190PluralRules::forLocale(const Locale& locale, UPluralType type, UErrorCode& status) {
57a6839d
A
191 if (type != UPLURAL_TYPE_CARDINAL) {
192 return internalForLocale(locale, type, status);
193 }
194 const SharedPluralRules *shared = createSharedInstance(
195 locale, type, status);
196 if (U_FAILURE(status)) {
197 return NULL;
198 }
199 PluralRules *result = (*shared)->clone();
200 shared->removeRef();
201 if (result == NULL) {
202 status = U_MEMORY_ALLOCATION_ERROR;
203 }
204 return result;
205}
206
207PluralRules* U_EXPORT2
208PluralRules::internalForLocale(const Locale& locale, UPluralType type, UErrorCode& status) {
729e4ab9
A
209 if (U_FAILURE(status)) {
210 return NULL;
211 }
51004dcb
A
212 if (type >= UPLURAL_TYPE_COUNT) {
213 status = U_ILLEGAL_ARGUMENT_ERROR;
214 return NULL;
215 }
46f4442e 216 PluralRules *newObj = new PluralRules(status);
729e4ab9 217 if (newObj==NULL || U_FAILURE(status)) {
4388f060 218 delete newObj;
46f4442e
A
219 return NULL;
220 }
51004dcb 221 UnicodeString locRule = newObj->getRuleFromResource(locale, type, status);
57a6839d
A
222 // TODO: which errors, if any, should be returned?
223 if (locRule.length() == 0) {
224 // Locales with no specific rules (all numbers have the "other" category
225 // will return a U_MISSING_RESOURCE_ERROR at this point. This is not
226 // an error.
227 locRule = UnicodeString(PLURAL_DEFAULT_RULE);
46f4442e 228 status = U_ZERO_ERROR;
46f4442e 229 }
57a6839d
A
230 PluralRuleParser parser;
231 parser.parse(locRule, newObj, status);
232 // TODO: should rule parse errors be returned, or
233 // should we silently use default rules?
234 // Original impl used default rules.
235 // Ask the question to ICU Core.
4388f060 236
46f4442e
A
237 return newObj;
238}
239
240UnicodeString
241PluralRules::select(int32_t number) const {
57a6839d 242 return select(FixedDecimal(number));
46f4442e
A
243}
244
245UnicodeString
246PluralRules::select(double number) const {
57a6839d
A
247 return select(FixedDecimal(number));
248}
249
f3c0d7a5
A
250UnicodeString
251PluralRules::select(const Formattable& obj, const NumberFormat& fmt, UErrorCode& status) const {
252 if (U_SUCCESS(status)) {
253 const DecimalFormat *decFmt = dynamic_cast<const DecimalFormat *>(&fmt);
254 if (decFmt != NULL) {
255 VisibleDigitsWithExponent digits;
256 decFmt->initVisibleDigitsWithExponent(obj, digits, status);
257 if (U_SUCCESS(status)) {
258 return select(digits);
259 }
260 } else {
261 double number = obj.getDouble(status);
262 if (U_SUCCESS(status)) {
263 return select(number);
264 }
265 }
266 }
267 return UnicodeString();
268}
269
57a6839d
A
270UnicodeString
271PluralRules::select(const FixedDecimal &number) const {
46f4442e 272 if (mRules == NULL) {
4388f060 273 return UnicodeString(TRUE, PLURAL_DEFAULT_RULE, -1);
46f4442e
A
274 }
275 else {
276 return mRules->select(number);
277 }
278}
279
2ca993e8
A
280UnicodeString
281PluralRules::select(const VisibleDigitsWithExponent &number) const {
282 if (number.getExponent() != NULL) {
283 return UnicodeString(TRUE, PLURAL_DEFAULT_RULE, -1);
284 }
285 return select(FixedDecimal(number.getMantissa()));
286}
287
288
289
46f4442e
A
290StringEnumeration*
291PluralRules::getKeywords(UErrorCode& status) const {
292 if (U_FAILURE(status)) return NULL;
293 StringEnumeration* nameEnumerator = new PluralKeywordEnumeration(mRules, status);
4388f060
A
294 if (U_FAILURE(status)) {
295 delete nameEnumerator;
296 return NULL;
297 }
729e4ab9 298
46f4442e
A
299 return nameEnumerator;
300}
301
4388f060 302double
57a6839d
A
303PluralRules::getUniqueKeywordValue(const UnicodeString& /* keyword */) {
304 // Not Implemented.
305 return UPLRULES_NO_UNIQUE_VALUE;
4388f060
A
306}
307
308int32_t
57a6839d
A
309PluralRules::getAllKeywordValues(const UnicodeString & /* keyword */, double * /* dest */,
310 int32_t /* destCapacity */, UErrorCode& error) {
311 error = U_UNSUPPORTED_ERROR;
312 return 0;
4388f060
A
313}
314
57a6839d
A
315
/**
 * Returns the power of ten by which d must be multiplied so that the product
 * compares equal to its own floor (i.e. has no fractional part).
 * Returns 1.0 for values that are already integral, and for NaN.
 */
static double scaleForInt(double d) {
    double scale = 1.0;
    // NaN compares unequal to everything, including floor(NaN), so the loop
    // below would never terminate for it.
    if (d != d) {
        return scale;
    }
    while (d != floor(d)) {
        d = d * 10.0;
        scale = scale * 10.0;
    }
    return scale;
}
4388f060 324
57a6839d
A
325static int32_t
326getSamplesFromString(const UnicodeString &samples, double *dest,
327 int32_t destCapacity, UErrorCode& status) {
328 int32_t sampleCount = 0;
329 int32_t sampleStartIdx = 0;
330 int32_t sampleEndIdx = 0;
331
332 //std::string ss; // TODO: debugging.
333 // std::cout << "PluralRules::getSamples(), samples = \"" << samples.toUTF8String(ss) << "\"\n";
334 for (sampleCount = 0; sampleCount < destCapacity && sampleStartIdx < samples.length(); ) {
335 sampleEndIdx = samples.indexOf(COMMA, sampleStartIdx);
336 if (sampleEndIdx == -1) {
337 sampleEndIdx = samples.length();
338 }
339 const UnicodeString &sampleRange = samples.tempSubStringBetween(sampleStartIdx, sampleEndIdx);
340 // ss.erase();
341 // std::cout << "PluralRules::getSamples(), samplesRange = \"" << sampleRange.toUTF8String(ss) << "\"\n";
342 int32_t tildeIndex = sampleRange.indexOf(TILDE);
343 if (tildeIndex < 0) {
344 FixedDecimal fixed(sampleRange, status);
345 double sampleValue = fixed.source;
346 if (fixed.visibleDecimalDigitCount == 0 || sampleValue != floor(sampleValue)) {
347 dest[sampleCount++] = sampleValue;
348 }
349 } else {
350
351 FixedDecimal fixedLo(sampleRange.tempSubStringBetween(0, tildeIndex), status);
352 FixedDecimal fixedHi(sampleRange.tempSubStringBetween(tildeIndex+1), status);
353 double rangeLo = fixedLo.source;
354 double rangeHi = fixedHi.source;
355 if (U_FAILURE(status)) {
356 break;
357 }
358 if (rangeHi < rangeLo) {
359 status = U_INVALID_FORMAT_ERROR;
360 break;
361 }
4388f060 362
57a6839d
A
363 // For ranges of samples with fraction decimal digits, scale the number up so that we
364 // are adding one in the units place. Avoids roundoffs from repetitive adds of tenths.
4388f060 365
57a6839d
A
366 double scale = scaleForInt(rangeLo);
367 double t = scaleForInt(rangeHi);
368 if (t > scale) {
369 scale = t;
370 }
371 rangeLo *= scale;
372 rangeHi *= scale;
373 for (double n=rangeLo; n<=rangeHi; n+=1) {
374 // Hack Alert: don't return any decimal samples with integer values that
375 // originated from a format with trailing decimals.
376 // This API is returning doubles, which can't distinguish having displayed
377 // zeros to the right of the decimal.
378 // This results in test failures with values mapping back to a different keyword.
379 double sampleValue = n/scale;
380 if (!(sampleValue == floor(sampleValue) && fixedLo.visibleDecimalDigitCount > 0)) {
381 dest[sampleCount++] = sampleValue;
382 }
383 if (sampleCount >= destCapacity) {
384 break;
385 }
386 }
4388f060 387 }
57a6839d 388 sampleStartIdx = sampleEndIdx + 1;
4388f060 389 }
57a6839d
A
390 return sampleCount;
391}
4388f060 392
57a6839d
A
393
394int32_t
395PluralRules::getSamples(const UnicodeString &keyword, double *dest,
396 int32_t destCapacity, UErrorCode& status) {
397 RuleChain *rc = rulesForKeyword(keyword);
398 if (rc == NULL || destCapacity == 0 || U_FAILURE(status)) {
399 return 0;
4388f060 400 }
57a6839d
A
401 int32_t numSamples = getSamplesFromString(rc->fIntegerSamples, dest, destCapacity, status);
402 if (numSamples == 0) {
403 numSamples = getSamplesFromString(rc->fDecimalSamples, dest, destCapacity, status);
4388f060 404 }
57a6839d
A
405 return numSamples;
406}
407
408
409RuleChain *PluralRules::rulesForKeyword(const UnicodeString &keyword) const {
410 RuleChain *rc;
411 for (rc = mRules; rc != NULL; rc = rc->fNext) {
412 if (rc->fKeyword == keyword) {
413 break;
414 }
415 }
416 return rc;
4388f060
A
417}
418
46f4442e
A
419
420UBool
421PluralRules::isKeyword(const UnicodeString& keyword) const {
4388f060 422 if (0 == keyword.compare(PLURAL_KEYWORD_OTHER, 5)) {
46f4442e
A
423 return true;
424 }
57a6839d 425 return rulesForKeyword(keyword) != NULL;
46f4442e
A
426}
427
428UnicodeString
429PluralRules::getKeywordOther() const {
4388f060 430 return UnicodeString(TRUE, PLURAL_KEYWORD_OTHER, 5);
46f4442e
A
431}
432
433UBool
434PluralRules::operator==(const PluralRules& other) const {
46f4442e
A
435 const UnicodeString *ptrKeyword;
436 UErrorCode status= U_ZERO_ERROR;
437
438 if ( this == &other ) {
439 return TRUE;
440 }
4388f060
A
441 LocalPointer<StringEnumeration> myKeywordList(getKeywords(status));
442 LocalPointer<StringEnumeration> otherKeywordList(other.getKeywords(status));
729e4ab9
A
443 if (U_FAILURE(status)) {
444 return FALSE;
445 }
46f4442e 446
4388f060
A
447 if (myKeywordList->count(status)!=otherKeywordList->count(status)) {
448 return FALSE;
46f4442e 449 }
4388f060
A
450 myKeywordList->reset(status);
451 while ((ptrKeyword=myKeywordList->snext(status))!=NULL) {
452 if (!other.isKeyword(*ptrKeyword)) {
729e4ab9
A
453 return FALSE;
454 }
4388f060
A
455 }
456 otherKeywordList->reset(status);
457 while ((ptrKeyword=otherKeywordList->snext(status))!=NULL) {
458 if (!this->isKeyword(*ptrKeyword)) {
46f4442e
A
459 return FALSE;
460 }
461 }
4388f060
A
462 if (U_FAILURE(status)) {
463 return FALSE;
464 }
46f4442e 465
46f4442e
A
466 return TRUE;
467}
468
57a6839d 469
46f4442e 470void
57a6839d 471PluralRuleParser::parse(const UnicodeString& ruleData, PluralRules *prules, UErrorCode &status)
46f4442e 472{
729e4ab9
A
473 if (U_FAILURE(status)) {
474 return;
475 }
57a6839d
A
476 U_ASSERT(ruleIndex == 0); // Parsers are good for a single use only!
477 ruleSrc = &ruleData;
478
479 while (ruleIndex< ruleSrc->length()) {
480 getNextToken(status);
46f4442e
A
481 if (U_FAILURE(status)) {
482 return;
483 }
57a6839d 484 checkSyntax(status);
46f4442e
A
485 if (U_FAILURE(status)) {
486 return;
487 }
488 switch (type) {
489 case tAnd:
4388f060 490 U_ASSERT(curAndConstraint != NULL);
46f4442e
A
491 curAndConstraint = curAndConstraint->add();
492 break;
493 case tOr:
57a6839d
A
494 {
495 U_ASSERT(currentChain != NULL);
496 OrConstraint *orNode=currentChain->ruleHeader;
497 while (orNode->next != NULL) {
498 orNode = orNode->next;
499 }
500 orNode->next= new OrConstraint();
501 orNode=orNode->next;
502 orNode->next=NULL;
503 curAndConstraint = orNode->add();
46f4442e 504 }
46f4442e
A
505 break;
506 case tIs:
4388f060 507 U_ASSERT(curAndConstraint != NULL);
57a6839d
A
508 U_ASSERT(curAndConstraint->value == -1);
509 U_ASSERT(curAndConstraint->rangeList == NULL);
46f4442e
A
510 break;
511 case tNot:
4388f060 512 U_ASSERT(curAndConstraint != NULL);
57a6839d 513 curAndConstraint->negated=TRUE;
46f4442e 514 break;
57a6839d
A
515
516 case tNotEqual:
517 curAndConstraint->negated=TRUE;
2ca993e8 518 U_FALLTHROUGH;
46f4442e 519 case tIn:
46f4442e 520 case tWithin:
57a6839d 521 case tEqual:
4388f060 522 U_ASSERT(curAndConstraint != NULL);
57a6839d
A
523 curAndConstraint->rangeList = new UVector32(status);
524 curAndConstraint->rangeList->addElement(-1, status); // range Low
525 curAndConstraint->rangeList->addElement(-1, status); // range Hi
526 rangeLowIdx = 0;
527 rangeHiIdx = 1;
528 curAndConstraint->value=PLURAL_RANGE_HIGH;
529 curAndConstraint->integerOnly = (type != tWithin);
46f4442e
A
530 break;
531 case tNumber:
4388f060 532 U_ASSERT(curAndConstraint != NULL);
46f4442e
A
533 if ( (curAndConstraint->op==AndConstraint::MOD)&&
534 (curAndConstraint->opNum == -1 ) ) {
535 curAndConstraint->opNum=getNumberValue(token);
536 }
537 else {
57a6839d
A
538 if (curAndConstraint->rangeList == NULL) {
539 // this is for an 'is' rule
540 curAndConstraint->value = getNumberValue(token);
541 } else {
542 // this is for an 'in' or 'within' rule
543 if (curAndConstraint->rangeList->elementAti(rangeLowIdx) == -1) {
544 curAndConstraint->rangeList->setElementAt(getNumberValue(token), rangeLowIdx);
545 curAndConstraint->rangeList->setElementAt(getNumberValue(token), rangeHiIdx);
546 }
547 else {
548 curAndConstraint->rangeList->setElementAt(getNumberValue(token), rangeHiIdx);
549 if (curAndConstraint->rangeList->elementAti(rangeLowIdx) >
550 curAndConstraint->rangeList->elementAti(rangeHiIdx)) {
551 // Range Lower bound > Range Upper bound.
552 // U_UNEXPECTED_TOKEN seems a little funny, but it is consistently
553 // used for all plural rule parse errors.
554 status = U_UNEXPECTED_TOKEN;
555 break;
556 }
557 }
46f4442e
A
558 }
559 }
560 break;
57a6839d
A
561 case tComma:
562 // TODO: rule syntax checking is inadequate, can happen with badly formed rules.
563 // Catch cases like "n mod 10, is 1" here instead.
564 if (curAndConstraint == NULL || curAndConstraint->rangeList == NULL) {
565 status = U_UNEXPECTED_TOKEN;
566 break;
567 }
568 U_ASSERT(curAndConstraint->rangeList->size() >= 2);
569 rangeLowIdx = curAndConstraint->rangeList->size();
570 curAndConstraint->rangeList->addElement(-1, status); // range Low
571 rangeHiIdx = curAndConstraint->rangeList->size();
572 curAndConstraint->rangeList->addElement(-1, status); // range Hi
573 break;
46f4442e 574 case tMod:
4388f060 575 U_ASSERT(curAndConstraint != NULL);
46f4442e
A
576 curAndConstraint->op=AndConstraint::MOD;
577 break;
57a6839d
A
578 case tVariableN:
579 case tVariableI:
580 case tVariableF:
581 case tVariableT:
582 case tVariableV:
583 U_ASSERT(curAndConstraint != NULL);
584 curAndConstraint->digitsType = type;
585 break;
46f4442e 586 case tKeyword:
57a6839d
A
587 {
588 RuleChain *newChain = new RuleChain;
589 if (newChain == NULL) {
590 status = U_MEMORY_ALLOCATION_ERROR;
591 break;
46f4442e 592 }
57a6839d
A
593 newChain->fKeyword = token;
594 if (prules->mRules == NULL) {
595 prules->mRules = newChain;
596 } else {
597 // The new rule chain goes at the end of the linked list of rule chains,
598 // unless there is an "other" keyword & chain. "other" must remain last.
599 RuleChain *insertAfter = prules->mRules;
600 while (insertAfter->fNext!=NULL &&
601 insertAfter->fNext->fKeyword.compare(PLURAL_KEYWORD_OTHER, 5) != 0 ){
602 insertAfter=insertAfter->fNext;
46f4442e 603 }
57a6839d
A
604 newChain->fNext = insertAfter->fNext;
605 insertAfter->fNext = newChain;
4388f060 606 }
57a6839d
A
607 OrConstraint *orNode = new OrConstraint();
608 newChain->ruleHeader = orNode;
46f4442e 609 curAndConstraint = orNode->add();
57a6839d
A
610 currentChain = newChain;
611 }
46f4442e 612 break;
46f4442e 613
57a6839d
A
614 case tInteger:
615 for (;;) {
616 getNextToken(status);
617 if (U_FAILURE(status) || type == tSemiColon || type == tEOF || type == tAt) {
618 break;
4388f060 619 }
57a6839d
A
620 if (type == tEllipsis) {
621 currentChain->fIntegerSamplesUnbounded = TRUE;
622 continue;
623 }
624 currentChain->fIntegerSamples.append(token);
4388f060 625 }
57a6839d 626 break;
4388f060 627
57a6839d
A
628 case tDecimal:
629 for (;;) {
630 getNextToken(status);
631 if (U_FAILURE(status) || type == tSemiColon || type == tEOF || type == tAt) {
4388f060
A
632 break;
633 }
57a6839d
A
634 if (type == tEllipsis) {
635 currentChain->fDecimalSamplesUnbounded = TRUE;
636 continue;
637 }
638 currentChain->fDecimalSamples.append(token);
4388f060 639 }
57a6839d
A
640 break;
641
642 default:
643 break;
4388f060 644 }
57a6839d
A
645 prevType=type;
646 if (U_FAILURE(status)) {
647 break;
4388f060
A
648 }
649 }
46f4442e
A
650}
651
652UnicodeString
51004dcb 653PluralRules::getRuleFromResource(const Locale& locale, UPluralType type, UErrorCode& errCode) {
46f4442e 654 UnicodeString emptyStr;
4388f060 655
729e4ab9
A
656 if (U_FAILURE(errCode)) {
657 return emptyStr;
658 }
51004dcb 659 LocalUResourceBundlePointer rb(ures_openDirect(NULL, "plurals", &errCode));
46f4442e 660 if(U_FAILURE(errCode)) {
46f4442e
A
661 return emptyStr;
662 }
51004dcb
A
663 const char *typeKey;
664 switch (type) {
665 case UPLURAL_TYPE_CARDINAL:
666 typeKey = "locales";
667 break;
668 case UPLURAL_TYPE_ORDINAL:
669 typeKey = "locales_ordinals";
670 break;
671 default:
672 // Must not occur: The caller should have checked for valid types.
673 errCode = U_ILLEGAL_ARGUMENT_ERROR;
674 return emptyStr;
675 }
676 LocalUResourceBundlePointer locRes(ures_getByKey(rb.getAlias(), typeKey, NULL, &errCode));
46f4442e 677 if(U_FAILURE(errCode)) {
46f4442e 678 return emptyStr;
4388f060 679 }
46f4442e
A
680 int32_t resLen=0;
681 const char *curLocaleName=locale.getName();
51004dcb 682 const UChar* s = ures_getStringByKey(locRes.getAlias(), curLocaleName, &resLen, &errCode);
46f4442e
A
683
684 if (s == NULL) {
685 // Check parent locales.
686 UErrorCode status = U_ZERO_ERROR;
687 char parentLocaleName[ULOC_FULLNAME_CAPACITY];
688 const char *curLocaleName=locale.getName();
46f4442e 689 uprv_strcpy(parentLocaleName, curLocaleName);
4388f060 690
51004dcb
A
691 while (uloc_getParent(parentLocaleName, parentLocaleName,
692 ULOC_FULLNAME_CAPACITY, &status) > 0) {
46f4442e 693 resLen=0;
51004dcb 694 s = ures_getStringByKey(locRes.getAlias(), parentLocaleName, &resLen, &status);
46f4442e
A
695 if (s != NULL) {
696 errCode = U_ZERO_ERROR;
697 break;
698 }
699 status = U_ZERO_ERROR;
700 }
701 }
702 if (s==NULL) {
46f4442e
A
703 return emptyStr;
704 }
4388f060 705
46f4442e 706 char setKey[256];
46f4442e
A
707 u_UCharsToChars(s, setKey, resLen + 1);
708 // printf("\n PluralRule: %s\n", setKey);
4388f060 709
51004dcb 710 LocalUResourceBundlePointer ruleRes(ures_getByKey(rb.getAlias(), "rules", NULL, &errCode));
46f4442e 711 if(U_FAILURE(errCode)) {
46f4442e
A
712 return emptyStr;
713 }
51004dcb 714 LocalUResourceBundlePointer setRes(ures_getByKey(ruleRes.getAlias(), setKey, NULL, &errCode));
46f4442e 715 if (U_FAILURE(errCode)) {
46f4442e
A
716 return emptyStr;
717 }
718
51004dcb 719 int32_t numberKeys = ures_getSize(setRes.getAlias());
57a6839d
A
720 UnicodeString result;
721 const char *key=NULL;
722 for(int32_t i=0; i<numberKeys; ++i) { // Keys are zero, one, few, ...
723 UnicodeString rules = ures_getNextUnicodeString(setRes.getAlias(), &key, &errCode);
724 UnicodeString uKey(key, -1, US_INV);
725 result.append(uKey);
726 result.append(COLON);
727 result.append(rules);
728 result.append(SEMI_COLON);
729 }
730 return result;
731}
732
733
734UnicodeString
735PluralRules::getRules() const {
736 UnicodeString rules;
737 if (mRules != NULL) {
738 mRules->dumpRules(rules);
739 }
740 return rules;
46f4442e
A
741}
742
57a6839d 743
46f4442e
A
744AndConstraint::AndConstraint() {
745 op = AndConstraint::NONE;
746 opNum=-1;
57a6839d
A
747 value = -1;
748 rangeList = NULL;
749 negated = FALSE;
750 integerOnly = FALSE;
751 digitsType = none;
46f4442e
A
752 next=NULL;
753}
754
755
756AndConstraint::AndConstraint(const AndConstraint& other) {
757 this->op = other.op;
758 this->opNum=other.opNum;
57a6839d
A
759 this->value=other.value;
760 this->rangeList=NULL;
761 if (other.rangeList != NULL) {
762 UErrorCode status = U_ZERO_ERROR;
763 this->rangeList = new UVector32(status);
764 this->rangeList->assign(*other.rangeList, status);
765 }
46f4442e 766 this->integerOnly=other.integerOnly;
57a6839d
A
767 this->negated=other.negated;
768 this->digitsType = other.digitsType;
46f4442e
A
769 if (other.next==NULL) {
770 this->next=NULL;
771 }
772 else {
773 this->next = new AndConstraint(*other.next);
774 }
775}
776
777AndConstraint::~AndConstraint() {
57a6839d 778 delete rangeList;
46f4442e
A
779 if (next!=NULL) {
780 delete next;
781 }
782}
783
784
785UBool
57a6839d
A
786AndConstraint::isFulfilled(const FixedDecimal &number) {
787 UBool result = TRUE;
788 if (digitsType == none) {
789 // An empty AndConstraint, created by a rule with a keyword but no following expression.
790 return TRUE;
46f4442e 791 }
57a6839d
A
792 double n = number.get(digitsType); // pulls n | i | v | f value for the number.
793 // Will always be positive.
794 // May be non-integer (n option only)
795 do {
796 if (integerOnly && n != uprv_floor(n)) {
797 result = FALSE;
798 break;
799 }
800
801 if (op == MOD) {
802 n = fmod(n, opNum);
803 }
804 if (rangeList == NULL) {
805 result = value == -1 || // empty rule
806 n == value; // 'is' rule
807 break;
46f4442e 808 }
57a6839d
A
809 result = FALSE; // 'in' or 'within' rule
810 for (int32_t r=0; r<rangeList->size(); r+=2) {
811 if (rangeList->elementAti(r) <= n && n <= rangeList->elementAti(r+1)) {
812 result = TRUE;
813 break;
814 }
46f4442e 815 }
57a6839d
A
816 } while (FALSE);
817
818 if (negated) {
819 result = !result;
46f4442e 820 }
57a6839d 821 return result;
46f4442e
A
822}
823
824
825AndConstraint*
826AndConstraint::add()
827{
828 this->next = new AndConstraint();
829 return this->next;
830}
831
832OrConstraint::OrConstraint() {
833 childNode=NULL;
834 next=NULL;
835}
836
837OrConstraint::OrConstraint(const OrConstraint& other) {
838 if ( other.childNode == NULL ) {
839 this->childNode = NULL;
840 }
841 else {
842 this->childNode = new AndConstraint(*(other.childNode));
843 }
844 if (other.next == NULL ) {
845 this->next = NULL;
846 }
847 else {
848 this->next = new OrConstraint(*(other.next));
849 }
850}
851
852OrConstraint::~OrConstraint() {
853 if (childNode!=NULL) {
854 delete childNode;
855 }
856 if (next!=NULL) {
857 delete next;
858 }
859}
860
861AndConstraint*
862OrConstraint::add()
863{
864 OrConstraint *curOrConstraint=this;
865 {
866 while (curOrConstraint->next!=NULL) {
867 curOrConstraint = curOrConstraint->next;
868 }
57a6839d 869 U_ASSERT(curOrConstraint->childNode == NULL);
46f4442e
A
870 curOrConstraint->childNode = new AndConstraint();
871 }
872 return curOrConstraint->childNode;
873}
874
875UBool
57a6839d 876OrConstraint::isFulfilled(const FixedDecimal &number) {
46f4442e
A
877 OrConstraint* orRule=this;
878 UBool result=FALSE;
4388f060 879
46f4442e
A
880 while (orRule!=NULL && !result) {
881 result=TRUE;
882 AndConstraint* andRule = orRule->childNode;
883 while (andRule!=NULL && result) {
884 result = andRule->isFulfilled(number);
885 andRule=andRule->next;
886 }
887 orRule = orRule->next;
888 }
4388f060 889
46f4442e
A
890 return result;
891}
892
893
57a6839d
A
894RuleChain::RuleChain(): fKeyword(), fNext(NULL), ruleHeader(NULL), fDecimalSamples(), fIntegerSamples(),
895 fDecimalSamplesUnbounded(FALSE), fIntegerSamplesUnbounded(FALSE) {
46f4442e
A
896}
897
57a6839d
A
898RuleChain::RuleChain(const RuleChain& other) :
899 fKeyword(other.fKeyword), fNext(NULL), ruleHeader(NULL), fDecimalSamples(other.fDecimalSamples),
900 fIntegerSamples(other.fIntegerSamples), fDecimalSamplesUnbounded(other.fDecimalSamplesUnbounded),
901 fIntegerSamplesUnbounded(other.fIntegerSamplesUnbounded) {
46f4442e
A
902 if (other.ruleHeader != NULL) {
903 this->ruleHeader = new OrConstraint(*(other.ruleHeader));
904 }
57a6839d
A
905 if (other.fNext != NULL ) {
906 this->fNext = new RuleChain(*other.fNext);
46f4442e
A
907 }
908}
909
910RuleChain::~RuleChain() {
57a6839d
A
911 delete fNext;
912 delete ruleHeader;
46f4442e
A
913}
914
4388f060 915
57a6839d
A
916UnicodeString
917RuleChain::select(const FixedDecimal &number) const {
918 if (!number.isNanOrInfinity) {
919 for (const RuleChain *rules = this; rules != NULL; rules = rules->fNext) {
920 if (rules->ruleHeader->isFulfilled(number)) {
921 return rules->fKeyword;
922 }
923 }
924 }
925 return UnicodeString(TRUE, PLURAL_KEYWORD_OTHER, 5);
926}
46f4442e 927
57a6839d
A
928static UnicodeString tokenString(tokenType tok) {
929 UnicodeString s;
930 switch (tok) {
931 case tVariableN:
932 s.append(LOW_N); break;
933 case tVariableI:
934 s.append(LOW_I); break;
935 case tVariableF:
936 s.append(LOW_F); break;
937 case tVariableV:
938 s.append(LOW_V); break;
939 case tVariableT:
940 s.append(LOW_T); break;
941 default:
942 s.append(TILDE);
943 }
944 return s;
46f4442e
A
945}
946
947void
948RuleChain::dumpRules(UnicodeString& result) {
949 UChar digitString[16];
4388f060 950
46f4442e 951 if ( ruleHeader != NULL ) {
57a6839d
A
952 result += fKeyword;
953 result += COLON;
954 result += SPACE;
46f4442e
A
955 OrConstraint* orRule=ruleHeader;
956 while ( orRule != NULL ) {
957 AndConstraint* andRule=orRule->childNode;
958 while ( andRule != NULL ) {
57a6839d
A
959 if ((andRule->op==AndConstraint::NONE) && (andRule->rangeList==NULL) && (andRule->value == -1)) {
960 // Empty Rules.
961 } else if ( (andRule->op==AndConstraint::NONE) && (andRule->rangeList==NULL) ) {
962 result += tokenString(andRule->digitsType);
963 result += UNICODE_STRING_SIMPLE(" is ");
964 if (andRule->negated) {
46f4442e
A
965 result += UNICODE_STRING_SIMPLE("not ");
966 }
57a6839d 967 uprv_itou(digitString,16, andRule->value,10,0);
46f4442e
A
968 result += UnicodeString(digitString);
969 }
970 else {
57a6839d
A
971 result += tokenString(andRule->digitsType);
972 result += SPACE;
46f4442e 973 if (andRule->op==AndConstraint::MOD) {
57a6839d 974 result += UNICODE_STRING_SIMPLE("mod ");
46f4442e
A
975 uprv_itou(digitString,16, andRule->opNum,10,0);
976 result += UnicodeString(digitString);
977 }
57a6839d
A
978 if (andRule->rangeList==NULL) {
979 if (andRule->negated) {
46f4442e 980 result += UNICODE_STRING_SIMPLE(" is not ");
57a6839d 981 uprv_itou(digitString,16, andRule->value,10,0);
46f4442e
A
982 result += UnicodeString(digitString);
983 }
984 else {
985 result += UNICODE_STRING_SIMPLE(" is ");
57a6839d 986 uprv_itou(digitString,16, andRule->value,10,0);
46f4442e
A
987 result += UnicodeString(digitString);
988 }
989 }
990 else {
57a6839d 991 if (andRule->negated) {
46f4442e 992 if ( andRule->integerOnly ) {
57a6839d 993 result += UNICODE_STRING_SIMPLE(" not in ");
46f4442e
A
994 }
995 else {
57a6839d 996 result += UNICODE_STRING_SIMPLE(" not within ");
46f4442e 997 }
46f4442e
A
998 }
999 else {
1000 if ( andRule->integerOnly ) {
1001 result += UNICODE_STRING_SIMPLE(" in ");
1002 }
1003 else {
1004 result += UNICODE_STRING_SIMPLE(" within ");
1005 }
57a6839d
A
1006 }
1007 for (int32_t r=0; r<andRule->rangeList->size(); r+=2) {
1008 int32_t rangeLo = andRule->rangeList->elementAti(r);
1009 int32_t rangeHi = andRule->rangeList->elementAti(r+1);
1010 uprv_itou(digitString,16, rangeLo, 10, 0);
46f4442e 1011 result += UnicodeString(digitString);
57a6839d
A
1012 result += UNICODE_STRING_SIMPLE("..");
1013 uprv_itou(digitString,16, rangeHi, 10,0);
1014 result += UnicodeString(digitString);
1015 if (r+2 < andRule->rangeList->size()) {
1016 result += UNICODE_STRING_SIMPLE(", ");
1017 }
46f4442e
A
1018 }
1019 }
1020 }
1021 if ( (andRule=andRule->next) != NULL) {
57a6839d 1022 result += UNICODE_STRING_SIMPLE(" and ");
46f4442e
A
1023 }
1024 }
1025 if ( (orRule = orRule->next) != NULL ) {
57a6839d 1026 result += UNICODE_STRING_SIMPLE(" or ");
46f4442e
A
1027 }
1028 }
1029 }
57a6839d
A
1030 if ( fNext != NULL ) {
1031 result += UNICODE_STRING_SIMPLE("; ");
1032 fNext->dumpRules(result);
46f4442e
A
1033 }
1034}
1035
46f4442e
A
1036
1037UErrorCode
1038RuleChain::getKeywords(int32_t capacityOfKeywords, UnicodeString* keywords, int32_t& arraySize) const {
1039 if ( arraySize < capacityOfKeywords-1 ) {
57a6839d 1040 keywords[arraySize++]=fKeyword;
46f4442e
A
1041 }
1042 else {
1043 return U_BUFFER_OVERFLOW_ERROR;
1044 }
1045
57a6839d
A
1046 if ( fNext != NULL ) {
1047 return fNext->getKeywords(capacityOfKeywords, keywords, arraySize);
46f4442e
A
1048 }
1049 else {
1050 return U_ZERO_ERROR;
1051 }
1052}
1053
1054UBool
1055RuleChain::isKeyword(const UnicodeString& keywordParam) const {
57a6839d 1056 if ( fKeyword == keywordParam ) {
46f4442e
A
1057 return TRUE;
1058 }
1059
57a6839d
A
1060 if ( fNext != NULL ) {
1061 return fNext->isKeyword(keywordParam);
46f4442e
A
1062 }
1063 else {
1064 return FALSE;
1065 }
1066}
1067
1068
57a6839d
A
1069PluralRuleParser::PluralRuleParser() :
1070 ruleIndex(0), token(), type(none), prevType(none),
1071 curAndConstraint(NULL), currentChain(NULL), rangeLowIdx(-1), rangeHiIdx(-1)
1072{
1073}
1074
1075PluralRuleParser::~PluralRuleParser() {
46f4442e
A
1076}
1077
57a6839d
A
1078
1079int32_t
1080PluralRuleParser::getNumberValue(const UnicodeString& token) {
1081 int32_t i;
1082 char digits[128];
1083
2ca993e8 1084 i = token.extract(0, token.length(), digits, UPRV_LENGTHOF(digits), US_INV);
57a6839d
A
1085 digits[i]='\0';
1086
1087 return((int32_t)atoi(digits));
46f4442e
A
1088}
1089
57a6839d 1090
46f4442e 1091void
57a6839d 1092PluralRuleParser::checkSyntax(UErrorCode &status)
46f4442e
A
1093{
1094 if (U_FAILURE(status)) {
1095 return;
1096 }
57a6839d
A
1097 if (!(prevType==none || prevType==tSemiColon)) {
1098 type = getKeyType(token, type); // Switch token type from tKeyword if we scanned a reserved word,
1099 // and we are not at the start of a rule, where a
1100 // keyword is expected.
1101 }
1102
46f4442e
A
1103 switch(prevType) {
1104 case none:
1105 case tSemiColon:
57a6839d 1106 if (type!=tKeyword && type != tEOF) {
46f4442e
A
1107 status = U_UNEXPECTED_TOKEN;
1108 }
1109 break;
57a6839d
A
1110 case tVariableN:
1111 case tVariableI:
1112 case tVariableF:
1113 case tVariableT:
1114 case tVariableV:
1115 if (type != tIs && type != tMod && type != tIn &&
1116 type != tNot && type != tWithin && type != tEqual && type != tNotEqual) {
46f4442e
A
1117 status = U_UNEXPECTED_TOKEN;
1118 }
1119 break;
46f4442e 1120 case tKeyword:
57a6839d 1121 if (type != tColon) {
46f4442e
A
1122 status = U_UNEXPECTED_TOKEN;
1123 }
1124 break;
57a6839d
A
1125 case tColon:
1126 if (!(type == tVariableN ||
1127 type == tVariableI ||
1128 type == tVariableF ||
1129 type == tVariableT ||
1130 type == tVariableV ||
1131 type == tAt)) {
46f4442e
A
1132 status = U_UNEXPECTED_TOKEN;
1133 }
1134 break;
1135 case tIs:
57a6839d 1136 if ( type != tNumber && type != tNot) {
46f4442e
A
1137 status = U_UNEXPECTED_TOKEN;
1138 }
1139 break;
1140 case tNot:
57a6839d 1141 if (type != tNumber && type != tIn && type != tWithin) {
46f4442e
A
1142 status = U_UNEXPECTED_TOKEN;
1143 }
1144 break;
1145 case tMod:
57a6839d 1146 case tDot2:
46f4442e
A
1147 case tIn:
1148 case tWithin:
57a6839d
A
1149 case tEqual:
1150 case tNotEqual:
1151 if (type != tNumber) {
1152 status = U_UNEXPECTED_TOKEN;
1153 }
1154 break;
46f4442e
A
1155 case tAnd:
1156 case tOr:
57a6839d
A
1157 if ( type != tVariableN &&
1158 type != tVariableI &&
1159 type != tVariableF &&
1160 type != tVariableT &&
1161 type != tVariableV) {
1162 status = U_UNEXPECTED_TOKEN;
1163 }
1164 break;
1165 case tComma:
1166 if (type != tNumber) {
46f4442e
A
1167 status = U_UNEXPECTED_TOKEN;
1168 }
1169 break;
1170 case tNumber:
57a6839d
A
1171 if (type != tDot2 && type != tSemiColon && type != tIs && type != tNot &&
1172 type != tIn && type != tEqual && type != tNotEqual && type != tWithin &&
1173 type != tAnd && type != tOr && type != tComma && type != tAt &&
1174 type != tEOF)
46f4442e
A
1175 {
1176 status = U_UNEXPECTED_TOKEN;
1177 }
57a6839d
A
1178 // TODO: a comma following a number that is not part of a range will be allowed.
1179 // It's not the only case of this sort of thing. Parser needs a re-write.
1180 break;
1181 case tAt:
1182 if (type != tDecimal && type != tInteger) {
1183 status = U_UNEXPECTED_TOKEN;
1184 }
46f4442e
A
1185 break;
1186 default:
1187 status = U_UNEXPECTED_TOKEN;
1188 break;
1189 }
1190}
1191
57a6839d
A
1192
1193/*
1194 * Scan the next token from the input rules.
1195 * rules and returned token type are in the parser state variables.
1196 */
46f4442e 1197void
57a6839d 1198PluralRuleParser::getNextToken(UErrorCode &status)
46f4442e 1199{
729e4ab9
A
1200 if (U_FAILURE(status)) {
1201 return;
1202 }
57a6839d
A
1203
1204 UChar ch;
1205 while (ruleIndex < ruleSrc->length()) {
1206 ch = ruleSrc->charAt(ruleIndex);
1207 type = charType(ch);
1208 if (type != tSpace) {
1209 break;
46f4442e 1210 }
57a6839d 1211 ++(ruleIndex);
46f4442e 1212 }
57a6839d
A
1213 if (ruleIndex >= ruleSrc->length()) {
1214 type = tEOF;
1215 return;
46f4442e 1216 }
57a6839d
A
1217 int32_t curIndex= ruleIndex;
1218
1219 switch (type) {
1220 case tColon:
1221 case tSemiColon:
1222 case tComma:
1223 case tEllipsis:
1224 case tTilde: // scanned '~'
1225 case tAt: // scanned '@'
1226 case tEqual: // scanned '='
1227 case tMod: // scanned '%'
1228 // Single character tokens.
1229 ++curIndex;
1230 break;
1231
1232 case tNotEqual: // scanned '!'
1233 if (ruleSrc->charAt(curIndex+1) == EQUALS) {
1234 curIndex += 2;
1235 } else {
1236 type = none;
1237 curIndex += 1;
1238 }
1239 break;
1240
1241 case tKeyword:
1242 while (type == tKeyword && ++curIndex < ruleSrc->length()) {
1243 ch = ruleSrc->charAt(curIndex);
1244 type = charType(ch);
1245 }
1246 type = tKeyword;
1247 break;
1248
1249 case tNumber:
1250 while (type == tNumber && ++curIndex < ruleSrc->length()) {
1251 ch = ruleSrc->charAt(curIndex);
1252 type = charType(ch);
1253 }
1254 type = tNumber;
1255 break;
1256
1257 case tDot:
1258 // We could be looking at either ".." in a range, or "..." at the end of a sample.
1259 if (curIndex+1 >= ruleSrc->length() || ruleSrc->charAt(curIndex+1) != DOT) {
1260 ++curIndex;
1261 break; // Single dot
1262 }
1263 if (curIndex+2 >= ruleSrc->length() || ruleSrc->charAt(curIndex+2) != DOT) {
1264 curIndex += 2;
1265 type = tDot2;
1266 break; // double dot
1267 }
1268 type = tEllipsis;
1269 curIndex += 3;
1270 break; // triple dot
1271
1272 default:
1273 status = U_UNEXPECTED_TOKEN;
1274 ++curIndex;
1275 break;
1276 }
1277
1278 U_ASSERT(ruleIndex <= ruleSrc->length());
1279 U_ASSERT(curIndex <= ruleSrc->length());
1280 token=UnicodeString(*ruleSrc, ruleIndex, curIndex-ruleIndex);
1281 ruleIndex = curIndex;
46f4442e
A
1282}
1283
57a6839d
A
1284tokenType
1285PluralRuleParser::charType(UChar ch) {
46f4442e 1286 if ((ch>=U_ZERO) && (ch<=U_NINE)) {
57a6839d
A
1287 return tNumber;
1288 }
1289 if (ch>=LOW_A && ch<=LOW_Z) {
1290 return tKeyword;
46f4442e
A
1291 }
1292 switch (ch) {
1293 case COLON:
57a6839d 1294 return tColon;
46f4442e 1295 case SPACE:
57a6839d 1296 return tSpace;
46f4442e 1297 case SEMI_COLON:
57a6839d 1298 return tSemiColon;
46f4442e 1299 case DOT:
57a6839d
A
1300 return tDot;
1301 case COMMA:
1302 return tComma;
1303 case EXCLAMATION:
1304 return tNotEqual;
1305 case EQUALS:
1306 return tEqual;
1307 case PERCENT_SIGN:
1308 return tMod;
1309 case AT:
1310 return tAt;
1311 case ELLIPSIS:
1312 return tEllipsis;
1313 case TILDE:
1314 return tTilde;
46f4442e 1315 default :
57a6839d 1316 return none;
46f4442e
A
1317 }
1318}
1319
1320
57a6839d
A
1321// Set token type for reserved words in the Plural Rule syntax.
1322
1323tokenType
1324PluralRuleParser::getKeyType(const UnicodeString &token, tokenType keyType)
46f4442e 1325{
57a6839d
A
1326 if (keyType != tKeyword) {
1327 return keyType;
729e4ab9 1328 }
57a6839d
A
1329
1330 if (0 == token.compare(PK_VAR_N, 1)) {
46f4442e 1331 keyType = tVariableN;
57a6839d
A
1332 } else if (0 == token.compare(PK_VAR_I, 1)) {
1333 keyType = tVariableI;
1334 } else if (0 == token.compare(PK_VAR_F, 1)) {
1335 keyType = tVariableF;
1336 } else if (0 == token.compare(PK_VAR_T, 1)) {
1337 keyType = tVariableT;
1338 } else if (0 == token.compare(PK_VAR_V, 1)) {
1339 keyType = tVariableV;
1340 } else if (0 == token.compare(PK_IS, 2)) {
46f4442e 1341 keyType = tIs;
57a6839d 1342 } else if (0 == token.compare(PK_AND, 3)) {
46f4442e 1343 keyType = tAnd;
57a6839d 1344 } else if (0 == token.compare(PK_IN, 2)) {
46f4442e 1345 keyType = tIn;
57a6839d 1346 } else if (0 == token.compare(PK_WITHIN, 6)) {
46f4442e 1347 keyType = tWithin;
57a6839d 1348 } else if (0 == token.compare(PK_NOT, 3)) {
46f4442e 1349 keyType = tNot;
57a6839d 1350 } else if (0 == token.compare(PK_MOD, 3)) {
46f4442e 1351 keyType = tMod;
57a6839d 1352 } else if (0 == token.compare(PK_OR, 2)) {
46f4442e 1353 keyType = tOr;
57a6839d
A
1354 } else if (0 == token.compare(PK_DECIMAL, 7)) {
1355 keyType = tDecimal;
1356 } else if (0 == token.compare(PK_INTEGER, 7)) {
1357 keyType = tInteger;
46f4442e 1358 }
57a6839d 1359 return keyType;
46f4442e
A
1360}
1361
46f4442e 1362
4388f060
A
1363PluralKeywordEnumeration::PluralKeywordEnumeration(RuleChain *header, UErrorCode& status)
1364 : pos(0), fKeywordNames(status) {
729e4ab9
A
1365 if (U_FAILURE(status)) {
1366 return;
1367 }
4388f060
A
1368 fKeywordNames.setDeleter(uprv_deleteUObject);
1369 UBool addKeywordOther=TRUE;
1370 RuleChain *node=header;
46f4442e 1371 while(node!=NULL) {
57a6839d 1372 fKeywordNames.addElement(new UnicodeString(node->fKeyword), status);
729e4ab9
A
1373 if (U_FAILURE(status)) {
1374 return;
1375 }
57a6839d 1376 if (0 == node->fKeyword.compare(PLURAL_KEYWORD_OTHER, 5)) {
4388f060 1377 addKeywordOther= FALSE;
46f4442e 1378 }
57a6839d 1379 node=node->fNext;
46f4442e 1380 }
4388f060 1381
46f4442e
A
1382 if (addKeywordOther) {
1383 fKeywordNames.addElement(new UnicodeString(PLURAL_KEYWORD_OTHER), status);
1384 }
1385}
1386
1387const UnicodeString*
1388PluralKeywordEnumeration::snext(UErrorCode& status) {
1389 if (U_SUCCESS(status) && pos < fKeywordNames.size()) {
1390 return (const UnicodeString*)fKeywordNames.elementAt(pos++);
1391 }
1392 return NULL;
1393}
1394
1395void
1396PluralKeywordEnumeration::reset(UErrorCode& /*status*/) {
1397 pos=0;
1398}
1399
1400int32_t
1401PluralKeywordEnumeration::count(UErrorCode& /*status*/) const {
1402 return fKeywordNames.size();
1403}
1404
1405PluralKeywordEnumeration::~PluralKeywordEnumeration() {
46f4442e
A
1406}
1407
2ca993e8
A
1408FixedDecimal::FixedDecimal(const VisibleDigits &digits) {
1409 digits.getFixedDecimal(
1410 source, intValue, decimalDigits,
1411 decimalDigitsWithoutTrailingZeros,
1412 visibleDecimalDigitCount, hasIntegerValue);
1413 isNegative = digits.isNegative();
1414 isNanOrInfinity = digits.isNaNOrInfinity();
1415}
57a6839d
A
1416
1417FixedDecimal::FixedDecimal(double n, int32_t v, int64_t f) {
1418 init(n, v, f);
1419 // check values. TODO make into unit test.
1420 //
1421 // long visiblePower = (int) Math.pow(10, v);
1422 // if (decimalDigits > visiblePower) {
1423 // throw new IllegalArgumentException();
1424 // }
1425 // double fraction = intValue + (decimalDigits / (double) visiblePower);
1426 // if (fraction != source) {
1427 // double diff = Math.abs(fraction - source)/(Math.abs(fraction) + Math.abs(source));
1428 // if (diff > 0.00000001d) {
1429 // throw new IllegalArgumentException();
1430 // }
1431 // }
1432}
1433
1434FixedDecimal::FixedDecimal(double n, int32_t v) {
1435 // Ugly, but for samples we don't care.
1436 init(n, v, getFractionalDigits(n, v));
1437}
1438
1439FixedDecimal::FixedDecimal(double n) {
1440 init(n);
1441}
1442
1443FixedDecimal::FixedDecimal() {
1444 init(0, 0, 0);
1445}
1446
1447
1448// Create a FixedDecimal from a UnicodeString containing a number.
1449// Inefficient, but only used for samples, so simplicity trumps efficiency.
1450
1451FixedDecimal::FixedDecimal(const UnicodeString &num, UErrorCode &status) {
1452 CharString cs;
1453 cs.appendInvariantChars(num, status);
1454 DigitList dl;
1455 dl.set(cs.toStringPiece(), status);
1456 if (U_FAILURE(status)) {
1457 init(0, 0, 0);
1458 return;
1459 }
1460 int32_t decimalPoint = num.indexOf(DOT);
1461 double n = dl.getDouble();
1462 if (decimalPoint == -1) {
1463 init(n, 0, 0);
1464 } else {
1465 int32_t v = num.length() - decimalPoint - 1;
1466 init(n, v, getFractionalDigits(n, v));
1467 }
1468}
1469
1470
1471FixedDecimal::FixedDecimal(const FixedDecimal &other) {
1472 source = other.source;
1473 visibleDecimalDigitCount = other.visibleDecimalDigitCount;
1474 decimalDigits = other.decimalDigits;
1475 decimalDigitsWithoutTrailingZeros = other.decimalDigitsWithoutTrailingZeros;
1476 intValue = other.intValue;
1477 hasIntegerValue = other.hasIntegerValue;
1478 isNegative = other.isNegative;
1479 isNanOrInfinity = other.isNanOrInfinity;
1480}
1481
1482
1483void FixedDecimal::init(double n) {
1484 int32_t numFractionDigits = decimals(n);
1485 init(n, numFractionDigits, getFractionalDigits(n, numFractionDigits));
1486}
1487
1488
1489void FixedDecimal::init(double n, int32_t v, int64_t f) {
1490 isNegative = n < 0.0;
1491 source = fabs(n);
1492 isNanOrInfinity = uprv_isNaN(source) || uprv_isPositiveInfinity(source);
1493 if (isNanOrInfinity) {
1494 v = 0;
1495 f = 0;
1496 intValue = 0;
1497 hasIntegerValue = FALSE;
1498 } else {
1499 intValue = (int64_t)source;
1500 hasIntegerValue = (source == intValue);
1501 }
1502
1503 visibleDecimalDigitCount = v;
1504 decimalDigits = f;
1505 if (f == 0) {
1506 decimalDigitsWithoutTrailingZeros = 0;
1507 } else {
1508 int64_t fdwtz = f;
1509 while ((fdwtz%10) == 0) {
1510 fdwtz /= 10;
1511 }
1512 decimalDigitsWithoutTrailingZeros = fdwtz;
1513 }
1514}
1515
1516
1517// Fast path only exact initialization. Return true if successful.
1518// Note: Do not multiply by 10 each time through loop, rounding cruft can build
1519// up that makes the check for an integer result fail.
1520// A single multiply of the original number works more reliably.
1521static int32_t p10[] = {1, 10, 100, 1000, 10000};
1522UBool FixedDecimal::quickInit(double n) {
1523 UBool success = FALSE;
1524 n = fabs(n);
1525 int32_t numFractionDigits;
1526 for (numFractionDigits = 0; numFractionDigits <= 3; numFractionDigits++) {
1527 double scaledN = n * p10[numFractionDigits];
1528 if (scaledN == floor(scaledN)) {
1529 success = TRUE;
1530 break;
1531 }
1532 }
1533 if (success) {
1534 init(n, numFractionDigits, getFractionalDigits(n, numFractionDigits));
1535 }
1536 return success;
1537}
1538
1539
1540
1541int32_t FixedDecimal::decimals(double n) {
1542 // Count the number of decimal digits in the fraction part of the number, excluding trailing zeros.
1543 // fastpath the common cases, integers or fractions with 3 or fewer digits
1544 n = fabs(n);
1545 for (int ndigits=0; ndigits<=3; ndigits++) {
1546 double scaledN = n * p10[ndigits];
1547 if (scaledN == floor(scaledN)) {
1548 return ndigits;
1549 }
1550 }
1551
1552 // Slow path, convert with sprintf, parse converted output.
1553 char buf[30] = {0};
1554 sprintf(buf, "%1.15e", n);
1555 // formatted number looks like this: 1.234567890123457e-01
1556 int exponent = atoi(buf+18);
1557 int numFractionDigits = 15;
1558 for (int i=16; ; --i) {
1559 if (buf[i] != '0') {
1560 break;
1561 }
1562 --numFractionDigits;
1563 }
1564 numFractionDigits -= exponent; // Fraction part of fixed point representation.
1565 return numFractionDigits;
1566}
1567
1568
1569// Get the fraction digits of a double, represented as an integer.
1570// v is the number of visible fraction digits in the displayed form of the number.
1571// Example: n = 1001.234, v = 6, result = 234000
1572// TODO: need to think through how this is used in the plural rule context.
1573// This function can easily encounter integer overflow,
1574// and can easily return noise digits when the precision of a double is exceeded.
1575
1576int64_t FixedDecimal::getFractionalDigits(double n, int32_t v) {
1577 if (v == 0 || n == floor(n) || uprv_isNaN(n) || uprv_isPositiveInfinity(n)) {
1578 return 0;
1579 }
1580 n = fabs(n);
1581 double fract = n - floor(n);
1582 switch (v) {
1583 case 1: return (int64_t)(fract*10.0 + 0.5);
1584 case 2: return (int64_t)(fract*100.0 + 0.5);
1585 case 3: return (int64_t)(fract*1000.0 + 0.5);
1586 default:
1587 double scaled = floor(fract * pow(10.0, (double)v) + 0.5);
1588 if (scaled > U_INT64_MAX) {
1589 return U_INT64_MAX;
1590 } else {
1591 return (int64_t)scaled;
1592 }
1593 }
1594}
1595
1596
1597void FixedDecimal::adjustForMinFractionDigits(int32_t minFractionDigits) {
1598 int32_t numTrailingFractionZeros = minFractionDigits - visibleDecimalDigitCount;
1599 if (numTrailingFractionZeros > 0) {
1600 for (int32_t i=0; i<numTrailingFractionZeros; i++) {
1601 // Do not let the decimalDigits value overflow if there are many trailing zeros.
1602 // Limit the value to 18 digits, the most that a 64 bit int can fully represent.
1603 if (decimalDigits >= 100000000000000000LL) {
1604 break;
1605 }
1606 decimalDigits *= 10;
1607 }
1608 visibleDecimalDigitCount += numTrailingFractionZeros;
1609 }
1610}
1611
1612
1613double FixedDecimal::get(tokenType operand) const {
1614 switch(operand) {
1615 case tVariableN: return source;
1616 case tVariableI: return (double)intValue;
1617 case tVariableF: return (double)decimalDigits;
1618 case tVariableT: return (double)decimalDigitsWithoutTrailingZeros;
1619 case tVariableV: return visibleDecimalDigitCount;
1620 default:
1621 U_ASSERT(FALSE); // unexpected.
1622 return source;
1623 }
1624}
1625
1626int32_t FixedDecimal::getVisibleFractionDigitCount() const {
1627 return visibleDecimalDigitCount;
1628}
1629
1630
1631
1632PluralAvailableLocalesEnumeration::PluralAvailableLocalesEnumeration(UErrorCode &status) {
1633 fLocales = NULL;
1634 fRes = NULL;
1635 fOpenStatus = status;
1636 if (U_FAILURE(status)) {
1637 return;
1638 }
1639 fOpenStatus = U_ZERO_ERROR;
1640 LocalUResourceBundlePointer rb(ures_openDirect(NULL, "plurals", &fOpenStatus));
1641 fLocales = ures_getByKey(rb.getAlias(), "locales", NULL, &fOpenStatus);
1642}
1643
1644PluralAvailableLocalesEnumeration::~PluralAvailableLocalesEnumeration() {
1645 ures_close(fLocales);
1646 ures_close(fRes);
1647 fLocales = NULL;
1648 fRes = NULL;
1649}
1650
1651const char *PluralAvailableLocalesEnumeration::next(int32_t *resultLength, UErrorCode &status) {
1652 if (U_FAILURE(status)) {
1653 return NULL;
1654 }
1655 if (U_FAILURE(fOpenStatus)) {
1656 status = fOpenStatus;
1657 return NULL;
1658 }
1659 fRes = ures_getNextResource(fLocales, fRes, &status);
1660 if (fRes == NULL || U_FAILURE(status)) {
1661 if (status == U_INDEX_OUTOFBOUNDS_ERROR) {
1662 status = U_ZERO_ERROR;
1663 }
1664 return NULL;
1665 }
1666 const char *result = ures_getKey(fRes);
1667 if (resultLength != NULL) {
1668 *resultLength = uprv_strlen(result);
1669 }
1670 return result;
1671}
1672
1673
1674void PluralAvailableLocalesEnumeration::reset(UErrorCode &status) {
1675 if (U_FAILURE(status)) {
1676 return;
1677 }
1678 if (U_FAILURE(fOpenStatus)) {
1679 status = fOpenStatus;
1680 return;
1681 }
1682 ures_resetIterator(fLocales);
1683}
1684
1685int32_t PluralAvailableLocalesEnumeration::count(UErrorCode &status) const {
1686 if (U_FAILURE(status)) {
1687 return 0;
1688 }
1689 if (U_FAILURE(fOpenStatus)) {
1690 status = fOpenStatus;
1691 return 0;
1692 }
1693 return ures_getSize(fLocales);
1694}
1695
46f4442e
A
1696U_NAMESPACE_END
1697
1698
1699#endif /* #if !UCONFIG_NO_FORMATTING */
1700
1701//eof