icuSources/i18n/plurrule.cpp

   1 /*
   2 *******************************************************************************
   3 * Copyright (C) 2007-2011, International Business Machines Corporation and
   4 * others. All Rights Reserved.
   5 *******************************************************************************
   6 *
   7 * File PLURRULE.CPP
   8 *
   9 * Modification History:
  10 *
  11 *   Date        Name        Description
  12 *******************************************************************************
  13 */
  14
  15
  16 #include "unicode/utypes.h"
  17 #include "unicode/localpointer.h"
  18 #include "unicode/plurrule.h"
  19 #include "unicode/ures.h"
  20 #include "cmemory.h"
  21 #include "cstring.h"
  22 #include "hash.h"
  23 #include "mutex.h"
  24 #include "patternprops.h"
  25 #include "plurrule_impl.h"
  26 #include "putilimp.h"
  27 #include "ucln_in.h"
  28 #include "ustrfmt.h"
  29 #include "locutil.h"
  30 #include "uassert.h"
  31
  32 #if !UCONFIG_NO_FORMATTING
  33
  34 U_NAMESPACE_BEGIN
  35
  36 // shared by all instances when lazy-initializing samples
  37 static UMTX pluralMutex;
  38
  39 #define ARRAY_SIZE(array) (int32_t)(sizeof array  / sizeof array[0])
  40
  41 static const UChar PLURAL_KEYWORD_OTHER[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,0};
  42 static const UChar PLURAL_DEFAULT_RULE[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,COLON,SPACE,LOW_N,0};
  43 static const UChar PK_IN[]={LOW_I,LOW_N,0};
  44 static const UChar PK_NOT[]={LOW_N,LOW_O,LOW_T,0};
  45 static const UChar PK_IS[]={LOW_I,LOW_S,0};
  46 static const UChar PK_MOD[]={LOW_M,LOW_O,LOW_D,0};
  47 static const UChar PK_AND[]={LOW_A,LOW_N,LOW_D,0};
  48 static const UChar PK_OR[]={LOW_O,LOW_R,0};
  49 static const UChar PK_VAR_N[]={LOW_N,0};
  50 static const UChar PK_WITHIN[]={LOW_W,LOW_I,LOW_T,LOW_H,LOW_I,LOW_N,0};
  51
  52 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralRules)
  53 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralKeywordEnumeration)
  54
  55 PluralRules::PluralRules(UErrorCode& status)
  56 :   UObject(),
  57     mRules(NULL),
  58     mParser(NULL),
  59     mSamples(NULL),
  60     mSampleInfo(NULL),
  61     mSampleInfoCount(0)
  62 {
  63     if (U_FAILURE(status)) {
  64         return;
  65     }
  66     mParser = new RuleParser();
  67     if (mParser==NULL) {
  68         status = U_MEMORY_ALLOCATION_ERROR;
  69     }
  70 }
  71
  72 PluralRules::PluralRules(const PluralRules& other)
  73 : UObject(other),
  74     mRules(NULL),
  75   mParser(NULL),
  76   mSamples(NULL),
  77   mSampleInfo(NULL),
  78   mSampleInfoCount(0)
  79 {
  80     *this=other;
  81 }
  82
  83 PluralRules::~PluralRules() {
  84     delete mRules;
  85     delete mParser;
  86     uprv_free(mSamples);
  87     uprv_free(mSampleInfo);
  88 }
  89
  90 PluralRules*
  91 PluralRules::clone() const {
  92     return new PluralRules(*this);
  93 }
  94
  95 PluralRules&
  96 PluralRules::operator=(const PluralRules& other) {
  97     if (this != &other) {
  98         delete mRules;
  99         if (other.mRules==NULL) {
 100             mRules = NULL;
 101         }
 102         else {
 103             mRules = new RuleChain(*other.mRules);
 104         }
 105         delete mParser;
 106         mParser = new RuleParser();
 107
 108         uprv_free(mSamples);
 109         mSamples = NULL;
 110
 111         uprv_free(mSampleInfo);
 112         mSampleInfo = NULL;
 113         mSampleInfoCount = 0;
 114     }
 115
 116     return *this;
 117 }
 118
 119 PluralRules* U_EXPORT2
 120 PluralRules::createRules(const UnicodeString& description, UErrorCode& status) {
 121     RuleChain   rules;
 122
 123     if (U_FAILURE(status)) {
 124         return NULL;
 125     }
 126     PluralRules *newRules = new PluralRules(status);
 127     if ( (newRules != NULL)&& U_SUCCESS(status) ) {
 128         newRules->parseDescription((UnicodeString &)description, rules, status);
 129         if (U_SUCCESS(status)) {
 130             newRules->addRules(rules);
 131         }
 132     }
 133     if (U_FAILURE(status)) {
 134         delete newRules;
 135         return NULL;
 136     }
 137     else {
 138         return newRules;
 139     }
 140 }
 141
 142 PluralRules* U_EXPORT2
 143 PluralRules::createDefaultRules(UErrorCode& status) {
 144     return createRules(UnicodeString(TRUE, PLURAL_DEFAULT_RULE, -1), status);
 145 }
 146
 147 PluralRules* U_EXPORT2
 148 PluralRules::forLocale(const Locale& locale, UErrorCode& status) {
 149     RuleChain   rChain;
 150     if (U_FAILURE(status)) {
 151         return NULL;
 152     }
 153     PluralRules *newObj = new PluralRules(status);
 154     if (newObj==NULL || U_FAILURE(status)) {
 155         delete newObj;
 156         return NULL;
 157     }
 158     UnicodeString locRule = newObj->getRuleFromResource(locale, status);
 159     if ((locRule.length() != 0) && U_SUCCESS(status)) {
 160         newObj->parseDescription(locRule, rChain, status);
 161         if (U_SUCCESS(status)) {
 162             newObj->addRules(rChain);
 163         }
 164     }
 165     if (U_FAILURE(status)||(locRule.length() == 0)) {
 166         // use default plural rule
 167         status = U_ZERO_ERROR;
 168         UnicodeString defRule = UnicodeString(PLURAL_DEFAULT_RULE);
 169         newObj->parseDescription(defRule, rChain, status);
 170         newObj->addRules(rChain);
 171     }
 172
 173     return newObj;
 174 }
 175
 176 UnicodeString
 177 PluralRules::select(int32_t number) const {
 178     if (mRules == NULL) {
 179         return UnicodeString(TRUE, PLURAL_DEFAULT_RULE, -1);
 180     }
 181     else {
 182         return mRules->select(number);
 183     }
 184 }
 185
 186 UnicodeString
 187 PluralRules::select(double number) const {
 188     if (mRules == NULL) {
 189         return UnicodeString(TRUE, PLURAL_DEFAULT_RULE, -1);
 190     }
 191     else {
 192         return mRules->select(number);
 193     }
 194 }
 195
 196 StringEnumeration*
 197 PluralRules::getKeywords(UErrorCode& status) const {
 198     if (U_FAILURE(status))  return NULL;
 199     StringEnumeration* nameEnumerator = new PluralKeywordEnumeration(mRules, status);
 200     if (U_FAILURE(status)) {
 201       delete nameEnumerator;
 202       return NULL;
 203     }
 204
 205     return nameEnumerator;
 206 }
 207
 208 double
 209 PluralRules::getUniqueKeywordValue(const UnicodeString& keyword) {
 210   double val = 0.0;
 211   UErrorCode status = U_ZERO_ERROR;
 212   int32_t count = getSamplesInternal(keyword, &val, 1, FALSE, status);
 213   return count == 1 ? val : UPLRULES_NO_UNIQUE_VALUE;
 214 }
 215
 216 int32_t
 217 PluralRules::getAllKeywordValues(const UnicodeString &keyword, double *dest,
 218                                  int32_t destCapacity, UErrorCode& error) {
 219     return getSamplesInternal(keyword, dest, destCapacity, FALSE, error);
 220 }
 221
 222 int32_t
 223 PluralRules::getSamples(const UnicodeString &keyword, double *dest,
 224                         int32_t destCapacity, UErrorCode& status) {
 225     return getSamplesInternal(keyword, dest, destCapacity, TRUE, status);
 226 }
 227
 228 int32_t
 229 PluralRules::getSamplesInternal(const UnicodeString &keyword, double *dest,
 230                                 int32_t destCapacity, UBool includeUnlimited,
 231                                 UErrorCode& status) {
 232     initSamples(status);
 233     if (U_FAILURE(status)) {
 234         return -1;
 235     }
 236     if (destCapacity < 0 || (dest == NULL && destCapacity > 0)) {
 237         status = U_ILLEGAL_ARGUMENT_ERROR;
 238         return -1;
 239     }
 240
 241     int32_t index = getKeywordIndex(keyword, status);
 242     if (index == -1) {
 243         return 0;
 244     }
 245
 246     const int32_t LIMIT_MASK = 0x1 << 31;
 247
 248     if (!includeUnlimited) {
 249         if ((mSampleInfo[index] & LIMIT_MASK) == 0) {
 250             return -1;
 251         }
 252     }
 253
 254     int32_t start = index == 0 ? 0 : mSampleInfo[index - 1] & ~LIMIT_MASK;
 255     int32_t limit = mSampleInfo[index] & ~LIMIT_MASK;
 256     int32_t len = limit - start;
 257     if (len <= destCapacity) {
 258         destCapacity = len;
 259     } else if (includeUnlimited) {
 260         len = destCapacity;  // no overflow, and don't report more than we copy
 261     } else {
 262         status = U_BUFFER_OVERFLOW_ERROR;
 263         return len;
 264     }
 265     for (int32_t i = 0; i < destCapacity; ++i, ++start) {
 266         dest[i] = mSamples[start];
 267     }
 268     return len;
 269 }
 270
 271
 272 UBool
 273 PluralRules::isKeyword(const UnicodeString& keyword) const {
 274     if (0 == keyword.compare(PLURAL_KEYWORD_OTHER, 5)) {
 275         return true;
 276     }
 277     else {
 278         if (mRules==NULL) {
 279             return false;
 280         }
 281         else {
 282             return mRules->isKeyword(keyword);
 283         }
 284     }
 285 }
 286
 287 UnicodeString
 288 PluralRules::getKeywordOther() const {
 289     return UnicodeString(TRUE, PLURAL_KEYWORD_OTHER, 5);
 290 }
 291
 292 UBool
 293 PluralRules::operator==(const PluralRules& other) const  {
 294     int32_t limit;
 295     const UnicodeString *ptrKeyword;
 296     UErrorCode status= U_ZERO_ERROR;
 297
 298     if ( this == &other ) {
 299         return TRUE;
 300     }
 301     LocalPointer<StringEnumeration> myKeywordList(getKeywords(status));
 302     LocalPointer<StringEnumeration> otherKeywordList(other.getKeywords(status));
 303     if (U_FAILURE(status)) {
 304         return FALSE;
 305     }
 306
 307     if (myKeywordList->count(status)!=otherKeywordList->count(status)) {
 308         return FALSE;
 309     }
 310     myKeywordList->reset(status);
 311     while ((ptrKeyword=myKeywordList->snext(status))!=NULL) {
 312         if (!other.isKeyword(*ptrKeyword)) {
 313             return FALSE;
 314         }
 315     }
 316     otherKeywordList->reset(status);
 317     while ((ptrKeyword=otherKeywordList->snext(status))!=NULL) {
 318         if (!this->isKeyword(*ptrKeyword)) {
 319             return FALSE;
 320         }
 321     }
 322     if (U_FAILURE(status)) {
 323         return FALSE;
 324     }
 325
 326     if ((limit=this->getRepeatLimit()) != other.getRepeatLimit()) {
 327         return FALSE;
 328     }
 329     UnicodeString myKeyword, otherKeyword;
 330     for (int32_t i=0; i<limit; ++i) {
 331         myKeyword = this->select(i);
 332         otherKeyword = other.select(i);
 333         if (myKeyword!=otherKeyword) {
 334             return FALSE;
 335         }
 336     }
 337     return TRUE;
 338 }
 339
 340 void
 341 PluralRules::parseDescription(UnicodeString& data, RuleChain& rules, UErrorCode &status)
 342 {
 343     int32_t ruleIndex=0;
 344     UnicodeString token;
 345     tokenType type;
 346     tokenType prevType=none;
 347     RuleChain *ruleChain=NULL;
 348     AndConstraint *curAndConstraint=NULL;
 349     OrConstraint *orNode=NULL;
 350     RuleChain *lastChain=NULL;
 351
 352     if (U_FAILURE(status)) {
 353         return;
 354     }
 355     UnicodeString ruleData = data.toLower("");
 356     while (ruleIndex< ruleData.length()) {
 357         mParser->getNextToken(ruleData, &ruleIndex, token, type, status);
 358         if (U_FAILURE(status)) {
 359             return;
 360         }
 361         mParser->checkSyntax(prevType, type, status);
 362         if (U_FAILURE(status)) {
 363             return;
 364         }
 365         switch (type) {
 366         case tAnd:
 367             U_ASSERT(curAndConstraint != NULL);
 368             curAndConstraint = curAndConstraint->add();
 369             break;
 370         case tOr:
 371             lastChain = &rules;
 372             while (lastChain->next !=NULL) {
 373                 lastChain = lastChain->next;
 374             }
 375             orNode=lastChain->ruleHeader;
 376             while (orNode->next != NULL) {
 377                 orNode = orNode->next;
 378             }
 379             orNode->next= new OrConstraint();
 380             orNode=orNode->next;
 381             orNode->next=NULL;
 382             curAndConstraint = orNode->add();
 383             break;
 384         case tIs:
 385             U_ASSERT(curAndConstraint != NULL);
 386             curAndConstraint->rangeHigh=-1;
 387             break;
 388         case tNot:
 389             U_ASSERT(curAndConstraint != NULL);
 390             curAndConstraint->notIn=TRUE;
 391             break;
 392         case tIn:
 393             U_ASSERT(curAndConstraint != NULL);
 394             curAndConstraint->rangeHigh=PLURAL_RANGE_HIGH;
 395             curAndConstraint->integerOnly = TRUE;
 396             break;
 397         case tWithin:
 398             U_ASSERT(curAndConstraint != NULL);
 399             curAndConstraint->rangeHigh=PLURAL_RANGE_HIGH;
 400             break;
 401         case tNumber:
 402             U_ASSERT(curAndConstraint != NULL);
 403             if ( (curAndConstraint->op==AndConstraint::MOD)&&
 404                  (curAndConstraint->opNum == -1 ) ) {
 405                 curAndConstraint->opNum=getNumberValue(token);
 406             }
 407             else {
 408                 if (curAndConstraint->rangeLow == -1) {
 409                     curAndConstraint->rangeLow=getNumberValue(token);
 410                 }
 411                 else {
 412                     curAndConstraint->rangeHigh=getNumberValue(token);
 413                 }
 414             }
 415             break;
 416         case tMod:
 417             U_ASSERT(curAndConstraint != NULL);
 418             curAndConstraint->op=AndConstraint::MOD;
 419             break;
 420         case tKeyword:
 421             if (ruleChain==NULL) {
 422                 ruleChain = &rules;
 423             }
 424             else {
 425                 while (ruleChain->next!=NULL){
 426                     ruleChain=ruleChain->next;
 427                 }
 428                 ruleChain=ruleChain->next=new RuleChain();
 429             }
 430             if (ruleChain->ruleHeader != NULL) {
 431                 delete ruleChain->ruleHeader;
 432             }
 433             orNode = ruleChain->ruleHeader = new OrConstraint();
 434             curAndConstraint = orNode->add();
 435             ruleChain->keyword = token;
 436             break;
 437         default:
 438             break;
 439         }
 440         prevType=type;
 441     }
 442 }
 443
 444 int32_t
 445 PluralRules::getNumberValue(const UnicodeString& token) const {
 446     int32_t i;
 447     char digits[128];
 448
 449     i = token.extract(0, token.length(), digits, ARRAY_SIZE(digits), US_INV);
 450     digits[i]='\0';
 451
 452     return((int32_t)atoi(digits));
 453 }
 454
 455
 456 void
 457 PluralRules::getNextLocale(const UnicodeString& localeData, int32_t* curIndex, UnicodeString& localeName) {
 458     int32_t i=*curIndex;
 459
 460     localeName.remove();
 461     while (i< localeData.length()) {
 462        if ( (localeData.charAt(i)!= SPACE) && (localeData.charAt(i)!= COMMA) ) {
 463            break;
 464        }
 465        i++;
 466     }
 467
 468     while (i< localeData.length()) {
 469        if ( (localeData.charAt(i)== SPACE) || (localeData.charAt(i)== COMMA) ) {
 470            break;
 471        }
 472        localeName+=localeData.charAt(i++);
 473     }
 474     *curIndex=i;
 475 }
 476
 477
 478 int32_t
 479 PluralRules::getRepeatLimit() const {
 480     if (mRules!=NULL) {
 481         return mRules->getRepeatLimit();
 482     }
 483     else {
 484         return 0;
 485     }
 486 }
 487
 488 int32_t
 489 PluralRules::getKeywordIndex(const UnicodeString& keyword,
 490                              UErrorCode& status) const {
 491     if (U_SUCCESS(status)) {
 492         int32_t n = 0;
 493         RuleChain* rc = mRules;
 494         while (rc != NULL) {
 495             if (rc->ruleHeader != NULL) {
 496                 if (rc->keyword == keyword) {
 497                     return n;
 498                 }
 499                 ++n;
 500             }
 501             rc = rc->next;
 502         }
 503         if (0 == keyword.compare(PLURAL_KEYWORD_OTHER, 5)) {
 504             return n;
 505         }
 506     }
 507     return -1;
 508 }
 509
 510 typedef struct SampleRecord {
 511     int32_t ruleIndex;
 512     double  value;
 513 } SampleRecord;
 514
 515 void
 516 PluralRules::initSamples(UErrorCode& status) {
 517     if (U_FAILURE(status)) {
 518         return;
 519     }
 520     Mutex lock(&pluralMutex);
 521
 522     if (mSamples) {
 523         return;
 524     }
 525
 526     // Note, the original design let you have multiple rules with the same keyword.  But
 527     // we don't use that in our data and existing functions in this implementation don't
 528     // fully support it (for example, the returned keywords is a list and not a set).
 529     //
 530     // So I don't support this here either.  If you ask for samples, or for all values,
 531     // you will get information about the first rule with that keyword, not all rules with
 532     // that keyword.
 533
 534     int32_t maxIndex = 0;
 535     int32_t otherIndex = -1; // the value -1 will indicate we added 'other' at end
 536     RuleChain* rc = mRules;
 537     while (rc != NULL) {
 538         if (rc->ruleHeader != NULL) {
 539             if (otherIndex == -1 && 0 == rc->keyword.compare(PLURAL_KEYWORD_OTHER, 5)) {
 540                 otherIndex = maxIndex;
 541             }
 542             ++maxIndex;
 543         }
 544         rc = rc->next;
 545     }
 546     if (otherIndex == -1) {
 547         ++maxIndex;
 548     }
 549
 550     LocalMemory<int32_t> newSampleInfo;
 551     if (NULL == newSampleInfo.allocateInsteadAndCopy(maxIndex)) {
 552         status = U_MEMORY_ALLOCATION_ERROR;
 553         return;
 554     }
 555
 556     const int32_t LIMIT_MASK = 0x1 << 31;
 557
 558     rc = mRules;
 559     int32_t n = 0;
 560     while (rc != NULL) {
 561         if (rc->ruleHeader != NULL) {
 562             newSampleInfo[n++] = rc->ruleHeader->isLimited() ? LIMIT_MASK : 0;
 563         }
 564         rc = rc->next;
 565     }
 566     if (otherIndex == -1) {
 567         newSampleInfo[maxIndex - 1] = 0; // unlimited
 568     }
 569
 570     MaybeStackArray<SampleRecord, 10> newSamples;
 571     int32_t sampleCount = 0;
 572
 573     int32_t limit = getRepeatLimit() * MAX_SAMPLES * 2;
 574     if (limit < 10) {
 575         limit = 10;
 576     }
 577
 578     for (int i = 0, keywordsRemaining = maxIndex;
 579           keywordsRemaining > 0 && i < limit;
 580           ++i) {
 581         double val = i / 2.0;
 582
 583         n = 0;
 584         rc = mRules;
 585         int32_t found = -1;
 586         while (rc != NULL) {
 587             if (rc->ruleHeader != NULL) {
 588                 if (rc->ruleHeader->isFulfilled(val)) {
 589                     found = n;
 590                     break;
 591                 }
 592                 ++n;
 593             }
 594             rc = rc->next;
 595         }
 596         if (found == -1) {
 597             // 'other'.  If there is an 'other' rule, the rule set is bad since nothing
 598             // should leak through, but we don't bother to report that here.
 599             found = otherIndex == -1 ? maxIndex - 1 : otherIndex;
 600         }
 601         if (newSampleInfo[found] == MAX_SAMPLES) { // limit flag not set
 602             continue;
 603         }
 604         newSampleInfo[found] += 1; // won't impact limit flag
 605
 606         if (sampleCount == newSamples.getCapacity()) {
 607             int32_t newCapacity = sampleCount < 20 ? 128 : sampleCount * 2;
 608             if (NULL == newSamples.resize(newCapacity, sampleCount)) {
 609                 status = U_MEMORY_ALLOCATION_ERROR;
 610                 return;
 611             }
 612         }
 613         newSamples[sampleCount].ruleIndex = found;
 614         newSamples[sampleCount].value = val;
 615         ++sampleCount;
 616
 617         if (newSampleInfo[found] == MAX_SAMPLES) { // limit flag not set
 618             --keywordsRemaining;
 619         }
 620     }
 621
 622     // sort the values by index, leaving order otherwise unchanged
 623     // this is just a selection sort for simplicity
 624     LocalMemory<double> values;
 625     if (NULL == values.allocateInsteadAndCopy(sampleCount)) {
 626         status = U_MEMORY_ALLOCATION_ERROR;
 627         return;
 628     }
 629     for (int i = 0, j = 0; i < maxIndex; ++i) {
 630         for (int k = 0; k < sampleCount; ++k) {
 631             if (newSamples[k].ruleIndex == i) {
 632                 values[j++] = newSamples[k].value;
 633             }
 634         }
 635     }
 636
 637     // convert array of mask/lengths to array of mask/limits
 638     limit = 0;
 639     for (int i = 0; i < maxIndex; ++i) {
 640         int32_t info = newSampleInfo[i];
 641         int32_t len = info & ~LIMIT_MASK;
 642         limit += len;
 643         // if a rule is 'unlimited' but has fewer than MAX_SAMPLES samples,
 644         // it's not really unlimited, so mark it as limited
 645         int32_t mask = len < MAX_SAMPLES ? LIMIT_MASK : info & LIMIT_MASK;
 646         newSampleInfo[i] = limit | mask;
 647     }
 648
 649     // ok, we've got good data
 650     mSamples = values.orphan();
 651     mSampleInfo = newSampleInfo.orphan();
 652     mSampleInfoCount = maxIndex;
 653 }
 654
 655 void
 656 PluralRules::addRules(RuleChain& rules) {
 657     RuleChain *newRule = new RuleChain(rules);
 658     this->mRules=newRule;
 659     newRule->setRepeatLimit();
 660 }
 661
 662 UnicodeString
 663 PluralRules::getRuleFromResource(const Locale& locale, UErrorCode& errCode) {
 664     UnicodeString emptyStr;
 665
 666     if (U_FAILURE(errCode)) {
 667         return emptyStr;
 668     }
 669     UResourceBundle *rb=ures_openDirect(NULL, "plurals", &errCode);
 670     if(U_FAILURE(errCode)) {
 671         /* total failure, not even root could be opened */
 672         return emptyStr;
 673     }
 674     UResourceBundle *locRes=ures_getByKey(rb, "locales", NULL, &errCode);
 675     if(U_FAILURE(errCode)) {
 676         ures_close(rb);
 677         return emptyStr;
 678     }
 679     int32_t resLen=0;
 680     const char *curLocaleName=locale.getName();
 681     const UChar* s = ures_getStringByKey(locRes, curLocaleName, &resLen, &errCode);
 682
 683     if (s == NULL) {
 684         // Check parent locales.
 685         UErrorCode status = U_ZERO_ERROR;
 686         char parentLocaleName[ULOC_FULLNAME_CAPACITY];
 687         const char *curLocaleName=locale.getName();
 688         int32_t localeNameLen=0;
 689         uprv_strcpy(parentLocaleName, curLocaleName);
 690
 691         while ((localeNameLen=uloc_getParent(parentLocaleName, parentLocaleName,
 692                                        ULOC_FULLNAME_CAPACITY, &status)) > 0) {
 693             resLen=0;
 694             s = ures_getStringByKey(locRes, parentLocaleName, &resLen, &status);
 695             if (s != NULL) {
 696                 errCode = U_ZERO_ERROR;
 697                 break;
 698             }
 699             status = U_ZERO_ERROR;
 700         }
 701     }
 702     if (s==NULL) {
 703         ures_close(locRes);
 704         ures_close(rb);
 705         return emptyStr;
 706     }
 707
 708     char setKey[256];
 709     UChar result[256];
 710     u_UCharsToChars(s, setKey, resLen + 1);
 711     // printf("\n PluralRule: %s\n", setKey);
 712
 713
 714     UResourceBundle *ruleRes=ures_getByKey(rb, "rules", NULL, &errCode);
 715     if(U_FAILURE(errCode)) {
 716         ures_close(locRes);
 717         ures_close(rb);
 718         return emptyStr;
 719     }
 720     resLen=0;
 721     UResourceBundle *setRes = ures_getByKey(ruleRes, setKey, NULL, &errCode);
 722     if (U_FAILURE(errCode)) {
 723         ures_close(ruleRes);
 724         ures_close(locRes);
 725         ures_close(rb);
 726         return emptyStr;
 727     }
 728
 729     int32_t numberKeys = ures_getSize(setRes);
 730     char *key=NULL;
 731     int32_t len=0;
 732     for(int32_t i=0; i<numberKeys; ++i) {
 733         int32_t keyLen;
 734         resLen=0;
 735         s=ures_getNextString(setRes, &resLen, (const char**)&key, &errCode);
 736         keyLen = (int32_t)uprv_strlen(key);
 737         u_charsToUChars(key, result+len, keyLen);
 738         len += keyLen;
 739         result[len++]=COLON;
 740         uprv_memcpy(result+len, s, resLen*sizeof(UChar));
 741         len += resLen;
 742         result[len++]=SEMI_COLON;
 743     }
 744     result[len++]=0;
 745     u_UCharsToChars(result, setKey, len);
 746     // printf(" Rule: %s\n", setKey);
 747
 748     ures_close(setRes);
 749     ures_close(ruleRes);
 750     ures_close(locRes);
 751     ures_close(rb);
 752     return UnicodeString(result);
 753 }
 754
 755 AndConstraint::AndConstraint() {
 756     op = AndConstraint::NONE;
 757     opNum=-1;
 758     rangeLow=-1;
 759     rangeHigh=-1;
 760     notIn=FALSE;
 761     integerOnly=FALSE;
 762     next=NULL;
 763 }
 764
 765
 766 AndConstraint::AndConstraint(const AndConstraint& other) {
 767     this->op = other.op;
 768     this->opNum=other.opNum;
 769     this->rangeLow=other.rangeLow;
 770     this->rangeHigh=other.rangeHigh;
 771     this->integerOnly=other.integerOnly;
 772     this->notIn=other.notIn;
 773     if (other.next==NULL) {
 774         this->next=NULL;
 775     }
 776     else {
 777         this->next = new AndConstraint(*other.next);
 778     }
 779 }
 780
 781 AndConstraint::~AndConstraint() {
 782     if (next!=NULL) {
 783         delete next;
 784     }
 785 }
 786
 787
 788 UBool
 789 AndConstraint::isFulfilled(double number) {
 790     UBool result=TRUE;
 791     double value=number;
 792
 793     // arrrrrrgh
 794     if ((rangeHigh == -1 || integerOnly) && number != uprv_floor(number)) {
 795       return notIn;
 796     }
 797
 798     if ( op == MOD ) {
 799         value = (int32_t)value % opNum;
 800     }
 801     if ( rangeHigh == -1 ) {
 802         if ( rangeLow == -1 ) {
 803             result = TRUE; // empty rule
 804         }
 805         else {
 806             if ( value == rangeLow ) {
 807                 result = TRUE;
 808             }
 809             else {
 810                 result = FALSE;
 811             }
 812         }
 813     }
 814     else {
 815         if ((rangeLow <= value) && (value <= rangeHigh)) {
 816             if (integerOnly) {
 817                 if ( value != (int32_t)value) {
 818                     result = FALSE;
 819                 }
 820                 else {
 821                     result = TRUE;
 822                 }
 823             }
 824             else {
 825                 result = TRUE;
 826             }
 827         }
 828         else {
 829             result = FALSE;
 830         }
 831     }
 832     if (notIn) {
 833         return !result;
 834     }
 835     else {
 836         return result;
 837     }
 838 }
 839
 840 UBool
 841 AndConstraint::isLimited() {
 842     return (rangeHigh == -1 || integerOnly) && !notIn && op != MOD;
 843 }
 844
 845 int32_t
 846 AndConstraint::updateRepeatLimit(int32_t maxLimit) {
 847
 848     if ( op == MOD ) {
 849         return uprv_max(opNum, maxLimit);
 850     }
 851     else {
 852         if ( rangeHigh == -1 ) {
 853             return uprv_max(rangeLow, maxLimit);
 854         }
 855         else{
 856             return uprv_max(rangeHigh, maxLimit);
 857         }
 858     }
 859 }
 860
 861
 862 AndConstraint*
 863 AndConstraint::add()
 864 {
 865     this->next = new AndConstraint();
 866     return this->next;
 867 }
 868
 869 OrConstraint::OrConstraint() {
 870     childNode=NULL;
 871     next=NULL;
 872 }
 873
 874 OrConstraint::OrConstraint(const OrConstraint& other) {
 875     if ( other.childNode == NULL ) {
 876         this->childNode = NULL;
 877     }
 878     else {
 879         this->childNode = new AndConstraint(*(other.childNode));
 880     }
 881     if (other.next == NULL ) {
 882         this->next = NULL;
 883     }
 884     else {
 885         this->next = new OrConstraint(*(other.next));
 886     }
 887 }
 888
 889 OrConstraint::~OrConstraint() {
 890     if (childNode!=NULL) {
 891         delete childNode;
 892     }
 893     if (next!=NULL) {
 894         delete next;
 895     }
 896 }
 897
 898 AndConstraint*
 899 OrConstraint::add()
 900 {
 901     OrConstraint *curOrConstraint=this;
 902     {
 903         while (curOrConstraint->next!=NULL) {
 904             curOrConstraint = curOrConstraint->next;
 905         }
 906         curOrConstraint->next = NULL;
 907         curOrConstraint->childNode = new AndConstraint();
 908     }
 909     return curOrConstraint->childNode;
 910 }
 911
 912 UBool
 913 OrConstraint::isFulfilled(double number) {
 914     OrConstraint* orRule=this;
 915     UBool result=FALSE;
 916
 917     while (orRule!=NULL && !result) {
 918         result=TRUE;
 919         AndConstraint* andRule = orRule->childNode;
 920         while (andRule!=NULL && result) {
 921             result = andRule->isFulfilled(number);
 922             andRule=andRule->next;
 923         }
 924         orRule = orRule->next;
 925     }
 926
 927     return result;
 928 }
 929
 930 UBool
 931 OrConstraint::isLimited() {
 932     for (OrConstraint *orc = this; orc != NULL; orc = orc->next) {
 933         UBool result = FALSE;
 934         for (AndConstraint *andc = orc->childNode; andc != NULL; andc = andc->next) {
 935             if (andc->isLimited()) {
 936                 result = TRUE;
 937                 break;
 938             }
 939         }
 940         if (result == FALSE) {
 941             return FALSE;
 942         }
 943     }
 944     return TRUE;
 945 }
 946
 947 RuleChain::RuleChain() {
 948     ruleHeader=NULL;
 949     next = NULL;
 950     repeatLimit=0;
 951 }
 952
 953 RuleChain::RuleChain(const RuleChain& other) {
 954     this->repeatLimit = other.repeatLimit;
 955     this->keyword=other.keyword;
 956     if (other.ruleHeader != NULL) {
 957         this->ruleHeader = new OrConstraint(*(other.ruleHeader));
 958     }
 959     else {
 960         this->ruleHeader = NULL;
 961     }
 962     if (other.next != NULL ) {
 963         this->next = new RuleChain(*other.next);
 964     }
 965     else
 966     {
 967         this->next = NULL;
 968     }
 969 }
 970
 971 RuleChain::~RuleChain() {
 972     if (next != NULL) {
 973         delete next;
 974     }
 975     if ( ruleHeader != NULL ) {
 976         delete ruleHeader;
 977     }
 978 }
 979
 980 UnicodeString
 981 RuleChain::select(double number) const {
 982
 983    if ( ruleHeader != NULL ) {
 984        if (ruleHeader->isFulfilled(number)) {
 985            return keyword;
 986        }
 987    }
 988    if ( next != NULL ) {
 989        return next->select(number);
 990    }
 991    else {
 992        return UnicodeString(TRUE, PLURAL_KEYWORD_OTHER, 5);
 993    }
 994
 995 }
 996
 997 void
 998 RuleChain::dumpRules(UnicodeString& result) {
 999     UChar digitString[16];
1000
1001     if ( ruleHeader != NULL ) {
1002         result +=  keyword;
1003         OrConstraint* orRule=ruleHeader;
1004         while ( orRule != NULL ) {
1005             AndConstraint* andRule=orRule->childNode;
1006             while ( andRule != NULL ) {
1007                 if ( (andRule->op==AndConstraint::NONE) && (andRule->rangeHigh==-1) ) {
1008                     result += UNICODE_STRING_SIMPLE(" n is ");
1009                     if (andRule->notIn) {
1010                         result += UNICODE_STRING_SIMPLE("not ");
1011                     }
1012                     uprv_itou(digitString,16, andRule->rangeLow,10,0);
1013                     result += UnicodeString(digitString);
1014                 }
1015                 else {
1016                     if (andRule->op==AndConstraint::MOD) {
1017                         result += UNICODE_STRING_SIMPLE("  n mod ");
1018                         uprv_itou(digitString,16, andRule->opNum,10,0);
1019                         result += UnicodeString(digitString);
1020                     }
1021                     else {
1022                         result += UNICODE_STRING_SIMPLE("  n ");
1023                     }
1024                     if (andRule->rangeHigh==-1) {
1025                         if (andRule->notIn) {
1026                             result += UNICODE_STRING_SIMPLE(" is not ");
1027                             uprv_itou(digitString,16, andRule->rangeLow,10,0);
1028                             result += UnicodeString(digitString);
1029                         }
1030                         else {
1031                             result += UNICODE_STRING_SIMPLE(" is ");
1032                             uprv_itou(digitString,16, andRule->rangeLow,10,0);
1033                             result += UnicodeString(digitString);
1034                         }
1035                     }
1036                     else {
1037                         if (andRule->notIn) {
1038                             if ( andRule->integerOnly ) {
1039                                 result += UNICODE_STRING_SIMPLE("  not in ");
1040                             }
1041                             else {
1042                                 result += UNICODE_STRING_SIMPLE("  not within ");
1043                             }
1044                             uprv_itou(digitString,16, andRule->rangeLow,10,0);
1045                             result += UnicodeString(digitString);
1046                             result += UNICODE_STRING_SIMPLE(" .. ");
1047                             uprv_itou(digitString,16, andRule->rangeHigh,10,0);
1048                             result += UnicodeString(digitString);
1049                         }
1050                         else {
1051                             if ( andRule->integerOnly ) {
1052                                 result += UNICODE_STRING_SIMPLE(" in ");
1053                             }
1054                             else {
1055                                 result += UNICODE_STRING_SIMPLE(" within ");
1056                             }
1057                             uprv_itou(digitString,16, andRule->rangeLow,10,0);
1058                             result += UnicodeString(digitString);
1059                             result += UNICODE_STRING_SIMPLE(" .. ");
1060                             uprv_itou(digitString,16, andRule->rangeHigh,10,0);
1061                         }
1062                     }
1063                 }
1064                 if ( (andRule=andRule->next) != NULL) {
1065                     result.append(PK_AND, 3);
1066                 }
1067             }
1068             if ( (orRule = orRule->next) != NULL ) {
1069                 result.append(PK_OR, 2);
1070             }
1071         }
1072     }
1073     if ( next != NULL ) {
1074         next->dumpRules(result);
1075     }
1076 }
1077
1078 int32_t
1079 RuleChain::getRepeatLimit () {
1080     return repeatLimit;
1081 }
1082
1083 void
1084 RuleChain::setRepeatLimit () {
1085     int32_t limit=0;
1086
1087     if ( next != NULL ) {
1088         next->setRepeatLimit();
1089         limit = next->repeatLimit;
1090     }
1091
1092     if ( ruleHeader != NULL ) {
1093         OrConstraint* orRule=ruleHeader;
1094         while ( orRule != NULL ) {
1095             AndConstraint* andRule=orRule->childNode;
1096             while ( andRule != NULL ) {
1097                 limit = andRule->updateRepeatLimit(limit);
1098                 andRule = andRule->next;
1099             }
1100             orRule = orRule->next;
1101         }
1102     }
1103     repeatLimit = limit;
1104 }
1105
1106 UErrorCode
1107 RuleChain::getKeywords(int32_t capacityOfKeywords, UnicodeString* keywords, int32_t& arraySize) const {
1108     if ( arraySize < capacityOfKeywords-1 ) {
1109         keywords[arraySize++]=keyword;
1110     }
1111     else {
1112         return U_BUFFER_OVERFLOW_ERROR;
1113     }
1114
1115     if ( next != NULL ) {
1116         return next->getKeywords(capacityOfKeywords, keywords, arraySize);
1117     }
1118     else {
1119         return U_ZERO_ERROR;
1120     }
1121 }
1122
1123 UBool
1124 RuleChain::isKeyword(const UnicodeString& keywordParam) const {
1125     if ( keyword == keywordParam ) {
1126         return TRUE;
1127     }
1128
1129     if ( next != NULL ) {
1130         return next->isKeyword(keywordParam);
1131     }
1132     else {
1133         return FALSE;
1134     }
1135 }
1136
1137
1138 RuleParser::RuleParser() {
1139 }
1140
1141 RuleParser::~RuleParser() {
1142 }
1143
1144 void
1145 RuleParser::checkSyntax(tokenType prevType, tokenType curType, UErrorCode &status)
1146 {
1147     if (U_FAILURE(status)) {
1148         return;
1149     }
1150     switch(prevType) {
1151     case none:
1152     case tSemiColon:
1153         if (curType!=tKeyword) {
1154             status = U_UNEXPECTED_TOKEN;
1155         }
1156         break;
1157     case tVariableN :
1158         if (curType != tIs && curType != tMod && curType != tIn &&
1159             curType != tNot && curType != tWithin) {
1160             status = U_UNEXPECTED_TOKEN;
1161         }
1162         break;
1163     case tZero:
1164     case tOne:
1165     case tTwo:
1166     case tFew:
1167     case tMany:
1168     case tOther:
1169     case tKeyword:
1170         if (curType != tColon) {
1171             status = U_UNEXPECTED_TOKEN;
1172         }
1173         break;
1174     case tColon :
1175         if (curType != tVariableN) {
1176             status = U_UNEXPECTED_TOKEN;
1177         }
1178         break;
1179     case tIs:
1180         if ( curType != tNumber && curType != tNot) {
1181             status = U_UNEXPECTED_TOKEN;
1182         }
1183         break;
1184     case tNot:
1185         if (curType != tNumber && curType != tIn && curType != tWithin) {
1186             status = U_UNEXPECTED_TOKEN;
1187         }
1188         break;
1189     case tMod:
1190     case tDot:
1191     case tIn:
1192     case tWithin:
1193     case tAnd:
1194     case tOr:
1195         if (curType != tNumber && curType != tVariableN) {
1196             status = U_UNEXPECTED_TOKEN;
1197         }
1198         break;
1199     case tNumber:
1200         if (curType != tDot && curType != tSemiColon && curType != tIs && curType != tNot &&
1201             curType != tIn && curType != tWithin && curType != tAnd && curType != tOr)
1202         {
1203             status = U_UNEXPECTED_TOKEN;
1204         }
1205         break;
1206     default:
1207         status = U_UNEXPECTED_TOKEN;
1208         break;
1209     }
1210 }
1211
// Scans `ruleData` starting at *ruleIndex and extracts the next token.
//
// @param ruleData   The rule text being tokenized.
// @param ruleIndex  In/out: position to start scanning from; on return it is
//                   the position from which the next call should continue.
// @param token      Receives the token text when the token is a run of
//                   letters or digits. It is not assigned when a delimiter
//                   (':', ';', "..") or only spaces are consumed.
// @param type       In/out: receives the type of the token. It is overwritten
//                   by inRange() for every character examined, so on entry
//                   its value only matters when no character is scanned.
// @param status     Set to U_ILLEGAL_CHARACTER for a character rejected by
//                   inRange(), or to U_UNEXPECTED_TOKEN by this function or
//                   getKeyType(). Nothing is done if it is already a failure.
void
RuleParser::getNextToken(const UnicodeString& ruleData,
                         int32_t *ruleIndex,
                         UnicodeString& token,
                         tokenType& type,
                         UErrorCode &status)
{
    int32_t curIndex= *ruleIndex;
    UChar ch;
    // Type of the letter/digit/dot run seen so far in this call; `none`
    // until the first such character has been examined.
    tokenType prevType=none;

    if (U_FAILURE(status)) {
        return;
    }
    while (curIndex<ruleData.length()) {
        ch = ruleData.charAt(curIndex);
        // inRange() classifies the character and stores its class in `type`.
        if ( !inRange(ch, type) ) {
            status = U_ILLEGAL_CHARACTER;
            return;
        }
        switch (type) {
        case tSpace:
            if ( *ruleIndex != curIndex ) { // a run is pending: the space ends it
                token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
                *ruleIndex=curIndex;
                // Restore the run's type, then let getKeyType() refine it.
                type=prevType;
                getKeyType(token, type, status);
                return;
            }
            else {
                // Nothing pending: skip this space.
                *ruleIndex=*ruleIndex+1;
            }
            break; // consecutive space
        case tColon:
        case tSemiColon:
            if ( *ruleIndex != curIndex ) {
                // A run is pending: return it first and leave the delimiter
                // in place for the next call.
                token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
                *ruleIndex=curIndex;
                type=prevType;
                getKeyType(token, type, status);
                return;
            }
            else {
                // The delimiter itself is the token; `type` already holds
                // tColon or tSemiColon.
                *ruleIndex=curIndex+1;
                return;
            }
        case tLetter:
             // Both paths just keep scanning; a letter that follows a run of
             // a different type is absorbed without changing prevType.
             if ((type==prevType)||(prevType==none)) {
                prevType=type;
                break;
             }
             break;
        case tNumber:
             if ((type==prevType)||(prevType==none)) {
                prevType=type;
                break;
             }
             else {
                // NOTE(review): a digit following a run of another type
                // returns here with `type` == tNumber and without assigning
                // `token`; confirm this is the intended handling.
                *ruleIndex=curIndex+1;
                return;
             }
         case tDot:
             if (prevType==none) {  // first dot
                prevType=type;
                // `continue` skips the curIndex++ below, so the same
                // character is examined again, now with prevType set.
                continue;
             }
             else {
                 if ( *ruleIndex != curIndex ) {
                    // A run is pending: the dot ends it and is left in place
                    // for the next call.
                    token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
                    *ruleIndex=curIndex;  // letter
                    type=prevType;
                    getKeyType(token, type, status);
                    return;
                 }
                 else {  // two consecutive dots
                    // NOTE(review): two positions are skipped without
                    // checking that the second character is also a dot.
                    *ruleIndex=curIndex+2;
                    return;
                 }
             }
             break;
         default:
             status = U_UNEXPECTED_TOKEN;
             return;
        }
        curIndex++;
    }
    // End of input reached without returning from inside the loop.
    if ( curIndex>=ruleData.length() ) {
        // `type` is the class of the last character examined (or the
        // caller's value if none was); a trailing run becomes the token.
        if ( (type == tLetter)||(type == tNumber) ) {
            token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
            getKeyType(token, type, status);
            if (U_FAILURE(status)) {
                return;
            }
        }
        *ruleIndex = ruleData.length();
    }
}
1309
1310 UBool
1311 RuleParser::inRange(UChar ch, tokenType& type) {
1312     if ((ch>=CAP_A) && (ch<=CAP_Z)) {
1313         // we assume all characters are in lower case already.
1314         return FALSE;
1315     }
1316     if ((ch>=LOW_A) && (ch<=LOW_Z)) {
1317         type = tLetter;
1318         return TRUE;
1319     }
1320     if ((ch>=U_ZERO) && (ch<=U_NINE)) {
1321         type = tNumber;
1322         return TRUE;
1323     }
1324     switch (ch) {
1325     case COLON:
1326         type = tColon;
1327         return TRUE;
1328     case SPACE:
1329         type = tSpace;
1330         return TRUE;
1331     case SEMI_COLON:
1332         type = tSemiColon;
1333         return TRUE;
1334     case DOT:
1335         type = tDot;
1336         return TRUE;
1337     default :
1338         type = none;
1339         return FALSE;
1340     }
1341 }
1342
1343
1344 void
1345 RuleParser::getKeyType(const UnicodeString& token, tokenType& keyType, UErrorCode &status)
1346 {
1347     if (U_FAILURE(status)) {
1348         return;
1349     }
1350     if ( keyType==tNumber) {
1351     }
1352     else if (0 == token.compare(PK_VAR_N, 1)) {
1353         keyType = tVariableN;
1354     }
1355     else if (0 == token.compare(PK_IS, 2)) {
1356         keyType = tIs;
1357     }
1358     else if (0 == token.compare(PK_AND, 3)) {
1359         keyType = tAnd;
1360     }
1361     else if (0 == token.compare(PK_IN, 2)) {
1362         keyType = tIn;
1363     }
1364     else if (0 == token.compare(PK_WITHIN, 6)) {
1365         keyType = tWithin;
1366     }
1367     else if (0 == token.compare(PK_NOT, 3)) {
1368         keyType = tNot;
1369     }
1370     else if (0 == token.compare(PK_MOD, 3)) {
1371         keyType = tMod;
1372     }
1373     else if (0 == token.compare(PK_OR, 2)) {
1374         keyType = tOr;
1375     }
1376     else if ( isValidKeyword(token) ) {
1377         keyType = tKeyword;
1378     }
1379     else {
1380         status = U_UNEXPECTED_TOKEN;
1381     }
1382 }
1383
1384 UBool
1385 RuleParser::isValidKeyword(const UnicodeString& token) {
1386     return PatternProps::isIdentifier(token.getBuffer(), token.length());
1387 }
1388
1389 PluralKeywordEnumeration::PluralKeywordEnumeration(RuleChain *header, UErrorCode& status)
1390         : pos(0), fKeywordNames(status) {
1391     if (U_FAILURE(status)) {
1392         return;
1393     }
1394     fKeywordNames.setDeleter(uprv_deleteUObject);
1395     UBool  addKeywordOther=TRUE;
1396     RuleChain *node=header;
1397     while(node!=NULL) {
1398         fKeywordNames.addElement(new UnicodeString(node->keyword), status);
1399         if (U_FAILURE(status)) {
1400             return;
1401         }
1402         if (0 == node->keyword.compare(PLURAL_KEYWORD_OTHER, 5)) {
1403             addKeywordOther= FALSE;
1404         }
1405         node=node->next;
1406     }
1407
1408     if (addKeywordOther) {
1409         fKeywordNames.addElement(new UnicodeString(PLURAL_KEYWORD_OTHER), status);
1410     }
1411 }
1412
1413 const UnicodeString*
1414 PluralKeywordEnumeration::snext(UErrorCode& status) {
1415     if (U_SUCCESS(status) && pos < fKeywordNames.size()) {
1416         return (const UnicodeString*)fKeywordNames.elementAt(pos++);
1417     }
1418     return NULL;
1419 }
1420
1421 void
1422 PluralKeywordEnumeration::reset(UErrorCode& /*status*/) {
1423     pos=0;
1424 }
1425
1426 int32_t
1427 PluralKeywordEnumeration::count(UErrorCode& /*status*/) const {
1428        return fKeywordNames.size();
1429 }
1430
1431 PluralKeywordEnumeration::~PluralKeywordEnumeration() {
1432 }
1433
1434 U_NAMESPACE_END
1435
1436
1437 #endif /* #if !UCONFIG_NO_FORMATTING */
1438
1439 //eof