icuSources/i18n/plurrule.cpp

   1 /*
   2 *******************************************************************************
   3 * Copyright (C) 2007-2010, International Business Machines Corporation and
   4 * others. All Rights Reserved.
   5 *******************************************************************************
   6 *
   7 * File PLURRULE.CPP
   8 *
   9 * Modification History:
  10 *
  11 *   Date        Name        Description
  12 *******************************************************************************
  13 */
  14
  15
  16 #include "unicode/uniset.h"
  17 #include "unicode/utypes.h"
  18 #include "unicode/ures.h"
  19 #include "unicode/plurrule.h"
  20 #include "cmemory.h"
  21 #include "cstring.h"
  22 #include "hash.h"
  23 #include "mutex.h"
  24 #include "plurrule_impl.h"
  25 #include "putilimp.h"
  26 #include "ucln_in.h"
  27 #include "ustrfmt.h"
  28 #include "locutil.h"
  29
  30 /*
  31 // TODO(claireho): remove stdio
  32 #include "stdio.h"
  33 */
  34
  35 #if !UCONFIG_NO_FORMATTING
  36
  37 U_NAMESPACE_BEGIN
  38
  39
  40 #define ARRAY_SIZE(array) (int32_t)(sizeof array  / sizeof array[0])
  41
// Plural keywords as NUL-terminated UChar strings.
static const UChar PLURAL_KEYWORD_ZERO[] = {LOW_Z,LOW_E,LOW_R,LOW_O, 0};
static const UChar PLURAL_KEYWORD_ONE[]={LOW_O,LOW_N,LOW_E,0};
static const UChar PLURAL_KEYWORD_TWO[]={LOW_T,LOW_W,LOW_O,0};
static const UChar PLURAL_KEYWORD_FEW[]={LOW_F,LOW_E,LOW_W,0};
static const UChar PLURAL_KEYWORD_MANY[]={LOW_M,LOW_A,LOW_N,LOW_Y,0};
// "other" is the keyword RuleChain::select() returns when no rule matches.
static const UChar PLURAL_KEYWORD_OTHER[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,0};
// Rule description used by createDefaultRules() and as the fallback in forLocale().
static const UChar PLURAL_DEFAULT_RULE[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,COLON,SPACE,LOW_N,0};
// Words of the rule description syntax (PK_AND and PK_OR are also emitted by dumpRules()).
static const UChar PK_IN[]={LOW_I,LOW_N,0};
static const UChar PK_NOT[]={LOW_N,LOW_O,LOW_T,0};
static const UChar PK_IS[]={LOW_I,LOW_S,0};
static const UChar PK_MOD[]={LOW_M,LOW_O,LOW_D,0};
static const UChar PK_AND[]={LOW_A,LOW_N,LOW_D,0};
static const UChar PK_OR[]={LOW_O,LOW_R,0};
static const UChar PK_VAR_N[]={LOW_N,0};
static const UChar PK_WITHIN[]={LOW_W,LOW_I,LOW_T,LOW_H,LOW_I,LOW_N,0};

// RTTI boilerplate for the two UObject subclasses implemented in this file.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralRules)
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralKeywordEnumeration)
  60
  61 PluralRules::PluralRules(UErrorCode& status)
  62 :   UObject(),
  63     mRules(NULL)
  64 {
  65     if (U_FAILURE(status)) {
  66         return;
  67     }
  68     mParser = new RuleParser();
  69     if (mParser==NULL) {
  70         status = U_MEMORY_ALLOCATION_ERROR;
  71     }
  72 }
  73
  74 PluralRules::PluralRules(const PluralRules& other)
  75 : UObject(other),
  76     mRules(NULL),
  77     mParser(new RuleParser())
  78 {
  79     *this=other;
  80 }
  81
  82 PluralRules::~PluralRules() {
  83     delete mRules;
  84     delete mParser;
  85 }
  86
  87 PluralRules*
  88 PluralRules::clone() const {
  89     return new PluralRules(*this);
  90 }
  91
  92 PluralRules&
  93 PluralRules::operator=(const PluralRules& other) {
  94     if (this != &other) {
  95         delete mRules;
  96         if (other.mRules==NULL) {
  97             mRules = NULL;
  98         }
  99         else {
 100             mRules = new RuleChain(*other.mRules);
 101         }
 102         delete mParser;
 103         mParser = new RuleParser();
 104     }
 105
 106     return *this;
 107 }
 108
 109 PluralRules* U_EXPORT2
 110 PluralRules::createRules(const UnicodeString& description, UErrorCode& status) {
 111     RuleChain   rules;
 112
 113     if (U_FAILURE(status)) {
 114         return NULL;
 115     }
 116     PluralRules *newRules = new PluralRules(status);
 117     if ( (newRules != NULL)&& U_SUCCESS(status) ) {
 118         newRules->parseDescription((UnicodeString &)description, rules, status);
 119         if (U_SUCCESS(status)) {
 120             newRules->addRules(rules);
 121         }
 122     }
 123     if (U_FAILURE(status)) {
 124         delete newRules;
 125         return NULL;
 126     }
 127     else {
 128         return newRules;
 129     }
 130 }
 131
 132 PluralRules* U_EXPORT2
 133 PluralRules::createDefaultRules(UErrorCode& status) {
 134     return createRules(PLURAL_DEFAULT_RULE, status);
 135 }
 136
 137 PluralRules* U_EXPORT2
 138 PluralRules::forLocale(const Locale& locale, UErrorCode& status) {
 139     RuleChain   rChain;
 140     if (U_FAILURE(status)) {
 141         return NULL;
 142     }
 143     PluralRules *newObj = new PluralRules(status);
 144     if (newObj==NULL || U_FAILURE(status)) {
 145         return NULL;
 146     }
 147     UnicodeString locRule = newObj->getRuleFromResource(locale, status);
 148     if ((locRule.length() != 0) && U_SUCCESS(status)) {
 149         newObj->parseDescription(locRule, rChain, status);
 150         if (U_SUCCESS(status)) {
 151             newObj->addRules(rChain);
 152         }
 153     }
 154     if (U_FAILURE(status)||(locRule.length() == 0)) {
 155         // use default plural rule
 156         status = U_ZERO_ERROR;
 157         UnicodeString defRule = UnicodeString(PLURAL_DEFAULT_RULE);
 158         newObj->parseDescription(defRule, rChain, status);
 159         newObj->addRules(rChain);
 160     }
 161
 162     return newObj;
 163 }
 164
 165 UnicodeString
 166 PluralRules::select(int32_t number) const {
 167     if (mRules == NULL) {
 168         return PLURAL_DEFAULT_RULE;
 169     }
 170     else {
 171         return mRules->select(number);
 172     }
 173 }
 174
 175 UnicodeString
 176 PluralRules::select(double number) const {
 177     if (mRules == NULL) {
 178         return PLURAL_DEFAULT_RULE;
 179     }
 180     else {
 181         return mRules->select(number);
 182     }
 183 }
 184
 185 StringEnumeration*
 186 PluralRules::getKeywords(UErrorCode& status) const {
 187     if (U_FAILURE(status))  return NULL;
 188     StringEnumeration* nameEnumerator = new PluralKeywordEnumeration(mRules, status);
 189     if (U_FAILURE(status))  return NULL;
 190
 191     return nameEnumerator;
 192 }
 193
 194
 195 UBool
 196 PluralRules::isKeyword(const UnicodeString& keyword) const {
 197     if ( keyword == PLURAL_KEYWORD_OTHER ) {
 198         return true;
 199     }
 200     else {
 201         if (mRules==NULL) {
 202             return false;
 203         }
 204         else {
 205             return mRules->isKeyword(keyword);
 206         }
 207     }
 208 }
 209
 210 UnicodeString
 211 PluralRules::getKeywordOther() const {
 212     return PLURAL_KEYWORD_OTHER;
 213 }
 214
 215 UBool
 216 PluralRules::operator==(const PluralRules& other) const  {
 217     int32_t limit;
 218     UBool sameList = TRUE;
 219     const UnicodeString *ptrKeyword;
 220     UErrorCode status= U_ZERO_ERROR;
 221
 222     if ( this == &other ) {
 223         return TRUE;
 224     }
 225     StringEnumeration* myKeywordList = getKeywords(status);
 226     if (U_FAILURE(status)) {
 227         return FALSE;
 228     }
 229     StringEnumeration* otherKeywordList =other.getKeywords(status);
 230     if (U_FAILURE(status)) {
 231         return FALSE;
 232     }
 233
 234     if (myKeywordList->count(status)!=otherKeywordList->count(status) ||
 235         U_FAILURE(status)) {
 236         sameList = FALSE;
 237     }
 238     else {
 239         myKeywordList->reset(status);
 240         if (U_FAILURE(status)) {
 241             return FALSE;
 242         }
 243         while (sameList && (ptrKeyword=myKeywordList->snext(status))!=NULL) {
 244             if (U_FAILURE(status) || !other.isKeyword(*ptrKeyword)) {
 245                 sameList = FALSE;
 246             }
 247         }
 248         otherKeywordList->reset(status);
 249         if (U_FAILURE(status)) {
 250             return FALSE;
 251         }
 252         while (sameList && (ptrKeyword=otherKeywordList->snext(status))!=NULL) {
 253             if (U_FAILURE(status)) {
 254                 return FALSE;
 255             }
 256             if (!this->isKeyword(*ptrKeyword))  {
 257                 sameList = FALSE;
 258             }
 259         }
 260         delete myKeywordList;
 261         delete otherKeywordList;
 262         if (!sameList) {
 263             return FALSE;
 264         }
 265     }
 266
 267     if ((limit=this->getRepeatLimit()) != other.getRepeatLimit()) {
 268         return FALSE;
 269     }
 270     UnicodeString myKeyword, otherKeyword;
 271     for (int32_t i=0; i<limit; ++i) {
 272         myKeyword = this->select(i);
 273         otherKeyword = other.select(i);
 274         if (myKeyword!=otherKeyword) {
 275             return FALSE;
 276         }
 277     }
 278     return TRUE;
 279 }
 280
 281 void
 282 PluralRules::parseDescription(UnicodeString& data, RuleChain& rules, UErrorCode &status)
 283 {
 284     int32_t ruleIndex=0;
 285     UnicodeString token;
 286     tokenType type;
 287     tokenType prevType=none;
 288     RuleChain *ruleChain=NULL;
 289     AndConstraint *curAndConstraint=NULL;
 290     OrConstraint *orNode=NULL;
 291     RuleChain *lastChain=NULL;
 292
 293     if (U_FAILURE(status)) {
 294         return;
 295     }
 296     UnicodeString ruleData = data.toLower();
 297     while (ruleIndex< ruleData.length()) {
 298         mParser->getNextToken(ruleData, &ruleIndex, token, type, status);
 299         if (U_FAILURE(status)) {
 300             return;
 301         }
 302         mParser->checkSyntax(prevType, type, status);
 303         if (U_FAILURE(status)) {
 304             return;
 305         }
 306         switch (type) {
 307         case tAnd:
 308             curAndConstraint = curAndConstraint->add();
 309             break;
 310         case tOr:
 311             lastChain = &rules;
 312             while (lastChain->next !=NULL) {
 313                 lastChain = lastChain->next;
 314             }
 315             orNode=lastChain->ruleHeader;
 316             while (orNode->next != NULL) {
 317                 orNode = orNode->next;
 318             }
 319             orNode->next= new OrConstraint();
 320             orNode=orNode->next;
 321             orNode->next=NULL;
 322             curAndConstraint = orNode->add();
 323             break;
 324         case tIs:
 325             curAndConstraint->rangeHigh=-1;
 326             break;
 327         case tNot:
 328             curAndConstraint->notIn=TRUE;
 329             break;
 330         case tIn:
 331             curAndConstraint->rangeHigh=PLURAL_RANGE_HIGH;
 332             curAndConstraint->integerOnly = TRUE;
 333             break;
 334         case tWithin:
 335             curAndConstraint->rangeHigh=PLURAL_RANGE_HIGH;
 336             break;
 337         case tNumber:
 338             if ( (curAndConstraint->op==AndConstraint::MOD)&&
 339                  (curAndConstraint->opNum == -1 ) ) {
 340                 curAndConstraint->opNum=getNumberValue(token);
 341             }
 342             else {
 343                 if (curAndConstraint->rangeLow == -1) {
 344                     curAndConstraint->rangeLow=getNumberValue(token);
 345                 }
 346                 else {
 347                     curAndConstraint->rangeHigh=getNumberValue(token);
 348                 }
 349             }
 350             break;
 351         case tMod:
 352             curAndConstraint->op=AndConstraint::MOD;
 353             break;
 354         case tKeyword:
 355             if (ruleChain==NULL) {
 356                 ruleChain = &rules;
 357             }
 358             else {
 359                 while (ruleChain->next!=NULL){
 360                     ruleChain=ruleChain->next;
 361                 }
 362                 ruleChain=ruleChain->next=new RuleChain();
 363             }
 364             orNode = ruleChain->ruleHeader = new OrConstraint();
 365             curAndConstraint = orNode->add();
 366             ruleChain->keyword = token;
 367             break;
 368         default:
 369             break;
 370         }
 371         prevType=type;
 372     }
 373 }
 374
 375 int32_t
 376 PluralRules::getNumberValue(const UnicodeString& token) const {
 377     int32_t i;
 378     char digits[128];
 379
 380     i = token.extract(0, token.length(), digits, ARRAY_SIZE(digits), US_INV);
 381     digits[i]='\0';
 382
 383     return((int32_t)atoi(digits));
 384 }
 385
 386
 387 void
 388 PluralRules::getNextLocale(const UnicodeString& localeData, int32_t* curIndex, UnicodeString& localeName) {
 389     int32_t i=*curIndex;
 390
 391     localeName.remove();
 392     while (i< localeData.length()) {
 393        if ( (localeData.charAt(i)!= SPACE) && (localeData.charAt(i)!= COMMA) ) {
 394            break;
 395        }
 396        i++;
 397     }
 398
 399     while (i< localeData.length()) {
 400        if ( (localeData.charAt(i)== SPACE) || (localeData.charAt(i)== COMMA) ) {
 401            break;
 402        }
 403        localeName+=localeData.charAt(i++);
 404     }
 405     *curIndex=i;
 406 }
 407
 408
 409 int32_t
 410 PluralRules::getRepeatLimit() const {
 411     if (mRules!=NULL) {
 412         return mRules->getRepeatLimit();
 413     }
 414     else {
 415         return 0;
 416     }
 417 }
 418
 419
 420 void
 421 PluralRules::addRules(RuleChain& rules) {
 422     RuleChain *newRule = new RuleChain(rules);
 423     this->mRules=newRule;
 424     newRule->setRepeatLimit();
 425 }
 426
 427 UnicodeString
 428 PluralRules::getRuleFromResource(const Locale& locale, UErrorCode& errCode) {
 429     UnicodeString emptyStr;
 430
 431     if (U_FAILURE(errCode)) {
 432         return emptyStr;
 433     }
 434     UResourceBundle *rb=ures_openDirect(NULL, "plurals", &errCode);
 435     if(U_FAILURE(errCode)) {
 436         /* total failure, not even root could be opened */
 437         return emptyStr;
 438     }
 439     UResourceBundle *locRes=ures_getByKey(rb, "locales", NULL, &errCode);
 440     if(U_FAILURE(errCode)) {
 441         ures_close(rb);
 442         return emptyStr;
 443     }
 444     int32_t resLen=0;
 445     const char *curLocaleName=locale.getName();
 446     const UChar* s = ures_getStringByKey(locRes, curLocaleName, &resLen, &errCode);
 447
 448     if (s == NULL) {
 449         // Check parent locales.
 450         UErrorCode status = U_ZERO_ERROR;
 451         char parentLocaleName[ULOC_FULLNAME_CAPACITY];
 452         const char *curLocaleName=locale.getName();
 453         int32_t localeNameLen=0;
 454         uprv_strcpy(parentLocaleName, curLocaleName);
 455
 456         while ((localeNameLen=uloc_getParent(parentLocaleName, parentLocaleName,
 457                                        ULOC_FULLNAME_CAPACITY, &status)) > 0) {
 458             resLen=0;
 459             s = ures_getStringByKey(locRes, parentLocaleName, &resLen, &status);
 460             if (s != NULL) {
 461                 errCode = U_ZERO_ERROR;
 462                 break;
 463             }
 464             status = U_ZERO_ERROR;
 465         }
 466     }
 467     if (s==NULL) {
 468         ures_close(locRes);
 469         ures_close(rb);
 470         return emptyStr;
 471     }
 472
 473     char setKey[256];
 474     UChar result[256];
 475     u_UCharsToChars(s, setKey, resLen + 1);
 476     // printf("\n PluralRule: %s\n", setKey);
 477
 478
 479     UResourceBundle *ruleRes=ures_getByKey(rb, "rules", NULL, &errCode);
 480     if(U_FAILURE(errCode)) {
 481         ures_close(locRes);
 482         ures_close(rb);
 483         return emptyStr;
 484     }
 485     resLen=0;
 486     UResourceBundle *setRes = ures_getByKey(ruleRes, setKey, NULL, &errCode);
 487     if (U_FAILURE(errCode)) {
 488         ures_close(ruleRes);
 489         ures_close(locRes);
 490         ures_close(rb);
 491         return emptyStr;
 492     }
 493
 494     int32_t numberKeys = ures_getSize(setRes);
 495     char *key=NULL;
 496     int32_t len=0;
 497     for(int32_t i=0; i<numberKeys; ++i) {
 498         int32_t keyLen;
 499         resLen=0;
 500         s=ures_getNextString(setRes, &resLen, (const char**)&key, &errCode);
 501         keyLen = (int32_t)uprv_strlen(key);
 502         u_charsToUChars(key, result+len, keyLen);
 503         len += keyLen;
 504         result[len++]=COLON;
 505         uprv_memcpy(result+len, s, resLen*sizeof(UChar));
 506         len += resLen;
 507         result[len++]=SEMI_COLON;
 508     }
 509     result[len++]=0;
 510     u_UCharsToChars(result, setKey, len);
 511     // printf(" Rule: %s\n", setKey);
 512
 513     ures_close(setRes);
 514     ures_close(ruleRes);
 515     ures_close(locRes);
 516     ures_close(rb);
 517     return UnicodeString(result);
 518
 519 }
 520
 521 AndConstraint::AndConstraint() {
 522     op = AndConstraint::NONE;
 523     opNum=-1;
 524     rangeLow=-1;
 525     rangeHigh=-1;
 526     notIn=FALSE;
 527     integerOnly=FALSE;
 528     next=NULL;
 529 }
 530
 531
 532 AndConstraint::AndConstraint(const AndConstraint& other) {
 533     this->op = other.op;
 534     this->opNum=other.opNum;
 535     this->rangeLow=other.rangeLow;
 536     this->rangeHigh=other.rangeHigh;
 537     this->integerOnly=other.integerOnly;
 538     this->notIn=other.notIn;
 539     if (other.next==NULL) {
 540         this->next=NULL;
 541     }
 542     else {
 543         this->next = new AndConstraint(*other.next);
 544     }
 545 }
 546
 547 AndConstraint::~AndConstraint() {
 548     if (next!=NULL) {
 549         delete next;
 550     }
 551 }
 552
 553
 554 UBool
 555 AndConstraint::isFulfilled(double number) {
 556     UBool result=TRUE;
 557     double value=number;
 558
 559     if ( op == MOD ) {
 560         value = (int32_t)value % opNum;
 561     }
 562     if ( rangeHigh == -1 ) {
 563         if ( rangeLow == -1 ) {
 564             result = TRUE; // empty rule
 565         }
 566         else {
 567             if ( value == rangeLow ) {
 568                 result = TRUE;
 569             }
 570             else {
 571                 result = FALSE;
 572             }
 573         }
 574     }
 575     else {
 576         if ((rangeLow <= value) && (value <= rangeHigh)) {
 577             if (integerOnly) {
 578                 if ( value != (int32_t)value) {
 579                     result = FALSE;
 580                 }
 581                 else {
 582                     result = TRUE;
 583                 }
 584             }
 585             else {
 586                 result = TRUE;
 587             }
 588         }
 589         else {
 590             result = FALSE;
 591         }
 592     }
 593     if (notIn) {
 594         return !result;
 595     }
 596     else {
 597         return result;
 598     }
 599 }
 600
 601 int32_t
 602 AndConstraint::updateRepeatLimit(int32_t maxLimit) {
 603
 604     if ( op == MOD ) {
 605         return uprv_max(opNum, maxLimit);
 606     }
 607     else {
 608         if ( rangeHigh == -1 ) {
 609             return uprv_max(rangeLow, maxLimit);
 610         }
 611         else{
 612             return uprv_max(rangeHigh, maxLimit);
 613         }
 614     }
 615 }
 616
 617
 618 AndConstraint*
 619 AndConstraint::add()
 620 {
 621     this->next = new AndConstraint();
 622     return this->next;
 623 }
 624
 625 OrConstraint::OrConstraint() {
 626     childNode=NULL;
 627     next=NULL;
 628 }
 629
 630 OrConstraint::OrConstraint(const OrConstraint& other) {
 631     if ( other.childNode == NULL ) {
 632         this->childNode = NULL;
 633     }
 634     else {
 635         this->childNode = new AndConstraint(*(other.childNode));
 636     }
 637     if (other.next == NULL ) {
 638         this->next = NULL;
 639     }
 640     else {
 641         this->next = new OrConstraint(*(other.next));
 642     }
 643 }
 644
 645 OrConstraint::~OrConstraint() {
 646     if (childNode!=NULL) {
 647         delete childNode;
 648     }
 649     if (next!=NULL) {
 650         delete next;
 651     }
 652 }
 653
 654 AndConstraint*
 655 OrConstraint::add()
 656 {
 657     OrConstraint *curOrConstraint=this;
 658     {
 659         while (curOrConstraint->next!=NULL) {
 660             curOrConstraint = curOrConstraint->next;
 661         }
 662         curOrConstraint->next = NULL;
 663         curOrConstraint->childNode = new AndConstraint();
 664     }
 665     return curOrConstraint->childNode;
 666 }
 667
 668 UBool
 669 OrConstraint::isFulfilled(double number) {
 670     OrConstraint* orRule=this;
 671     UBool result=FALSE;
 672
 673     while (orRule!=NULL && !result) {
 674         result=TRUE;
 675         AndConstraint* andRule = orRule->childNode;
 676         while (andRule!=NULL && result) {
 677             result = andRule->isFulfilled(number);
 678             andRule=andRule->next;
 679         }
 680         orRule = orRule->next;
 681     }
 682
 683     return result;
 684 }
 685
 686
 687 RuleChain::RuleChain() {
 688     ruleHeader=NULL;
 689     next = NULL;
 690     repeatLimit=0;
 691 }
 692
 693 RuleChain::RuleChain(const RuleChain& other) {
 694     this->repeatLimit = other.repeatLimit;
 695     this->keyword=other.keyword;
 696     if (other.ruleHeader != NULL) {
 697         this->ruleHeader = new OrConstraint(*(other.ruleHeader));
 698     }
 699     else {
 700         this->ruleHeader = NULL;
 701     }
 702     if (other.next != NULL ) {
 703         this->next = new RuleChain(*other.next);
 704     }
 705     else
 706     {
 707         this->next = NULL;
 708     }
 709 }
 710
 711 RuleChain::~RuleChain() {
 712     if (next != NULL) {
 713         delete next;
 714     }
 715     if ( ruleHeader != NULL ) {
 716         delete ruleHeader;
 717     }
 718 }
 719
 720 UnicodeString
 721 RuleChain::select(double number) const {
 722
 723    if ( ruleHeader != NULL ) {
 724        if (ruleHeader->isFulfilled(number)) {
 725            return keyword;
 726        }
 727    }
 728    if ( next != NULL ) {
 729        return next->select(number);
 730    }
 731    else {
 732        return PLURAL_KEYWORD_OTHER;
 733    }
 734
 735 }
 736
 737 void
 738 RuleChain::dumpRules(UnicodeString& result) {
 739     UChar digitString[16];
 740
 741     if ( ruleHeader != NULL ) {
 742         result +=  keyword;
 743         OrConstraint* orRule=ruleHeader;
 744         while ( orRule != NULL ) {
 745             AndConstraint* andRule=orRule->childNode;
 746             while ( andRule != NULL ) {
 747                 if ( (andRule->op==AndConstraint::NONE) && (andRule->rangeHigh==-1) ) {
 748                     result += UNICODE_STRING_SIMPLE(" n is ");
 749                     if (andRule->notIn) {
 750                         result += UNICODE_STRING_SIMPLE("not ");
 751                     }
 752                     uprv_itou(digitString,16, andRule->rangeLow,10,0);
 753                     result += UnicodeString(digitString);
 754                 }
 755                 else {
 756                     if (andRule->op==AndConstraint::MOD) {
 757                         result += UNICODE_STRING_SIMPLE("  n mod ");
 758                         uprv_itou(digitString,16, andRule->opNum,10,0);
 759                         result += UnicodeString(digitString);
 760                     }
 761                     else {
 762                         result += UNICODE_STRING_SIMPLE("  n ");
 763                     }
 764                     if (andRule->rangeHigh==-1) {
 765                         if (andRule->notIn) {
 766                             result += UNICODE_STRING_SIMPLE(" is not ");
 767                             uprv_itou(digitString,16, andRule->rangeLow,10,0);
 768                             result += UnicodeString(digitString);
 769                         }
 770                         else {
 771                             result += UNICODE_STRING_SIMPLE(" is ");
 772                             uprv_itou(digitString,16, andRule->rangeLow,10,0);
 773                             result += UnicodeString(digitString);
 774                         }
 775                     }
 776                     else {
 777                         if (andRule->notIn) {
 778                             if ( andRule->integerOnly ) {
 779                                 result += UNICODE_STRING_SIMPLE("  not in ");
 780                             }
 781                             else {
 782                                 result += UNICODE_STRING_SIMPLE("  not within ");
 783                             }
 784                             uprv_itou(digitString,16, andRule->rangeLow,10,0);
 785                             result += UnicodeString(digitString);
 786                             result += UNICODE_STRING_SIMPLE(" .. ");
 787                             uprv_itou(digitString,16, andRule->rangeHigh,10,0);
 788                             result += UnicodeString(digitString);
 789                         }
 790                         else {
 791                             if ( andRule->integerOnly ) {
 792                                 result += UNICODE_STRING_SIMPLE(" in ");
 793                             }
 794                             else {
 795                                 result += UNICODE_STRING_SIMPLE(" within ");
 796                             }
 797                             uprv_itou(digitString,16, andRule->rangeLow,10,0);
 798                             result += UnicodeString(digitString);
 799                             result += UNICODE_STRING_SIMPLE(" .. ");
 800                             uprv_itou(digitString,16, andRule->rangeHigh,10,0);
 801                         }
 802                     }
 803                 }
 804                 if ( (andRule=andRule->next) != NULL) {
 805                     result += PK_AND;
 806                 }
 807             }
 808             if ( (orRule = orRule->next) != NULL ) {
 809                 result += PK_OR;
 810             }
 811         }
 812     }
 813     if ( next != NULL ) {
 814         next->dumpRules(result);
 815     }
 816 }
 817
 818 int32_t
 819 RuleChain::getRepeatLimit () {
 820     return repeatLimit;
 821 }
 822
 823 void
 824 RuleChain::setRepeatLimit () {
 825     int32_t limit=0;
 826
 827     if ( next != NULL ) {
 828         next->setRepeatLimit();
 829         limit = next->repeatLimit;
 830     }
 831
 832     if ( ruleHeader != NULL ) {
 833         OrConstraint* orRule=ruleHeader;
 834         while ( orRule != NULL ) {
 835             AndConstraint* andRule=orRule->childNode;
 836             while ( andRule != NULL ) {
 837                 limit = andRule->updateRepeatLimit(limit);
 838                 andRule = andRule->next;
 839             }
 840             orRule = orRule->next;
 841         }
 842     }
 843     repeatLimit = limit;
 844 }
 845
 846 UErrorCode
 847 RuleChain::getKeywords(int32_t capacityOfKeywords, UnicodeString* keywords, int32_t& arraySize) const {
 848     if ( arraySize < capacityOfKeywords-1 ) {
 849         keywords[arraySize++]=keyword;
 850     }
 851     else {
 852         return U_BUFFER_OVERFLOW_ERROR;
 853     }
 854
 855     if ( next != NULL ) {
 856         return next->getKeywords(capacityOfKeywords, keywords, arraySize);
 857     }
 858     else {
 859         return U_ZERO_ERROR;
 860     }
 861 }
 862
 863 UBool
 864 RuleChain::isKeyword(const UnicodeString& keywordParam) const {
 865     if ( keyword == keywordParam ) {
 866         return TRUE;
 867     }
 868
 869     if ( next != NULL ) {
 870         return next->isKeyword(keywordParam);
 871     }
 872     else {
 873         return FALSE;
 874     }
 875 }
 876
 877
 878 RuleParser::RuleParser() {
 879     UErrorCode err=U_ZERO_ERROR;
 880     const UnicodeString idStart=UNICODE_STRING_SIMPLE("[[a-z]]");
 881     const UnicodeString idContinue=UNICODE_STRING_SIMPLE("[[a-z][A-Z][_][0-9]]");
 882     idStartFilter = new UnicodeSet(idStart, err);
 883     idContinueFilter = new UnicodeSet(idContinue, err);
 884 }
 885
 886 RuleParser::~RuleParser() {
 887     delete idStartFilter;
 888     delete idContinueFilter;
 889 }
 890
 891 void
 892 RuleParser::checkSyntax(tokenType prevType, tokenType curType, UErrorCode &status)
 893 {
 894     if (U_FAILURE(status)) {
 895         return;
 896     }
 897     switch(prevType) {
 898     case none:
 899     case tSemiColon:
 900         if (curType!=tKeyword) {
 901             status = U_UNEXPECTED_TOKEN;
 902         }
 903         break;
 904     case tVariableN :
 905         if (curType != tIs && curType != tMod && curType != tIn &&
 906             curType != tNot && curType != tWithin) {
 907             status = U_UNEXPECTED_TOKEN;
 908         }
 909         break;
 910     case tZero:
 911     case tOne:
 912     case tTwo:
 913     case tFew:
 914     case tMany:
 915     case tOther:
 916     case tKeyword:
 917         if (curType != tColon) {
 918             status = U_UNEXPECTED_TOKEN;
 919         }
 920         break;
 921     case tColon :
 922         if (curType != tVariableN) {
 923             status = U_UNEXPECTED_TOKEN;
 924         }
 925         break;
 926     case tIs:
 927         if ( curType != tNumber && curType != tNot) {
 928             status = U_UNEXPECTED_TOKEN;
 929         }
 930         break;
 931     case tNot:
 932         if (curType != tNumber && curType != tIn && curType != tWithin) {
 933             status = U_UNEXPECTED_TOKEN;
 934         }
 935         break;
 936     case tMod:
 937     case tDot:
 938     case tIn:
 939     case tWithin:
 940     case tAnd:
 941     case tOr:
 942         if (curType != tNumber && curType != tVariableN) {
 943             status = U_UNEXPECTED_TOKEN;
 944         }
 945         break;
 946     case tNumber:
 947         if (curType != tDot && curType != tSemiColon && curType != tIs && curType != tNot &&
 948             curType != tIn && curType != tWithin && curType != tAnd && curType != tOr)
 949         {
 950             status = U_UNEXPECTED_TOKEN;
 951         }
 952         break;
 953     default:
 954         status = U_UNEXPECTED_TOKEN;
 955         break;
 956     }
 957 }
 958
// Scans ruleData starting at *ruleIndex and produces the next token.
//   ruleData:  the rule text (expected to be lowercase; inRange() rejects A-Z)
//   ruleIndex: in: where to start scanning; out: where the next call should start
//   token:     receives the token text when a word or number is extracted
//   type:      receives the token type
//   status:    in/out error code; set to U_ILLEGAL_CHARACTER for a character
//              inRange() rejects, or U_UNEXPECTED_TOKEN for an unhandled type
void
RuleParser::getNextToken(const UnicodeString& ruleData,
                         int32_t *ruleIndex,
                         UnicodeString& token,
                         tokenType& type,
                         UErrorCode &status)
{
    int32_t curIndex= *ruleIndex;
    UChar ch;
    // Type of the characters collected so far for the current token.
    tokenType prevType=none;

    if (U_FAILURE(status)) {
        return;
    }
    while (curIndex<ruleData.length()) {
        ch = ruleData.charAt(curIndex);
        // inRange() classifies the character and stores its class in type.
        if ( !inRange(ch, type) ) {
            status = U_ILLEGAL_CHARACTER;
            return;
        }
        switch (type) {
        case tSpace:
            if ( *ruleIndex != curIndex ) { // letter
                // A space ends the word or number collected so far.
                // getKeyType() is defined elsewhere; presumably it refines type
                // from the token text - confirm.
                token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
                *ruleIndex=curIndex;
                type=prevType;
                getKeyType(token, type, status);
                return;
            }
            else {
                // Leading space: move the token start past it.
                *ruleIndex=*ruleIndex+1;
            }
            break; // consecutive spaces
        case tColon:
        case tSemiColon:
            if ( *ruleIndex != curIndex ) {
                // The punctuation ends a pending word or number; return that first
                // and leave *ruleIndex on the punctuation for the next call.
                token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
                *ruleIndex=curIndex;
                type=prevType;
                getKeyType(token, type, status);
                return;
            }
            else {
                // The punctuation itself is the token; type is already set.
                *ruleIndex=curIndex+1;
                return;
            }
        case tLetter:
             // Both paths just continue scanning; prevType is only updated when
             // the run is still all letters (or just started).
             if ((type==prevType)||(prevType==none)) {
                prevType=type;
                break;
             }
             break;
        case tNumber:
             if ((type==prevType)||(prevType==none)) {
                prevType=type;
                break;
             }
             else {
                // A digit following characters of another type ends the scan here.
                *ruleIndex=curIndex+1;
                return;
             }
         case tDot:
             if (prevType==none) {  // first dot
                // Re-examine the same character with prevType set; curIndex is
                // intentionally not advanced.
                prevType=type;
                continue;
             }
             else {
                 if ( *ruleIndex != curIndex ) {
                    // The dot ends a pending word or number.
                    token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
                    *ruleIndex=curIndex;  // letter
                    type=prevType;
                    getKeyType(token, type, status);
                    return;
                 }
                 else {  // two consecutive dots
                    // Skips two characters without inspecting the second one.
                    *ruleIndex=curIndex+2;
                    return;
                 }
             }
             break;
         default:
             status = U_UNEXPECTED_TOKEN;
             return;
        }
        curIndex++;
    }
    // End of input: emit a word or number that runs to the end of the text.
    if ( curIndex>=ruleData.length() ) {
        if ( (type == tLetter)||(type == tNumber) ) {
            token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
            getKeyType(token, type, status);
            if (U_FAILURE(status)) {
                return;
            }
        }
        *ruleIndex = ruleData.length();
    }
}
1056
1057 UBool
1058 RuleParser::inRange(UChar ch, tokenType& type) {
1059     if ((ch>=CAP_A) && (ch<=CAP_Z)) {
1060         // we assume all characters are in lower case already.
1061         return FALSE;
1062     }
1063     if ((ch>=LOW_A) && (ch<=LOW_Z)) {
1064         type = tLetter;
1065         return TRUE;
1066     }
1067     if ((ch>=U_ZERO) && (ch<=U_NINE)) {
1068         type = tNumber;
1069         return TRUE;
1070     }
1071     switch (ch) {
1072     case COLON:
1073         type = tColon;
1074         return TRUE;
1075     case SPACE:
1076         type = tSpace;
1077         return TRUE;
1078     case SEMI_COLON:
1079         type = tSemiColon;
1080         return TRUE;
1081     case DOT:
1082         type = tDot;
1083         return TRUE;
1084     default :
1085         type = none;
1086         return FALSE;
1087     }
1088 }
1089
1090
1091 void
1092 RuleParser::getKeyType(const UnicodeString& token, tokenType& keyType, UErrorCode &status)
1093 {
1094     if (U_FAILURE(status)) {
1095         return;
1096     }
1097     if ( keyType==tNumber) {
1098     }
1099     else if (token==PK_VAR_N) {
1100         keyType = tVariableN;
1101     }
1102     else if (token==PK_IS) {
1103         keyType = tIs;
1104     }
1105     else if (token==PK_AND) {
1106         keyType = tAnd;
1107     }
1108     else if (token==PK_IN) {
1109         keyType = tIn;
1110     }
1111     else if (token==PK_WITHIN) {
1112         keyType = tWithin;
1113     }
1114     else if (token==PK_NOT) {
1115         keyType = tNot;
1116     }
1117     else if (token==PK_MOD) {
1118         keyType = tMod;
1119     }
1120     else if (token==PK_OR) {
1121         keyType = tOr;
1122     }
1123     else if ( isValidKeyword(token) ) {
1124         keyType = tKeyword;
1125     }
1126     else {
1127         status = U_UNEXPECTED_TOKEN;
1128     }
1129 }
1130
1131 UBool
1132 RuleParser::isValidKeyword(const UnicodeString& token) {
1133     if ( token.length()==0 ) {
1134         return FALSE;
1135     }
1136     if ( idStartFilter->contains(token.charAt(0) )==TRUE ) {
1137         int32_t i;
1138         for (i=1; i< token.length(); i++) {
1139             if (idContinueFilter->contains(token.charAt(i))== FALSE) {
1140                 return FALSE;
1141             }
1142         }
1143         return TRUE;
1144     }
1145     else {
1146         return FALSE;
1147     }
1148 }
1149
1150 PluralKeywordEnumeration::PluralKeywordEnumeration(RuleChain *header, UErrorCode& status) :
1151 fKeywordNames(status)
1152 {
1153     RuleChain *node=header;
1154     UBool  addKeywordOther=true;
1155
1156     if (U_FAILURE(status)) {
1157         return;
1158     }
1159     pos=0;
1160     fKeywordNames.removeAllElements();
1161     while(node!=NULL) {
1162         fKeywordNames.addElement(new UnicodeString(node->keyword), status);
1163         if (U_FAILURE(status)) {
1164             return;
1165         }
1166         if (node->keyword == PLURAL_KEYWORD_OTHER) {
1167             addKeywordOther= false;
1168         }
1169         node=node->next;
1170     }
1171
1172     if (addKeywordOther) {
1173         fKeywordNames.addElement(new UnicodeString(PLURAL_KEYWORD_OTHER), status);
1174         if (U_FAILURE(status)) {
1175             return;
1176         }
1177     }
1178 }
1179
1180 const UnicodeString*
1181 PluralKeywordEnumeration::snext(UErrorCode& status) {
1182     if (U_SUCCESS(status) && pos < fKeywordNames.size()) {
1183         return (const UnicodeString*)fKeywordNames.elementAt(pos++);
1184     }
1185     return NULL;
1186 }
1187
1188 void
1189 PluralKeywordEnumeration::reset(UErrorCode& /*status*/) {
1190     pos=0;
1191 }
1192
1193 int32_t
1194 PluralKeywordEnumeration::count(UErrorCode& /*status*/) const {
1195        return fKeywordNames.size();
1196 }
1197
1198 PluralKeywordEnumeration::~PluralKeywordEnumeration() {
1199     UnicodeString *s;
1200     for (int32_t i=0; i<fKeywordNames.size(); ++i) {
1201         if ((s=(UnicodeString *)fKeywordNames.elementAt(i))!=NULL) {
1202             delete s;
1203         }
1204     }
1205 }
1206
1207 U_NAMESPACE_END
1208
1209
1210 #endif /* #if !UCONFIG_NO_FORMATTING */
1211
1212 //eof