]> git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/plurrule.cpp
ICU-400.38.tar.gz
[apple/icu.git] / icuSources / i18n / plurrule.cpp
1 /*
2 *******************************************************************************
3 * Copyright (C) 2007-2008, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 *******************************************************************************
6 *
7 * File PLURRULE.CPP
8 *
9 * Modification History:
10 *
11 * Date Name Description
12 *******************************************************************************
13 */
14
15
16 #include "unicode/uniset.h"
17 #include "unicode/utypes.h"
18 #include "unicode/ures.h"
19 #include "unicode/plurrule.h"
20 #include "cmemory.h"
21 #include "cstring.h"
22 #include "hash.h"
23 #include "mutex.h"
24 #include "plurrule_impl.h"
25 #include "putilimp.h"
26 #include "ucln_in.h"
27 #include "ustrfmt.h"
28 #include "locutil.h"
29
30 /*
31 // TODO(claireho): remove stdio
32 #include "stdio.h"
33 */
34
35 #if !UCONFIG_NO_FORMATTING
36
37 U_NAMESPACE_BEGIN
38
39
40 #define ARRAY_SIZE(array) (int32_t)(sizeof array / sizeof array[0])
41
42 static const UChar PLURAL_KEYWORD_ZERO[] = {LOW_Z,LOW_E,LOW_R,LOW_O, 0};
43 static const UChar PLURAL_KEYWORD_ONE[]={LOW_O,LOW_N,LOW_E,0};
44 static const UChar PLURAL_KEYWORD_TWO[]={LOW_T,LOW_W,LOW_O,0};
45 static const UChar PLURAL_KEYWORD_FEW[]={LOW_F,LOW_E,LOW_W,0};
46 static const UChar PLURAL_KEYWORD_MANY[]={LOW_M,LOW_A,LOW_N,LOW_Y,0};
47 static const UChar PLURAL_KEYWORD_OTHER[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,0};
48 static const UChar PLURAL_DEFAULT_RULE[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,COLON,SPACE,LOW_N,0};
49 static const UChar PK_IN[]={LOW_I,LOW_N,0};
50 static const UChar PK_NOT[]={LOW_N,LOW_O,LOW_T,0};
51 static const UChar PK_IS[]={LOW_I,LOW_S,0};
52 static const UChar PK_MOD[]={LOW_M,LOW_O,LOW_D,0};
53 static const UChar PK_AND[]={LOW_A,LOW_N,LOW_D,0};
54 static const UChar PK_OR[]={LOW_O,LOW_R,0};
55 static const UChar PK_VAR_N[]={LOW_N,0};
56 static const UChar PK_WITHIN[]={LOW_W,LOW_I,LOW_T,LOW_H,LOW_I,LOW_N,0};
57
58 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralRules)
59 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralKeywordEnumeration)
60
61 PluralRules::PluralRules(UErrorCode& status)
62 : UObject(),
63 mRules(NULL),
64 mParser(new RuleParser())
65 {
66 if (mParser==NULL) {
67 status = U_MEMORY_ALLOCATION_ERROR;
68 }
69 }
70
71 PluralRules::PluralRules(const PluralRules& other)
72 : UObject(other),
73 mRules(NULL),
74 mParser(new RuleParser())
75 {
76 *this=other;
77 }
78
79 PluralRules::~PluralRules() {
80 delete mRules;
81 delete mParser;
82 }
83
84 PluralRules*
85 PluralRules::clone() const {
86 return new PluralRules(*this);
87 }
88
89 PluralRules&
90 PluralRules::operator=(const PluralRules& other) {
91 if (this != &other) {
92 delete mRules;
93 if (other.mRules==NULL) {
94 mRules = NULL;
95 }
96 else {
97 mRules = new RuleChain(*other.mRules);
98 }
99 delete mParser;
100 mParser = new RuleParser();
101 }
102
103 return *this;
104 }
105
106 PluralRules* U_EXPORT2
107 PluralRules::createRules(const UnicodeString& description, UErrorCode& status) {
108 RuleChain rules;
109
110 PluralRules *newRules = new PluralRules(status);
111 if ( (newRules != NULL)&& U_SUCCESS(status) ) {
112 newRules->parseDescription((UnicodeString &)description, rules, status);
113 if (U_SUCCESS(status)) {
114 newRules->addRules(rules);
115 }
116 }
117 if (U_FAILURE(status)) {
118 delete newRules;
119 return NULL;
120 }
121 else {
122 return newRules;
123 }
124 }
125
126 PluralRules* U_EXPORT2
127 PluralRules::createDefaultRules(UErrorCode& status) {
128 return createRules(PLURAL_DEFAULT_RULE, status);
129 }
130
131 PluralRules* U_EXPORT2
132 PluralRules::forLocale(const Locale& locale, UErrorCode& status) {
133 RuleChain rChain;
134 status = U_ZERO_ERROR;
135 PluralRules *newObj = new PluralRules(status);
136 if (newObj==NULL) {
137 return NULL;
138 }
139 UnicodeString locRule = newObj->getRuleFromResource(locale, status);
140 if ((locRule.length() != 0) && U_SUCCESS(status)) {
141 newObj->parseDescription(locRule, rChain, status);
142 if (U_SUCCESS(status)) {
143 newObj->addRules(rChain);
144 }
145 }
146 if (U_FAILURE(status)||(locRule.length() == 0)) {
147 // use default plural rule
148 status = U_ZERO_ERROR;
149 UnicodeString defRule = UnicodeString(PLURAL_DEFAULT_RULE);
150 newObj->parseDescription(defRule, rChain, status);
151 newObj->addRules(rChain);
152 }
153
154 return newObj;
155 }
156
157 UnicodeString
158 PluralRules::select(int32_t number) const {
159 if (mRules == NULL) {
160 return PLURAL_DEFAULT_RULE;
161 }
162 else {
163 return mRules->select(number);
164 }
165 }
166
167 UnicodeString
168 PluralRules::select(double number) const {
169 if (mRules == NULL) {
170 return PLURAL_DEFAULT_RULE;
171 }
172 else {
173 return mRules->select(number);
174 }
175 }
176
177 StringEnumeration*
178 PluralRules::getKeywords(UErrorCode& status) const {
179 if (U_FAILURE(status)) return NULL;
180 StringEnumeration* nameEnumerator = new PluralKeywordEnumeration(mRules, status);
181 return nameEnumerator;
182 }
183
184
185 UBool
186 PluralRules::isKeyword(const UnicodeString& keyword) const {
187 if ( keyword == PLURAL_KEYWORD_OTHER ) {
188 return true;
189 }
190 else {
191 if (mRules==NULL) {
192 return false;
193 }
194 else {
195 return mRules->isKeyword(keyword);
196 }
197 }
198 }
199
200 UnicodeString
201 PluralRules::getKeywordOther() const {
202 return PLURAL_KEYWORD_OTHER;
203 }
204
205 UBool
206 PluralRules::operator==(const PluralRules& other) const {
207 int32_t limit;
208 UBool sameList = TRUE;
209 const UnicodeString *ptrKeyword;
210 UErrorCode status= U_ZERO_ERROR;
211
212 if ( this == &other ) {
213 return TRUE;
214 }
215 StringEnumeration* myKeywordList = getKeywords(status);
216 StringEnumeration* otherKeywordList =other.getKeywords(status);
217
218 if (myKeywordList->count(status)!=otherKeywordList->count(status)) {
219 sameList = FALSE;
220 }
221 else {
222 myKeywordList->reset(status);
223 while (sameList && (ptrKeyword=myKeywordList->snext(status))!=NULL) {
224 if (!other.isKeyword(*ptrKeyword)) {
225 sameList = FALSE;
226 }
227 }
228 otherKeywordList->reset(status);
229 while (sameList && (ptrKeyword=otherKeywordList->snext(status))!=NULL) {
230 if (!this->isKeyword(*ptrKeyword)) {
231 sameList = FALSE;
232 }
233 }
234 delete myKeywordList;
235 delete otherKeywordList;
236 if (!sameList) {
237 return FALSE;
238 }
239 }
240
241 if ((limit=this->getRepeatLimit()) != other.getRepeatLimit()) {
242 return FALSE;
243 }
244 UnicodeString myKeyword, otherKeyword;
245 for (int32_t i=0; i<limit; ++i) {
246 myKeyword = this->select(i);
247 otherKeyword = other.select(i);
248 if (myKeyword!=otherKeyword) {
249 return FALSE;
250 }
251 }
252 return TRUE;
253 }
254
255 void
256 PluralRules::parseDescription(UnicodeString& data, RuleChain& rules, UErrorCode &status)
257 {
258 int32_t ruleIndex=0;
259 UnicodeString token;
260 tokenType type;
261 tokenType prevType=none;
262 RuleChain *ruleChain=NULL;
263 AndConstraint *curAndConstraint=NULL;
264 OrConstraint *orNode=NULL;
265 RuleChain *lastChain=NULL;
266
267 UnicodeString ruleData = data.toLower();
268 while (ruleIndex< ruleData.length()) {
269 mParser->getNextToken(ruleData, &ruleIndex, token, type, status);
270 if (U_FAILURE(status)) {
271 return;
272 }
273 mParser->checkSyntax(prevType, type, status);
274 if (U_FAILURE(status)) {
275 return;
276 }
277 switch (type) {
278 case tAnd:
279 curAndConstraint = curAndConstraint->add();
280 break;
281 case tOr:
282 lastChain = &rules;
283 while (lastChain->next !=NULL) {
284 lastChain = lastChain->next;
285 }
286 orNode=lastChain->ruleHeader;
287 while (orNode->next != NULL) {
288 orNode = orNode->next;
289 }
290 orNode->next= new OrConstraint();
291 orNode=orNode->next;
292 orNode->next=NULL;
293 curAndConstraint = orNode->add();
294 break;
295 case tIs:
296 curAndConstraint->rangeHigh=-1;
297 break;
298 case tNot:
299 curAndConstraint->notIn=TRUE;
300 break;
301 case tIn:
302 curAndConstraint->rangeHigh=PLURAL_RANGE_HIGH;
303 curAndConstraint->integerOnly = TRUE;
304 break;
305 case tWithin:
306 curAndConstraint->rangeHigh=PLURAL_RANGE_HIGH;
307 break;
308 case tNumber:
309 if ( (curAndConstraint->op==AndConstraint::MOD)&&
310 (curAndConstraint->opNum == -1 ) ) {
311 curAndConstraint->opNum=getNumberValue(token);
312 }
313 else {
314 if (curAndConstraint->rangeLow == -1) {
315 curAndConstraint->rangeLow=getNumberValue(token);
316 }
317 else {
318 curAndConstraint->rangeHigh=getNumberValue(token);
319 }
320 }
321 break;
322 case tMod:
323 curAndConstraint->op=AndConstraint::MOD;
324 break;
325 case tKeyword:
326 if (ruleChain==NULL) {
327 ruleChain = &rules;
328 }
329 else {
330 while (ruleChain->next!=NULL){
331 ruleChain=ruleChain->next;
332 }
333 ruleChain=ruleChain->next=new RuleChain();
334 }
335 orNode = ruleChain->ruleHeader = new OrConstraint();
336 curAndConstraint = orNode->add();
337 ruleChain->keyword = token;
338 break;
339 default:
340 break;
341 }
342 prevType=type;
343 }
344 }
345
346 int32_t
347 PluralRules::getNumberValue(const UnicodeString& token) const {
348 int32_t i;
349 char digits[128];
350
351 i = token.extract(0, token.length(), digits, ARRAY_SIZE(digits), US_INV);
352 digits[i]='\0';
353
354 return((int32_t)atoi(digits));
355 }
356
357
358 void
359 PluralRules::getNextLocale(const UnicodeString& localeData, int32_t* curIndex, UnicodeString& localeName) {
360 int32_t i=*curIndex;
361
362 localeName.remove();
363 while (i< localeData.length()) {
364 if ( (localeData.charAt(i)!= SPACE) && (localeData.charAt(i)!= COMMA) ) {
365 break;
366 }
367 i++;
368 }
369
370 while (i< localeData.length()) {
371 if ( (localeData.charAt(i)== SPACE) || (localeData.charAt(i)== COMMA) ) {
372 break;
373 }
374 localeName+=localeData.charAt(i++);
375 }
376 *curIndex=i;
377 }
378
379
380 int32_t
381 PluralRules::getRepeatLimit() const {
382 if (mRules!=NULL) {
383 return mRules->getRepeatLimit();
384 }
385 else {
386 return 0;
387 }
388 }
389
390
391 void
392 PluralRules::addRules(RuleChain& rules) {
393 RuleChain *newRule = new RuleChain(rules);
394 this->mRules=newRule;
395 newRule->setRepeatLimit();
396 }
397
398 UnicodeString
399 PluralRules::getRuleFromResource(const Locale& locale, UErrorCode& errCode) {
400 UnicodeString emptyStr;
401
402 errCode = U_ZERO_ERROR;
403 UResourceBundle *rb=ures_openDirect(NULL, "plurals", &errCode);
404 if(U_FAILURE(errCode)) {
405 /* total failure, not even root could be opened */
406 return emptyStr;
407 }
408 UResourceBundle *locRes=ures_getByKey(rb, "locales", NULL, &errCode);
409 if(U_FAILURE(errCode)) {
410 ures_close(rb);
411 return emptyStr;
412 }
413 int32_t resLen=0;
414 const char *curLocaleName=locale.getName();
415 const UChar* s = ures_getStringByKey(locRes, curLocaleName, &resLen, &errCode);
416
417 if (s == NULL) {
418 // Check parent locales.
419 UErrorCode status = U_ZERO_ERROR;
420 char parentLocaleName[ULOC_FULLNAME_CAPACITY];
421 const char *curLocaleName=locale.getName();
422 int32_t localeNameLen=0;
423 uprv_strcpy(parentLocaleName, curLocaleName);
424
425 while ((localeNameLen=uloc_getParent(parentLocaleName, parentLocaleName,
426 ULOC_FULLNAME_CAPACITY, &status)) > 0) {
427 resLen=0;
428 s = ures_getStringByKey(locRes, parentLocaleName, &resLen, &status);
429 if (s != NULL) {
430 errCode = U_ZERO_ERROR;
431 break;
432 }
433 status = U_ZERO_ERROR;
434 }
435 }
436 if (s==NULL) {
437 ures_close(locRes);
438 ures_close(rb);
439 return emptyStr;
440 }
441
442 char setKey[256];
443 UChar result[256];
444 u_UCharsToChars(s, setKey, resLen + 1);
445 // printf("\n PluralRule: %s\n", setKey);
446
447
448 UResourceBundle *ruleRes=ures_getByKey(rb, "rules", NULL, &errCode);
449 if(U_FAILURE(errCode)) {
450 ures_close(locRes);
451 ures_close(rb);
452 return emptyStr;
453 }
454 resLen=0;
455 UResourceBundle *setRes = ures_getByKey(ruleRes, setKey, NULL, &errCode);
456 if (U_FAILURE(errCode)) {
457 ures_close(ruleRes);
458 ures_close(locRes);
459 ures_close(rb);
460 return emptyStr;
461 }
462
463 int32_t numberKeys = ures_getSize(setRes);
464 char *key=NULL;
465 int32_t len=0;
466 for(int32_t i=0; i<numberKeys; ++i) {
467 int32_t keyLen;
468 resLen=0;
469 s=ures_getNextString(setRes, &resLen, (const char**)&key, &errCode);
470 keyLen = uprv_strlen(key);
471 u_charsToUChars(key, result+len, keyLen);
472 len += keyLen;
473 result[len++]=COLON;
474 uprv_memcpy(result+len, s, resLen*sizeof(UChar));
475 len += resLen;
476 result[len++]=SEMI_COLON;
477 }
478 result[len++]=0;
479 u_UCharsToChars(result, setKey, len);
480 // printf(" Rule: %s\n", setKey);
481
482 ures_close(setRes);
483 ures_close(ruleRes);
484 ures_close(locRes);
485 ures_close(rb);
486 return UnicodeString(result);
487
488 }
489
490 AndConstraint::AndConstraint() {
491 op = AndConstraint::NONE;
492 opNum=-1;
493 rangeLow=-1;
494 rangeHigh=-1;
495 notIn=FALSE;
496 integerOnly=FALSE;
497 next=NULL;
498 }
499
500
501 AndConstraint::AndConstraint(const AndConstraint& other) {
502 this->op = other.op;
503 this->opNum=other.opNum;
504 this->rangeLow=other.rangeLow;
505 this->rangeHigh=other.rangeHigh;
506 this->integerOnly=other.integerOnly;
507 this->notIn=other.notIn;
508 if (other.next==NULL) {
509 this->next=NULL;
510 }
511 else {
512 this->next = new AndConstraint(*other.next);
513 }
514 }
515
516 AndConstraint::~AndConstraint() {
517 if (next!=NULL) {
518 delete next;
519 }
520 }
521
522
523 UBool
524 AndConstraint::isFulfilled(double number) {
525 UBool result=TRUE;
526 double value=number;
527
528 if ( op == MOD ) {
529 value = (int32_t)value % opNum;
530 }
531 if ( rangeHigh == -1 ) {
532 if ( rangeLow == -1 ) {
533 result = TRUE; // empty rule
534 }
535 else {
536 if ( value == rangeLow ) {
537 result = TRUE;
538 }
539 else {
540 result = FALSE;
541 }
542 }
543 }
544 else {
545 if ((rangeLow <= value) && (value <= rangeHigh)) {
546 if (integerOnly) {
547 if ( value != (int32_t)value) {
548 result = FALSE;
549 }
550 else {
551 result = TRUE;
552 }
553 }
554 else {
555 result = TRUE;
556 }
557 }
558 else {
559 result = FALSE;
560 }
561 }
562 if (notIn) {
563 return !result;
564 }
565 else {
566 return result;
567 }
568 }
569
570 int32_t
571 AndConstraint::updateRepeatLimit(int32_t maxLimit) {
572
573 if ( op == MOD ) {
574 return uprv_max(opNum, maxLimit);
575 }
576 else {
577 if ( rangeHigh == -1 ) {
578 return(rangeLow>maxLimit? rangeLow : maxLimit);
579 return uprv_max(rangeLow, maxLimit);
580 }
581 else{
582 return uprv_max(rangeHigh, maxLimit);
583 }
584 }
585 }
586
587
588 AndConstraint*
589 AndConstraint::add()
590 {
591 this->next = new AndConstraint();
592 return this->next;
593 }
594
595 OrConstraint::OrConstraint() {
596 childNode=NULL;
597 next=NULL;
598 }
599
600 OrConstraint::OrConstraint(const OrConstraint& other) {
601 if ( other.childNode == NULL ) {
602 this->childNode = NULL;
603 }
604 else {
605 this->childNode = new AndConstraint(*(other.childNode));
606 }
607 if (other.next == NULL ) {
608 this->next = NULL;
609 }
610 else {
611 this->next = new OrConstraint(*(other.next));
612 }
613 }
614
615 OrConstraint::~OrConstraint() {
616 if (childNode!=NULL) {
617 delete childNode;
618 }
619 if (next!=NULL) {
620 delete next;
621 }
622 }
623
624 AndConstraint*
625 OrConstraint::add()
626 {
627 OrConstraint *curOrConstraint=this;
628 {
629 while (curOrConstraint->next!=NULL) {
630 curOrConstraint = curOrConstraint->next;
631 }
632 curOrConstraint->next = NULL;
633 curOrConstraint->childNode = new AndConstraint();
634 }
635 return curOrConstraint->childNode;
636 }
637
638 UBool
639 OrConstraint::isFulfilled(double number) {
640 OrConstraint* orRule=this;
641 UBool result=FALSE;
642
643 while (orRule!=NULL && !result) {
644 result=TRUE;
645 AndConstraint* andRule = orRule->childNode;
646 while (andRule!=NULL && result) {
647 result = andRule->isFulfilled(number);
648 andRule=andRule->next;
649 }
650 orRule = orRule->next;
651 }
652
653 return result;
654 }
655
656
657 RuleChain::RuleChain() {
658 ruleHeader=NULL;
659 next = NULL;
660 repeatLimit=0;
661 }
662
663 RuleChain::RuleChain(const RuleChain& other) {
664 this->repeatLimit = other.repeatLimit;
665 this->keyword=other.keyword;
666 if (other.ruleHeader != NULL) {
667 this->ruleHeader = new OrConstraint(*(other.ruleHeader));
668 }
669 else {
670 this->ruleHeader = NULL;
671 }
672 if (other.next != NULL ) {
673 this->next = new RuleChain(*other.next);
674 }
675 else
676 {
677 this->next = NULL;
678 }
679 }
680
681 RuleChain::~RuleChain() {
682 if (next != NULL) {
683 delete next;
684 }
685 if ( ruleHeader != NULL ) {
686 delete ruleHeader;
687 }
688 }
689
690 UnicodeString
691 RuleChain::select(double number) const {
692
693 if ( ruleHeader != NULL ) {
694 if (ruleHeader->isFulfilled(number)) {
695 return keyword;
696 }
697 }
698 if ( next != NULL ) {
699 return next->select(number);
700 }
701 else {
702 return PLURAL_KEYWORD_OTHER;
703 }
704
705 }
706
707 void
708 RuleChain::dumpRules(UnicodeString& result) {
709 UChar digitString[16];
710
711 if ( ruleHeader != NULL ) {
712 result += keyword;
713 OrConstraint* orRule=ruleHeader;
714 while ( orRule != NULL ) {
715 AndConstraint* andRule=orRule->childNode;
716 while ( andRule != NULL ) {
717 if ( (andRule->op==AndConstraint::NONE) && (andRule->rangeHigh==-1) ) {
718 result += UNICODE_STRING_SIMPLE(" n is ");
719 if (andRule->notIn) {
720 result += UNICODE_STRING_SIMPLE("not ");
721 }
722 uprv_itou(digitString,16, andRule->rangeLow,10,0);
723 result += UnicodeString(digitString);
724 }
725 else {
726 if (andRule->op==AndConstraint::MOD) {
727 result += UNICODE_STRING_SIMPLE(" n mod ");
728 uprv_itou(digitString,16, andRule->opNum,10,0);
729 result += UnicodeString(digitString);
730 }
731 else {
732 result += UNICODE_STRING_SIMPLE(" n ");
733 }
734 if (andRule->rangeHigh==-1) {
735 if (andRule->notIn) {
736 result += UNICODE_STRING_SIMPLE(" is not ");
737 uprv_itou(digitString,16, andRule->rangeLow,10,0);
738 result += UnicodeString(digitString);
739 }
740 else {
741 result += UNICODE_STRING_SIMPLE(" is ");
742 uprv_itou(digitString,16, andRule->rangeLow,10,0);
743 result += UnicodeString(digitString);
744 }
745 }
746 else {
747 if (andRule->notIn) {
748 if ( andRule->integerOnly ) {
749 result += UNICODE_STRING_SIMPLE(" not in ");
750 }
751 else {
752 result += UNICODE_STRING_SIMPLE(" not within ");
753 }
754 uprv_itou(digitString,16, andRule->rangeLow,10,0);
755 result += UnicodeString(digitString);
756 result += UNICODE_STRING_SIMPLE(" .. ");
757 uprv_itou(digitString,16, andRule->rangeHigh,10,0);
758 result += UnicodeString(digitString);
759 }
760 else {
761 if ( andRule->integerOnly ) {
762 result += UNICODE_STRING_SIMPLE(" in ");
763 }
764 else {
765 result += UNICODE_STRING_SIMPLE(" within ");
766 }
767 uprv_itou(digitString,16, andRule->rangeLow,10,0);
768 result += UnicodeString(digitString);
769 result += UNICODE_STRING_SIMPLE(" .. ");
770 uprv_itou(digitString,16, andRule->rangeHigh,10,0);
771 }
772 }
773 }
774 if ( (andRule=andRule->next) != NULL) {
775 result += PK_AND;
776 }
777 }
778 if ( (orRule = orRule->next) != NULL ) {
779 result += PK_OR;
780 }
781 }
782 }
783 if ( next != NULL ) {
784 next->dumpRules(result);
785 }
786 }
787
788 int32_t
789 RuleChain::getRepeatLimit () {
790 return repeatLimit;
791 }
792
793 void
794 RuleChain::setRepeatLimit () {
795 int32_t limit=0;
796
797 if ( next != NULL ) {
798 next->setRepeatLimit();
799 limit = next->repeatLimit;
800 }
801
802 if ( ruleHeader != NULL ) {
803 OrConstraint* orRule=ruleHeader;
804 while ( orRule != NULL ) {
805 AndConstraint* andRule=orRule->childNode;
806 while ( andRule != NULL ) {
807 limit = andRule->updateRepeatLimit(limit);
808 andRule = andRule->next;
809 }
810 orRule = orRule->next;
811 }
812 }
813 repeatLimit = limit;
814 }
815
816 UErrorCode
817 RuleChain::getKeywords(int32_t capacityOfKeywords, UnicodeString* keywords, int32_t& arraySize) const {
818 if ( arraySize < capacityOfKeywords-1 ) {
819 keywords[arraySize++]=keyword;
820 }
821 else {
822 return U_BUFFER_OVERFLOW_ERROR;
823 }
824
825 if ( next != NULL ) {
826 return next->getKeywords(capacityOfKeywords, keywords, arraySize);
827 }
828 else {
829 return U_ZERO_ERROR;
830 }
831 }
832
833 UBool
834 RuleChain::isKeyword(const UnicodeString& keywordParam) const {
835 if ( keyword == keywordParam ) {
836 return TRUE;
837 }
838
839 if ( next != NULL ) {
840 return next->isKeyword(keywordParam);
841 }
842 else {
843 return FALSE;
844 }
845 }
846
847
848 RuleParser::RuleParser() {
849 UErrorCode err=U_ZERO_ERROR;
850 const UnicodeString idStart=UNICODE_STRING_SIMPLE("[[a-z]]");
851 const UnicodeString idContinue=UNICODE_STRING_SIMPLE("[[a-z][A-Z][_][0-9]]");
852 idStartFilter = new UnicodeSet(idStart, err);
853 idContinueFilter = new UnicodeSet(idContinue, err);
854 }
855
856 RuleParser::~RuleParser() {
857 delete idStartFilter;
858 delete idContinueFilter;
859 }
860
861 void
862 RuleParser::checkSyntax(tokenType prevType, tokenType curType, UErrorCode &status)
863 {
864 if (U_FAILURE(status)) {
865 return;
866 }
867 switch(prevType) {
868 case none:
869 case tSemiColon:
870 if (curType!=tKeyword) {
871 status = U_UNEXPECTED_TOKEN;
872 }
873 break;
874 case tVariableN :
875 if (curType != tIs && curType != tMod && curType != tIn &&
876 curType != tNot && curType != tWithin) {
877 status = U_UNEXPECTED_TOKEN;
878 }
879 break;
880 case tZero:
881 case tOne:
882 case tTwo:
883 case tFew:
884 case tMany:
885 case tOther:
886 case tKeyword:
887 if (curType != tColon) {
888 status = U_UNEXPECTED_TOKEN;
889 }
890 break;
891 case tColon :
892 if (curType != tVariableN) {
893 status = U_UNEXPECTED_TOKEN;
894 }
895 break;
896 case tIs:
897 if ( curType != tNumber && curType != tNot) {
898 status = U_UNEXPECTED_TOKEN;
899 }
900 break;
901 case tNot:
902 if (curType != tNumber && curType != tIn && curType != tWithin) {
903 status = U_UNEXPECTED_TOKEN;
904 }
905 break;
906 case tMod:
907 case tDot:
908 case tIn:
909 case tWithin:
910 case tAnd:
911 case tOr:
912 if (curType != tNumber && curType != tVariableN) {
913 status = U_UNEXPECTED_TOKEN;
914 }
915 break;
916 case tNumber:
917 if (curType != tDot && curType != tSemiColon && curType != tIs && curType != tNot &&
918 curType != tIn && curType != tWithin && curType != tAnd && curType != tOr)
919 {
920 status = U_UNEXPECTED_TOKEN;
921 }
922 break;
923 default:
924 status = U_UNEXPECTED_TOKEN;
925 break;
926 }
927 }
928
929 void
930 RuleParser::getNextToken(const UnicodeString& ruleData,
931 int32_t *ruleIndex,
932 UnicodeString& token,
933 tokenType& type,
934 UErrorCode &status)
935 {
936 int32_t curIndex= *ruleIndex;
937 UChar ch;
938 tokenType prevType=none;
939
940 while (curIndex<ruleData.length()) {
941 ch = ruleData.charAt(curIndex);
942 if ( !inRange(ch, type) ) {
943 status = U_ILLEGAL_CHARACTER;
944 return;
945 }
946 switch (type) {
947 case tSpace:
948 if ( *ruleIndex != curIndex ) { // letter
949 token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
950 *ruleIndex=curIndex;
951 type=prevType;
952 getKeyType(token, type, status);
953 return;
954 }
955 else {
956 *ruleIndex=*ruleIndex+1;
957 }
958 break; // consective space
959 case tColon:
960 case tSemiColon:
961 if ( *ruleIndex != curIndex ) {
962 token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
963 *ruleIndex=curIndex;
964 type=prevType;
965 getKeyType(token, type, status);
966 return;
967 }
968 else {
969 *ruleIndex=curIndex+1;
970 return;
971 }
972 case tLetter:
973 if ((type==prevType)||(prevType==none)) {
974 prevType=type;
975 break;
976 }
977 break;
978 case tNumber:
979 if ((type==prevType)||(prevType==none)) {
980 prevType=type;
981 break;
982 }
983 else {
984 *ruleIndex=curIndex+1;
985 return;
986 }
987 case tDot:
988 if (prevType==none) { // first dot
989 prevType=type;
990 continue;
991 }
992 else {
993 if ( *ruleIndex != curIndex ) {
994 token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
995 *ruleIndex=curIndex; // letter
996 type=prevType;
997 getKeyType(token, type, status);
998 return;
999 }
1000 else { // two consective dots
1001 *ruleIndex=curIndex+2;
1002 return;
1003 }
1004 }
1005 break;
1006 default:
1007 status = U_UNEXPECTED_TOKEN;
1008 return;
1009 }
1010 curIndex++;
1011 }
1012 if ( curIndex>=ruleData.length() ) {
1013 if ( (type == tLetter)||(type == tNumber) ) {
1014 token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
1015 getKeyType(token, type, status);
1016 }
1017 *ruleIndex = ruleData.length();
1018 }
1019 }
1020
1021 UBool
1022 RuleParser::inRange(UChar ch, tokenType& type) {
1023 if ((ch>=CAP_A) && (ch<=CAP_Z)) {
1024 // we assume all characters are in lower case already.
1025 return FALSE;
1026 }
1027 if ((ch>=LOW_A) && (ch<=LOW_Z)) {
1028 type = tLetter;
1029 return TRUE;
1030 }
1031 if ((ch>=U_ZERO) && (ch<=U_NINE)) {
1032 type = tNumber;
1033 return TRUE;
1034 }
1035 switch (ch) {
1036 case COLON:
1037 type = tColon;
1038 return TRUE;
1039 case SPACE:
1040 type = tSpace;
1041 return TRUE;
1042 case SEMI_COLON:
1043 type = tSemiColon;
1044 return TRUE;
1045 case DOT:
1046 type = tDot;
1047 return TRUE;
1048 default :
1049 type = none;
1050 return FALSE;
1051 }
1052 }
1053
1054
1055 void
1056 RuleParser::getKeyType(const UnicodeString& token, tokenType& keyType, UErrorCode &status)
1057 {
1058 if ( keyType==tNumber) {
1059 }
1060 else if (token==PK_VAR_N) {
1061 keyType = tVariableN;
1062 }
1063 else if (token==PK_IS) {
1064 keyType = tIs;
1065 }
1066 else if (token==PK_AND) {
1067 keyType = tAnd;
1068 }
1069 else if (token==PK_IN) {
1070 keyType = tIn;
1071 }
1072 else if (token==PK_WITHIN) {
1073 keyType = tWithin;
1074 }
1075 else if (token==PK_NOT) {
1076 keyType = tNot;
1077 }
1078 else if (token==PK_MOD) {
1079 keyType = tMod;
1080 }
1081 else if (token==PK_OR) {
1082 keyType = tOr;
1083 }
1084 else if ( isValidKeyword(token) ) {
1085 keyType = tKeyword;
1086 }
1087 else {
1088 status = U_UNEXPECTED_TOKEN;
1089 }
1090 }
1091
1092 UBool
1093 RuleParser::isValidKeyword(const UnicodeString& token) {
1094 if ( token.length()==0 ) {
1095 return FALSE;
1096 }
1097 if ( idStartFilter->contains(token.charAt(0) )==TRUE ) {
1098 int32_t i;
1099 for (i=1; i< token.length(); i++) {
1100 if (idContinueFilter->contains(token.charAt(i))== FALSE) {
1101 return FALSE;
1102 }
1103 }
1104 return TRUE;
1105 }
1106 else {
1107 return FALSE;
1108 }
1109 }
1110
1111 PluralKeywordEnumeration::PluralKeywordEnumeration(RuleChain *header, UErrorCode& status) :
1112 fKeywordNames(status)
1113 {
1114 RuleChain *node=header;
1115 UBool addKeywordOther=true;
1116
1117 pos=0;
1118 fKeywordNames.removeAllElements();
1119 while(node!=NULL) {
1120 fKeywordNames.addElement(new UnicodeString(node->keyword), status);
1121 if (node->keyword == PLURAL_KEYWORD_OTHER) {
1122 addKeywordOther= false;
1123 }
1124 node=node->next;
1125 }
1126
1127 if (addKeywordOther) {
1128 fKeywordNames.addElement(new UnicodeString(PLURAL_KEYWORD_OTHER), status);
1129 }
1130 }
1131
1132 const UnicodeString*
1133 PluralKeywordEnumeration::snext(UErrorCode& status) {
1134 if (U_SUCCESS(status) && pos < fKeywordNames.size()) {
1135 return (const UnicodeString*)fKeywordNames.elementAt(pos++);
1136 }
1137 return NULL;
1138 }
1139
1140 void
1141 PluralKeywordEnumeration::reset(UErrorCode& /*status*/) {
1142 pos=0;
1143 }
1144
1145 int32_t
1146 PluralKeywordEnumeration::count(UErrorCode& /*status*/) const {
1147 return fKeywordNames.size();
1148 }
1149
1150 PluralKeywordEnumeration::~PluralKeywordEnumeration() {
1151 UnicodeString *s;
1152 for (int32_t i=0; i<fKeywordNames.size(); ++i) {
1153 if ((s=(UnicodeString *)fKeywordNames.elementAt(i))!=NULL) {
1154 delete s;
1155 }
1156 }
1157 }
1158
1159 U_NAMESPACE_END
1160
1161
1162 #endif /* #if !UCONFIG_NO_FORMATTING */
1163
1164 //eof