]> git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/plurrule.cpp
ICU-461.18.tar.gz
[apple/icu.git] / icuSources / i18n / plurrule.cpp
1 /*
2 *******************************************************************************
3 * Copyright (C) 2007-2010, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 *******************************************************************************
6 *
7 * File PLURRULE.CPP
8 *
9 * Modification History:
10 *
11 * Date Name Description
12 *******************************************************************************
13 */
14
15
16 #include "unicode/uniset.h"
17 #include "unicode/utypes.h"
18 #include "unicode/ures.h"
19 #include "unicode/plurrule.h"
20 #include "cmemory.h"
21 #include "cstring.h"
22 #include "hash.h"
23 #include "mutex.h"
24 #include "plurrule_impl.h"
25 #include "putilimp.h"
26 #include "ucln_in.h"
27 #include "ustrfmt.h"
28 #include "locutil.h"
29
30 /*
31 // TODO(claireho): remove stdio
32 #include "stdio.h"
33 */
34
35 #if !UCONFIG_NO_FORMATTING
36
37 U_NAMESPACE_BEGIN
38
39
40 #define ARRAY_SIZE(array) (int32_t)(sizeof array / sizeof array[0])
41
42 static const UChar PLURAL_KEYWORD_ZERO[] = {LOW_Z,LOW_E,LOW_R,LOW_O, 0};
43 static const UChar PLURAL_KEYWORD_ONE[]={LOW_O,LOW_N,LOW_E,0};
44 static const UChar PLURAL_KEYWORD_TWO[]={LOW_T,LOW_W,LOW_O,0};
45 static const UChar PLURAL_KEYWORD_FEW[]={LOW_F,LOW_E,LOW_W,0};
46 static const UChar PLURAL_KEYWORD_MANY[]={LOW_M,LOW_A,LOW_N,LOW_Y,0};
47 static const UChar PLURAL_KEYWORD_OTHER[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,0};
48 static const UChar PLURAL_DEFAULT_RULE[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,COLON,SPACE,LOW_N,0};
49 static const UChar PK_IN[]={LOW_I,LOW_N,0};
50 static const UChar PK_NOT[]={LOW_N,LOW_O,LOW_T,0};
51 static const UChar PK_IS[]={LOW_I,LOW_S,0};
52 static const UChar PK_MOD[]={LOW_M,LOW_O,LOW_D,0};
53 static const UChar PK_AND[]={LOW_A,LOW_N,LOW_D,0};
54 static const UChar PK_OR[]={LOW_O,LOW_R,0};
55 static const UChar PK_VAR_N[]={LOW_N,0};
56 static const UChar PK_WITHIN[]={LOW_W,LOW_I,LOW_T,LOW_H,LOW_I,LOW_N,0};
57
58 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralRules)
59 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralKeywordEnumeration)
60
61 PluralRules::PluralRules(UErrorCode& status)
62 : UObject(),
63 mRules(NULL)
64 {
65 if (U_FAILURE(status)) {
66 return;
67 }
68 mParser = new RuleParser();
69 if (mParser==NULL) {
70 status = U_MEMORY_ALLOCATION_ERROR;
71 }
72 }
73
74 PluralRules::PluralRules(const PluralRules& other)
75 : UObject(other),
76 mRules(NULL),
77 mParser(new RuleParser())
78 {
79 *this=other;
80 }
81
82 PluralRules::~PluralRules() {
83 delete mRules;
84 delete mParser;
85 }
86
87 PluralRules*
88 PluralRules::clone() const {
89 return new PluralRules(*this);
90 }
91
92 PluralRules&
93 PluralRules::operator=(const PluralRules& other) {
94 if (this != &other) {
95 delete mRules;
96 if (other.mRules==NULL) {
97 mRules = NULL;
98 }
99 else {
100 mRules = new RuleChain(*other.mRules);
101 }
102 delete mParser;
103 mParser = new RuleParser();
104 }
105
106 return *this;
107 }
108
109 PluralRules* U_EXPORT2
110 PluralRules::createRules(const UnicodeString& description, UErrorCode& status) {
111 RuleChain rules;
112
113 if (U_FAILURE(status)) {
114 return NULL;
115 }
116 PluralRules *newRules = new PluralRules(status);
117 if ( (newRules != NULL)&& U_SUCCESS(status) ) {
118 newRules->parseDescription((UnicodeString &)description, rules, status);
119 if (U_SUCCESS(status)) {
120 newRules->addRules(rules);
121 }
122 }
123 if (U_FAILURE(status)) {
124 delete newRules;
125 return NULL;
126 }
127 else {
128 return newRules;
129 }
130 }
131
132 PluralRules* U_EXPORT2
133 PluralRules::createDefaultRules(UErrorCode& status) {
134 return createRules(PLURAL_DEFAULT_RULE, status);
135 }
136
137 PluralRules* U_EXPORT2
138 PluralRules::forLocale(const Locale& locale, UErrorCode& status) {
139 RuleChain rChain;
140 if (U_FAILURE(status)) {
141 return NULL;
142 }
143 PluralRules *newObj = new PluralRules(status);
144 if (newObj==NULL || U_FAILURE(status)) {
145 return NULL;
146 }
147 UnicodeString locRule = newObj->getRuleFromResource(locale, status);
148 if ((locRule.length() != 0) && U_SUCCESS(status)) {
149 newObj->parseDescription(locRule, rChain, status);
150 if (U_SUCCESS(status)) {
151 newObj->addRules(rChain);
152 }
153 }
154 if (U_FAILURE(status)||(locRule.length() == 0)) {
155 // use default plural rule
156 status = U_ZERO_ERROR;
157 UnicodeString defRule = UnicodeString(PLURAL_DEFAULT_RULE);
158 newObj->parseDescription(defRule, rChain, status);
159 newObj->addRules(rChain);
160 }
161
162 return newObj;
163 }
164
165 UnicodeString
166 PluralRules::select(int32_t number) const {
167 if (mRules == NULL) {
168 return PLURAL_DEFAULT_RULE;
169 }
170 else {
171 return mRules->select(number);
172 }
173 }
174
175 UnicodeString
176 PluralRules::select(double number) const {
177 if (mRules == NULL) {
178 return PLURAL_DEFAULT_RULE;
179 }
180 else {
181 return mRules->select(number);
182 }
183 }
184
185 StringEnumeration*
186 PluralRules::getKeywords(UErrorCode& status) const {
187 if (U_FAILURE(status)) return NULL;
188 StringEnumeration* nameEnumerator = new PluralKeywordEnumeration(mRules, status);
189 if (U_FAILURE(status)) return NULL;
190
191 return nameEnumerator;
192 }
193
194
195 UBool
196 PluralRules::isKeyword(const UnicodeString& keyword) const {
197 if ( keyword == PLURAL_KEYWORD_OTHER ) {
198 return true;
199 }
200 else {
201 if (mRules==NULL) {
202 return false;
203 }
204 else {
205 return mRules->isKeyword(keyword);
206 }
207 }
208 }
209
210 UnicodeString
211 PluralRules::getKeywordOther() const {
212 return PLURAL_KEYWORD_OTHER;
213 }
214
215 UBool
216 PluralRules::operator==(const PluralRules& other) const {
217 int32_t limit;
218 UBool sameList = TRUE;
219 const UnicodeString *ptrKeyword;
220 UErrorCode status= U_ZERO_ERROR;
221
222 if ( this == &other ) {
223 return TRUE;
224 }
225 StringEnumeration* myKeywordList = getKeywords(status);
226 if (U_FAILURE(status)) {
227 return FALSE;
228 }
229 StringEnumeration* otherKeywordList =other.getKeywords(status);
230 if (U_FAILURE(status)) {
231 return FALSE;
232 }
233
234 if (myKeywordList->count(status)!=otherKeywordList->count(status) ||
235 U_FAILURE(status)) {
236 sameList = FALSE;
237 }
238 else {
239 myKeywordList->reset(status);
240 if (U_FAILURE(status)) {
241 return FALSE;
242 }
243 while (sameList && (ptrKeyword=myKeywordList->snext(status))!=NULL) {
244 if (U_FAILURE(status) || !other.isKeyword(*ptrKeyword)) {
245 sameList = FALSE;
246 }
247 }
248 otherKeywordList->reset(status);
249 if (U_FAILURE(status)) {
250 return FALSE;
251 }
252 while (sameList && (ptrKeyword=otherKeywordList->snext(status))!=NULL) {
253 if (U_FAILURE(status)) {
254 return FALSE;
255 }
256 if (!this->isKeyword(*ptrKeyword)) {
257 sameList = FALSE;
258 }
259 }
260 delete myKeywordList;
261 delete otherKeywordList;
262 if (!sameList) {
263 return FALSE;
264 }
265 }
266
267 if ((limit=this->getRepeatLimit()) != other.getRepeatLimit()) {
268 return FALSE;
269 }
270 UnicodeString myKeyword, otherKeyword;
271 for (int32_t i=0; i<limit; ++i) {
272 myKeyword = this->select(i);
273 otherKeyword = other.select(i);
274 if (myKeyword!=otherKeyword) {
275 return FALSE;
276 }
277 }
278 return TRUE;
279 }
280
281 void
282 PluralRules::parseDescription(UnicodeString& data, RuleChain& rules, UErrorCode &status)
283 {
284 int32_t ruleIndex=0;
285 UnicodeString token;
286 tokenType type;
287 tokenType prevType=none;
288 RuleChain *ruleChain=NULL;
289 AndConstraint *curAndConstraint=NULL;
290 OrConstraint *orNode=NULL;
291 RuleChain *lastChain=NULL;
292
293 if (U_FAILURE(status)) {
294 return;
295 }
296 UnicodeString ruleData = data.toLower();
297 while (ruleIndex< ruleData.length()) {
298 mParser->getNextToken(ruleData, &ruleIndex, token, type, status);
299 if (U_FAILURE(status)) {
300 return;
301 }
302 mParser->checkSyntax(prevType, type, status);
303 if (U_FAILURE(status)) {
304 return;
305 }
306 switch (type) {
307 case tAnd:
308 curAndConstraint = curAndConstraint->add();
309 break;
310 case tOr:
311 lastChain = &rules;
312 while (lastChain->next !=NULL) {
313 lastChain = lastChain->next;
314 }
315 orNode=lastChain->ruleHeader;
316 while (orNode->next != NULL) {
317 orNode = orNode->next;
318 }
319 orNode->next= new OrConstraint();
320 orNode=orNode->next;
321 orNode->next=NULL;
322 curAndConstraint = orNode->add();
323 break;
324 case tIs:
325 curAndConstraint->rangeHigh=-1;
326 break;
327 case tNot:
328 curAndConstraint->notIn=TRUE;
329 break;
330 case tIn:
331 curAndConstraint->rangeHigh=PLURAL_RANGE_HIGH;
332 curAndConstraint->integerOnly = TRUE;
333 break;
334 case tWithin:
335 curAndConstraint->rangeHigh=PLURAL_RANGE_HIGH;
336 break;
337 case tNumber:
338 if ( (curAndConstraint->op==AndConstraint::MOD)&&
339 (curAndConstraint->opNum == -1 ) ) {
340 curAndConstraint->opNum=getNumberValue(token);
341 }
342 else {
343 if (curAndConstraint->rangeLow == -1) {
344 curAndConstraint->rangeLow=getNumberValue(token);
345 }
346 else {
347 curAndConstraint->rangeHigh=getNumberValue(token);
348 }
349 }
350 break;
351 case tMod:
352 curAndConstraint->op=AndConstraint::MOD;
353 break;
354 case tKeyword:
355 if (ruleChain==NULL) {
356 ruleChain = &rules;
357 }
358 else {
359 while (ruleChain->next!=NULL){
360 ruleChain=ruleChain->next;
361 }
362 ruleChain=ruleChain->next=new RuleChain();
363 }
364 orNode = ruleChain->ruleHeader = new OrConstraint();
365 curAndConstraint = orNode->add();
366 ruleChain->keyword = token;
367 break;
368 default:
369 break;
370 }
371 prevType=type;
372 }
373 }
374
375 int32_t
376 PluralRules::getNumberValue(const UnicodeString& token) const {
377 int32_t i;
378 char digits[128];
379
380 i = token.extract(0, token.length(), digits, ARRAY_SIZE(digits), US_INV);
381 digits[i]='\0';
382
383 return((int32_t)atoi(digits));
384 }
385
386
387 void
388 PluralRules::getNextLocale(const UnicodeString& localeData, int32_t* curIndex, UnicodeString& localeName) {
389 int32_t i=*curIndex;
390
391 localeName.remove();
392 while (i< localeData.length()) {
393 if ( (localeData.charAt(i)!= SPACE) && (localeData.charAt(i)!= COMMA) ) {
394 break;
395 }
396 i++;
397 }
398
399 while (i< localeData.length()) {
400 if ( (localeData.charAt(i)== SPACE) || (localeData.charAt(i)== COMMA) ) {
401 break;
402 }
403 localeName+=localeData.charAt(i++);
404 }
405 *curIndex=i;
406 }
407
408
409 int32_t
410 PluralRules::getRepeatLimit() const {
411 if (mRules!=NULL) {
412 return mRules->getRepeatLimit();
413 }
414 else {
415 return 0;
416 }
417 }
418
419
420 void
421 PluralRules::addRules(RuleChain& rules) {
422 RuleChain *newRule = new RuleChain(rules);
423 this->mRules=newRule;
424 newRule->setRepeatLimit();
425 }
426
427 UnicodeString
428 PluralRules::getRuleFromResource(const Locale& locale, UErrorCode& errCode) {
429 UnicodeString emptyStr;
430
431 if (U_FAILURE(errCode)) {
432 return emptyStr;
433 }
434 UResourceBundle *rb=ures_openDirect(NULL, "plurals", &errCode);
435 if(U_FAILURE(errCode)) {
436 /* total failure, not even root could be opened */
437 return emptyStr;
438 }
439 UResourceBundle *locRes=ures_getByKey(rb, "locales", NULL, &errCode);
440 if(U_FAILURE(errCode)) {
441 ures_close(rb);
442 return emptyStr;
443 }
444 int32_t resLen=0;
445 const char *curLocaleName=locale.getName();
446 const UChar* s = ures_getStringByKey(locRes, curLocaleName, &resLen, &errCode);
447
448 if (s == NULL) {
449 // Check parent locales.
450 UErrorCode status = U_ZERO_ERROR;
451 char parentLocaleName[ULOC_FULLNAME_CAPACITY];
452 const char *curLocaleName=locale.getName();
453 int32_t localeNameLen=0;
454 uprv_strcpy(parentLocaleName, curLocaleName);
455
456 while ((localeNameLen=uloc_getParent(parentLocaleName, parentLocaleName,
457 ULOC_FULLNAME_CAPACITY, &status)) > 0) {
458 resLen=0;
459 s = ures_getStringByKey(locRes, parentLocaleName, &resLen, &status);
460 if (s != NULL) {
461 errCode = U_ZERO_ERROR;
462 break;
463 }
464 status = U_ZERO_ERROR;
465 }
466 }
467 if (s==NULL) {
468 ures_close(locRes);
469 ures_close(rb);
470 return emptyStr;
471 }
472
473 char setKey[256];
474 UChar result[256];
475 u_UCharsToChars(s, setKey, resLen + 1);
476 // printf("\n PluralRule: %s\n", setKey);
477
478
479 UResourceBundle *ruleRes=ures_getByKey(rb, "rules", NULL, &errCode);
480 if(U_FAILURE(errCode)) {
481 ures_close(locRes);
482 ures_close(rb);
483 return emptyStr;
484 }
485 resLen=0;
486 UResourceBundle *setRes = ures_getByKey(ruleRes, setKey, NULL, &errCode);
487 if (U_FAILURE(errCode)) {
488 ures_close(ruleRes);
489 ures_close(locRes);
490 ures_close(rb);
491 return emptyStr;
492 }
493
494 int32_t numberKeys = ures_getSize(setRes);
495 char *key=NULL;
496 int32_t len=0;
497 for(int32_t i=0; i<numberKeys; ++i) {
498 int32_t keyLen;
499 resLen=0;
500 s=ures_getNextString(setRes, &resLen, (const char**)&key, &errCode);
501 keyLen = (int32_t)uprv_strlen(key);
502 u_charsToUChars(key, result+len, keyLen);
503 len += keyLen;
504 result[len++]=COLON;
505 uprv_memcpy(result+len, s, resLen*sizeof(UChar));
506 len += resLen;
507 result[len++]=SEMI_COLON;
508 }
509 result[len++]=0;
510 u_UCharsToChars(result, setKey, len);
511 // printf(" Rule: %s\n", setKey);
512
513 ures_close(setRes);
514 ures_close(ruleRes);
515 ures_close(locRes);
516 ures_close(rb);
517 return UnicodeString(result);
518
519 }
520
521 AndConstraint::AndConstraint() {
522 op = AndConstraint::NONE;
523 opNum=-1;
524 rangeLow=-1;
525 rangeHigh=-1;
526 notIn=FALSE;
527 integerOnly=FALSE;
528 next=NULL;
529 }
530
531
532 AndConstraint::AndConstraint(const AndConstraint& other) {
533 this->op = other.op;
534 this->opNum=other.opNum;
535 this->rangeLow=other.rangeLow;
536 this->rangeHigh=other.rangeHigh;
537 this->integerOnly=other.integerOnly;
538 this->notIn=other.notIn;
539 if (other.next==NULL) {
540 this->next=NULL;
541 }
542 else {
543 this->next = new AndConstraint(*other.next);
544 }
545 }
546
547 AndConstraint::~AndConstraint() {
548 if (next!=NULL) {
549 delete next;
550 }
551 }
552
553
554 UBool
555 AndConstraint::isFulfilled(double number) {
556 UBool result=TRUE;
557 double value=number;
558
559 if ( op == MOD ) {
560 value = (int32_t)value % opNum;
561 }
562 if ( rangeHigh == -1 ) {
563 if ( rangeLow == -1 ) {
564 result = TRUE; // empty rule
565 }
566 else {
567 if ( value == rangeLow ) {
568 result = TRUE;
569 }
570 else {
571 result = FALSE;
572 }
573 }
574 }
575 else {
576 if ((rangeLow <= value) && (value <= rangeHigh)) {
577 if (integerOnly) {
578 if ( value != (int32_t)value) {
579 result = FALSE;
580 }
581 else {
582 result = TRUE;
583 }
584 }
585 else {
586 result = TRUE;
587 }
588 }
589 else {
590 result = FALSE;
591 }
592 }
593 if (notIn) {
594 return !result;
595 }
596 else {
597 return result;
598 }
599 }
600
601 int32_t
602 AndConstraint::updateRepeatLimit(int32_t maxLimit) {
603
604 if ( op == MOD ) {
605 return uprv_max(opNum, maxLimit);
606 }
607 else {
608 if ( rangeHigh == -1 ) {
609 return uprv_max(rangeLow, maxLimit);
610 }
611 else{
612 return uprv_max(rangeHigh, maxLimit);
613 }
614 }
615 }
616
617
618 AndConstraint*
619 AndConstraint::add()
620 {
621 this->next = new AndConstraint();
622 return this->next;
623 }
624
625 OrConstraint::OrConstraint() {
626 childNode=NULL;
627 next=NULL;
628 }
629
630 OrConstraint::OrConstraint(const OrConstraint& other) {
631 if ( other.childNode == NULL ) {
632 this->childNode = NULL;
633 }
634 else {
635 this->childNode = new AndConstraint(*(other.childNode));
636 }
637 if (other.next == NULL ) {
638 this->next = NULL;
639 }
640 else {
641 this->next = new OrConstraint(*(other.next));
642 }
643 }
644
645 OrConstraint::~OrConstraint() {
646 if (childNode!=NULL) {
647 delete childNode;
648 }
649 if (next!=NULL) {
650 delete next;
651 }
652 }
653
654 AndConstraint*
655 OrConstraint::add()
656 {
657 OrConstraint *curOrConstraint=this;
658 {
659 while (curOrConstraint->next!=NULL) {
660 curOrConstraint = curOrConstraint->next;
661 }
662 curOrConstraint->next = NULL;
663 curOrConstraint->childNode = new AndConstraint();
664 }
665 return curOrConstraint->childNode;
666 }
667
668 UBool
669 OrConstraint::isFulfilled(double number) {
670 OrConstraint* orRule=this;
671 UBool result=FALSE;
672
673 while (orRule!=NULL && !result) {
674 result=TRUE;
675 AndConstraint* andRule = orRule->childNode;
676 while (andRule!=NULL && result) {
677 result = andRule->isFulfilled(number);
678 andRule=andRule->next;
679 }
680 orRule = orRule->next;
681 }
682
683 return result;
684 }
685
686
687 RuleChain::RuleChain() {
688 ruleHeader=NULL;
689 next = NULL;
690 repeatLimit=0;
691 }
692
693 RuleChain::RuleChain(const RuleChain& other) {
694 this->repeatLimit = other.repeatLimit;
695 this->keyword=other.keyword;
696 if (other.ruleHeader != NULL) {
697 this->ruleHeader = new OrConstraint(*(other.ruleHeader));
698 }
699 else {
700 this->ruleHeader = NULL;
701 }
702 if (other.next != NULL ) {
703 this->next = new RuleChain(*other.next);
704 }
705 else
706 {
707 this->next = NULL;
708 }
709 }
710
711 RuleChain::~RuleChain() {
712 if (next != NULL) {
713 delete next;
714 }
715 if ( ruleHeader != NULL ) {
716 delete ruleHeader;
717 }
718 }
719
720 UnicodeString
721 RuleChain::select(double number) const {
722
723 if ( ruleHeader != NULL ) {
724 if (ruleHeader->isFulfilled(number)) {
725 return keyword;
726 }
727 }
728 if ( next != NULL ) {
729 return next->select(number);
730 }
731 else {
732 return PLURAL_KEYWORD_OTHER;
733 }
734
735 }
736
737 void
738 RuleChain::dumpRules(UnicodeString& result) {
739 UChar digitString[16];
740
741 if ( ruleHeader != NULL ) {
742 result += keyword;
743 OrConstraint* orRule=ruleHeader;
744 while ( orRule != NULL ) {
745 AndConstraint* andRule=orRule->childNode;
746 while ( andRule != NULL ) {
747 if ( (andRule->op==AndConstraint::NONE) && (andRule->rangeHigh==-1) ) {
748 result += UNICODE_STRING_SIMPLE(" n is ");
749 if (andRule->notIn) {
750 result += UNICODE_STRING_SIMPLE("not ");
751 }
752 uprv_itou(digitString,16, andRule->rangeLow,10,0);
753 result += UnicodeString(digitString);
754 }
755 else {
756 if (andRule->op==AndConstraint::MOD) {
757 result += UNICODE_STRING_SIMPLE(" n mod ");
758 uprv_itou(digitString,16, andRule->opNum,10,0);
759 result += UnicodeString(digitString);
760 }
761 else {
762 result += UNICODE_STRING_SIMPLE(" n ");
763 }
764 if (andRule->rangeHigh==-1) {
765 if (andRule->notIn) {
766 result += UNICODE_STRING_SIMPLE(" is not ");
767 uprv_itou(digitString,16, andRule->rangeLow,10,0);
768 result += UnicodeString(digitString);
769 }
770 else {
771 result += UNICODE_STRING_SIMPLE(" is ");
772 uprv_itou(digitString,16, andRule->rangeLow,10,0);
773 result += UnicodeString(digitString);
774 }
775 }
776 else {
777 if (andRule->notIn) {
778 if ( andRule->integerOnly ) {
779 result += UNICODE_STRING_SIMPLE(" not in ");
780 }
781 else {
782 result += UNICODE_STRING_SIMPLE(" not within ");
783 }
784 uprv_itou(digitString,16, andRule->rangeLow,10,0);
785 result += UnicodeString(digitString);
786 result += UNICODE_STRING_SIMPLE(" .. ");
787 uprv_itou(digitString,16, andRule->rangeHigh,10,0);
788 result += UnicodeString(digitString);
789 }
790 else {
791 if ( andRule->integerOnly ) {
792 result += UNICODE_STRING_SIMPLE(" in ");
793 }
794 else {
795 result += UNICODE_STRING_SIMPLE(" within ");
796 }
797 uprv_itou(digitString,16, andRule->rangeLow,10,0);
798 result += UnicodeString(digitString);
799 result += UNICODE_STRING_SIMPLE(" .. ");
800 uprv_itou(digitString,16, andRule->rangeHigh,10,0);
801 }
802 }
803 }
804 if ( (andRule=andRule->next) != NULL) {
805 result += PK_AND;
806 }
807 }
808 if ( (orRule = orRule->next) != NULL ) {
809 result += PK_OR;
810 }
811 }
812 }
813 if ( next != NULL ) {
814 next->dumpRules(result);
815 }
816 }
817
818 int32_t
819 RuleChain::getRepeatLimit () {
820 return repeatLimit;
821 }
822
823 void
824 RuleChain::setRepeatLimit () {
825 int32_t limit=0;
826
827 if ( next != NULL ) {
828 next->setRepeatLimit();
829 limit = next->repeatLimit;
830 }
831
832 if ( ruleHeader != NULL ) {
833 OrConstraint* orRule=ruleHeader;
834 while ( orRule != NULL ) {
835 AndConstraint* andRule=orRule->childNode;
836 while ( andRule != NULL ) {
837 limit = andRule->updateRepeatLimit(limit);
838 andRule = andRule->next;
839 }
840 orRule = orRule->next;
841 }
842 }
843 repeatLimit = limit;
844 }
845
846 UErrorCode
847 RuleChain::getKeywords(int32_t capacityOfKeywords, UnicodeString* keywords, int32_t& arraySize) const {
848 if ( arraySize < capacityOfKeywords-1 ) {
849 keywords[arraySize++]=keyword;
850 }
851 else {
852 return U_BUFFER_OVERFLOW_ERROR;
853 }
854
855 if ( next != NULL ) {
856 return next->getKeywords(capacityOfKeywords, keywords, arraySize);
857 }
858 else {
859 return U_ZERO_ERROR;
860 }
861 }
862
863 UBool
864 RuleChain::isKeyword(const UnicodeString& keywordParam) const {
865 if ( keyword == keywordParam ) {
866 return TRUE;
867 }
868
869 if ( next != NULL ) {
870 return next->isKeyword(keywordParam);
871 }
872 else {
873 return FALSE;
874 }
875 }
876
877
878 RuleParser::RuleParser() {
879 UErrorCode err=U_ZERO_ERROR;
880 const UnicodeString idStart=UNICODE_STRING_SIMPLE("[[a-z]]");
881 const UnicodeString idContinue=UNICODE_STRING_SIMPLE("[[a-z][A-Z][_][0-9]]");
882 idStartFilter = new UnicodeSet(idStart, err);
883 idContinueFilter = new UnicodeSet(idContinue, err);
884 }
885
886 RuleParser::~RuleParser() {
887 delete idStartFilter;
888 delete idContinueFilter;
889 }
890
891 void
892 RuleParser::checkSyntax(tokenType prevType, tokenType curType, UErrorCode &status)
893 {
894 if (U_FAILURE(status)) {
895 return;
896 }
897 switch(prevType) {
898 case none:
899 case tSemiColon:
900 if (curType!=tKeyword) {
901 status = U_UNEXPECTED_TOKEN;
902 }
903 break;
904 case tVariableN :
905 if (curType != tIs && curType != tMod && curType != tIn &&
906 curType != tNot && curType != tWithin) {
907 status = U_UNEXPECTED_TOKEN;
908 }
909 break;
910 case tZero:
911 case tOne:
912 case tTwo:
913 case tFew:
914 case tMany:
915 case tOther:
916 case tKeyword:
917 if (curType != tColon) {
918 status = U_UNEXPECTED_TOKEN;
919 }
920 break;
921 case tColon :
922 if (curType != tVariableN) {
923 status = U_UNEXPECTED_TOKEN;
924 }
925 break;
926 case tIs:
927 if ( curType != tNumber && curType != tNot) {
928 status = U_UNEXPECTED_TOKEN;
929 }
930 break;
931 case tNot:
932 if (curType != tNumber && curType != tIn && curType != tWithin) {
933 status = U_UNEXPECTED_TOKEN;
934 }
935 break;
936 case tMod:
937 case tDot:
938 case tIn:
939 case tWithin:
940 case tAnd:
941 case tOr:
942 if (curType != tNumber && curType != tVariableN) {
943 status = U_UNEXPECTED_TOKEN;
944 }
945 break;
946 case tNumber:
947 if (curType != tDot && curType != tSemiColon && curType != tIs && curType != tNot &&
948 curType != tIn && curType != tWithin && curType != tAnd && curType != tOr)
949 {
950 status = U_UNEXPECTED_TOKEN;
951 }
952 break;
953 default:
954 status = U_UNEXPECTED_TOKEN;
955 break;
956 }
957 }
958
959 void
960 RuleParser::getNextToken(const UnicodeString& ruleData,
961 int32_t *ruleIndex,
962 UnicodeString& token,
963 tokenType& type,
964 UErrorCode &status)
965 {
966 int32_t curIndex= *ruleIndex;
967 UChar ch;
968 tokenType prevType=none;
969
970 if (U_FAILURE(status)) {
971 return;
972 }
973 while (curIndex<ruleData.length()) {
974 ch = ruleData.charAt(curIndex);
975 if ( !inRange(ch, type) ) {
976 status = U_ILLEGAL_CHARACTER;
977 return;
978 }
979 switch (type) {
980 case tSpace:
981 if ( *ruleIndex != curIndex ) { // letter
982 token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
983 *ruleIndex=curIndex;
984 type=prevType;
985 getKeyType(token, type, status);
986 return;
987 }
988 else {
989 *ruleIndex=*ruleIndex+1;
990 }
991 break; // consective space
992 case tColon:
993 case tSemiColon:
994 if ( *ruleIndex != curIndex ) {
995 token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
996 *ruleIndex=curIndex;
997 type=prevType;
998 getKeyType(token, type, status);
999 return;
1000 }
1001 else {
1002 *ruleIndex=curIndex+1;
1003 return;
1004 }
1005 case tLetter:
1006 if ((type==prevType)||(prevType==none)) {
1007 prevType=type;
1008 break;
1009 }
1010 break;
1011 case tNumber:
1012 if ((type==prevType)||(prevType==none)) {
1013 prevType=type;
1014 break;
1015 }
1016 else {
1017 *ruleIndex=curIndex+1;
1018 return;
1019 }
1020 case tDot:
1021 if (prevType==none) { // first dot
1022 prevType=type;
1023 continue;
1024 }
1025 else {
1026 if ( *ruleIndex != curIndex ) {
1027 token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
1028 *ruleIndex=curIndex; // letter
1029 type=prevType;
1030 getKeyType(token, type, status);
1031 return;
1032 }
1033 else { // two consective dots
1034 *ruleIndex=curIndex+2;
1035 return;
1036 }
1037 }
1038 break;
1039 default:
1040 status = U_UNEXPECTED_TOKEN;
1041 return;
1042 }
1043 curIndex++;
1044 }
1045 if ( curIndex>=ruleData.length() ) {
1046 if ( (type == tLetter)||(type == tNumber) ) {
1047 token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
1048 getKeyType(token, type, status);
1049 if (U_FAILURE(status)) {
1050 return;
1051 }
1052 }
1053 *ruleIndex = ruleData.length();
1054 }
1055 }
1056
1057 UBool
1058 RuleParser::inRange(UChar ch, tokenType& type) {
1059 if ((ch>=CAP_A) && (ch<=CAP_Z)) {
1060 // we assume all characters are in lower case already.
1061 return FALSE;
1062 }
1063 if ((ch>=LOW_A) && (ch<=LOW_Z)) {
1064 type = tLetter;
1065 return TRUE;
1066 }
1067 if ((ch>=U_ZERO) && (ch<=U_NINE)) {
1068 type = tNumber;
1069 return TRUE;
1070 }
1071 switch (ch) {
1072 case COLON:
1073 type = tColon;
1074 return TRUE;
1075 case SPACE:
1076 type = tSpace;
1077 return TRUE;
1078 case SEMI_COLON:
1079 type = tSemiColon;
1080 return TRUE;
1081 case DOT:
1082 type = tDot;
1083 return TRUE;
1084 default :
1085 type = none;
1086 return FALSE;
1087 }
1088 }
1089
1090
1091 void
1092 RuleParser::getKeyType(const UnicodeString& token, tokenType& keyType, UErrorCode &status)
1093 {
1094 if (U_FAILURE(status)) {
1095 return;
1096 }
1097 if ( keyType==tNumber) {
1098 }
1099 else if (token==PK_VAR_N) {
1100 keyType = tVariableN;
1101 }
1102 else if (token==PK_IS) {
1103 keyType = tIs;
1104 }
1105 else if (token==PK_AND) {
1106 keyType = tAnd;
1107 }
1108 else if (token==PK_IN) {
1109 keyType = tIn;
1110 }
1111 else if (token==PK_WITHIN) {
1112 keyType = tWithin;
1113 }
1114 else if (token==PK_NOT) {
1115 keyType = tNot;
1116 }
1117 else if (token==PK_MOD) {
1118 keyType = tMod;
1119 }
1120 else if (token==PK_OR) {
1121 keyType = tOr;
1122 }
1123 else if ( isValidKeyword(token) ) {
1124 keyType = tKeyword;
1125 }
1126 else {
1127 status = U_UNEXPECTED_TOKEN;
1128 }
1129 }
1130
1131 UBool
1132 RuleParser::isValidKeyword(const UnicodeString& token) {
1133 if ( token.length()==0 ) {
1134 return FALSE;
1135 }
1136 if ( idStartFilter->contains(token.charAt(0) )==TRUE ) {
1137 int32_t i;
1138 for (i=1; i< token.length(); i++) {
1139 if (idContinueFilter->contains(token.charAt(i))== FALSE) {
1140 return FALSE;
1141 }
1142 }
1143 return TRUE;
1144 }
1145 else {
1146 return FALSE;
1147 }
1148 }
1149
1150 PluralKeywordEnumeration::PluralKeywordEnumeration(RuleChain *header, UErrorCode& status) :
1151 fKeywordNames(status)
1152 {
1153 RuleChain *node=header;
1154 UBool addKeywordOther=true;
1155
1156 if (U_FAILURE(status)) {
1157 return;
1158 }
1159 pos=0;
1160 fKeywordNames.removeAllElements();
1161 while(node!=NULL) {
1162 fKeywordNames.addElement(new UnicodeString(node->keyword), status);
1163 if (U_FAILURE(status)) {
1164 return;
1165 }
1166 if (node->keyword == PLURAL_KEYWORD_OTHER) {
1167 addKeywordOther= false;
1168 }
1169 node=node->next;
1170 }
1171
1172 if (addKeywordOther) {
1173 fKeywordNames.addElement(new UnicodeString(PLURAL_KEYWORD_OTHER), status);
1174 if (U_FAILURE(status)) {
1175 return;
1176 }
1177 }
1178 }
1179
1180 const UnicodeString*
1181 PluralKeywordEnumeration::snext(UErrorCode& status) {
1182 if (U_SUCCESS(status) && pos < fKeywordNames.size()) {
1183 return (const UnicodeString*)fKeywordNames.elementAt(pos++);
1184 }
1185 return NULL;
1186 }
1187
1188 void
1189 PluralKeywordEnumeration::reset(UErrorCode& /*status*/) {
1190 pos=0;
1191 }
1192
1193 int32_t
1194 PluralKeywordEnumeration::count(UErrorCode& /*status*/) const {
1195 return fKeywordNames.size();
1196 }
1197
1198 PluralKeywordEnumeration::~PluralKeywordEnumeration() {
1199 UnicodeString *s;
1200 for (int32_t i=0; i<fKeywordNames.size(); ++i) {
1201 if ((s=(UnicodeString *)fKeywordNames.elementAt(i))!=NULL) {
1202 delete s;
1203 }
1204 }
1205 }
1206
1207 U_NAMESPACE_END
1208
1209
1210 #endif /* #if !UCONFIG_NO_FORMATTING */
1211
1212 //eof