]> git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/plurrule.cpp
ICU-491.11.1.tar.gz
[apple/icu.git] / icuSources / i18n / plurrule.cpp
1 /*
2 *******************************************************************************
3 * Copyright (C) 2007-2011, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 *******************************************************************************
6 *
7 * File PLURRULE.CPP
8 *
9 * Modification History:
10 *
11 * Date Name Description
12 *******************************************************************************
13 */
14
15
16 #include "unicode/utypes.h"
17 #include "unicode/localpointer.h"
18 #include "unicode/plurrule.h"
19 #include "unicode/ures.h"
20 #include "cmemory.h"
21 #include "cstring.h"
22 #include "hash.h"
23 #include "mutex.h"
24 #include "patternprops.h"
25 #include "plurrule_impl.h"
26 #include "putilimp.h"
27 #include "ucln_in.h"
28 #include "ustrfmt.h"
29 #include "locutil.h"
30 #include "uassert.h"
31
32 #if !UCONFIG_NO_FORMATTING
33
34 U_NAMESPACE_BEGIN
35
36 // shared by all instances when lazy-initializing samples
37 static UMTX pluralMutex;
38
39 #define ARRAY_SIZE(array) (int32_t)(sizeof array / sizeof array[0])
40
41 static const UChar PLURAL_KEYWORD_OTHER[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,0};
42 static const UChar PLURAL_DEFAULT_RULE[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,COLON,SPACE,LOW_N,0};
43 static const UChar PK_IN[]={LOW_I,LOW_N,0};
44 static const UChar PK_NOT[]={LOW_N,LOW_O,LOW_T,0};
45 static const UChar PK_IS[]={LOW_I,LOW_S,0};
46 static const UChar PK_MOD[]={LOW_M,LOW_O,LOW_D,0};
47 static const UChar PK_AND[]={LOW_A,LOW_N,LOW_D,0};
48 static const UChar PK_OR[]={LOW_O,LOW_R,0};
49 static const UChar PK_VAR_N[]={LOW_N,0};
50 static const UChar PK_WITHIN[]={LOW_W,LOW_I,LOW_T,LOW_H,LOW_I,LOW_N,0};
51
52 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralRules)
53 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralKeywordEnumeration)
54
55 PluralRules::PluralRules(UErrorCode& status)
56 : UObject(),
57 mRules(NULL),
58 mParser(NULL),
59 mSamples(NULL),
60 mSampleInfo(NULL),
61 mSampleInfoCount(0)
62 {
63 if (U_FAILURE(status)) {
64 return;
65 }
66 mParser = new RuleParser();
67 if (mParser==NULL) {
68 status = U_MEMORY_ALLOCATION_ERROR;
69 }
70 }
71
72 PluralRules::PluralRules(const PluralRules& other)
73 : UObject(other),
74 mRules(NULL),
75 mParser(NULL),
76 mSamples(NULL),
77 mSampleInfo(NULL),
78 mSampleInfoCount(0)
79 {
80 *this=other;
81 }
82
83 PluralRules::~PluralRules() {
84 delete mRules;
85 delete mParser;
86 uprv_free(mSamples);
87 uprv_free(mSampleInfo);
88 }
89
90 PluralRules*
91 PluralRules::clone() const {
92 return new PluralRules(*this);
93 }
94
95 PluralRules&
96 PluralRules::operator=(const PluralRules& other) {
97 if (this != &other) {
98 delete mRules;
99 if (other.mRules==NULL) {
100 mRules = NULL;
101 }
102 else {
103 mRules = new RuleChain(*other.mRules);
104 }
105 delete mParser;
106 mParser = new RuleParser();
107
108 uprv_free(mSamples);
109 mSamples = NULL;
110
111 uprv_free(mSampleInfo);
112 mSampleInfo = NULL;
113 mSampleInfoCount = 0;
114 }
115
116 return *this;
117 }
118
119 PluralRules* U_EXPORT2
120 PluralRules::createRules(const UnicodeString& description, UErrorCode& status) {
121 RuleChain rules;
122
123 if (U_FAILURE(status)) {
124 return NULL;
125 }
126 PluralRules *newRules = new PluralRules(status);
127 if ( (newRules != NULL)&& U_SUCCESS(status) ) {
128 newRules->parseDescription((UnicodeString &)description, rules, status);
129 if (U_SUCCESS(status)) {
130 newRules->addRules(rules);
131 }
132 }
133 if (U_FAILURE(status)) {
134 delete newRules;
135 return NULL;
136 }
137 else {
138 return newRules;
139 }
140 }
141
142 PluralRules* U_EXPORT2
143 PluralRules::createDefaultRules(UErrorCode& status) {
144 return createRules(UnicodeString(TRUE, PLURAL_DEFAULT_RULE, -1), status);
145 }
146
147 PluralRules* U_EXPORT2
148 PluralRules::forLocale(const Locale& locale, UErrorCode& status) {
149 RuleChain rChain;
150 if (U_FAILURE(status)) {
151 return NULL;
152 }
153 PluralRules *newObj = new PluralRules(status);
154 if (newObj==NULL || U_FAILURE(status)) {
155 delete newObj;
156 return NULL;
157 }
158 UnicodeString locRule = newObj->getRuleFromResource(locale, status);
159 if ((locRule.length() != 0) && U_SUCCESS(status)) {
160 newObj->parseDescription(locRule, rChain, status);
161 if (U_SUCCESS(status)) {
162 newObj->addRules(rChain);
163 }
164 }
165 if (U_FAILURE(status)||(locRule.length() == 0)) {
166 // use default plural rule
167 status = U_ZERO_ERROR;
168 UnicodeString defRule = UnicodeString(PLURAL_DEFAULT_RULE);
169 newObj->parseDescription(defRule, rChain, status);
170 newObj->addRules(rChain);
171 }
172
173 return newObj;
174 }
175
176 UnicodeString
177 PluralRules::select(int32_t number) const {
178 if (mRules == NULL) {
179 return UnicodeString(TRUE, PLURAL_DEFAULT_RULE, -1);
180 }
181 else {
182 return mRules->select(number);
183 }
184 }
185
186 UnicodeString
187 PluralRules::select(double number) const {
188 if (mRules == NULL) {
189 return UnicodeString(TRUE, PLURAL_DEFAULT_RULE, -1);
190 }
191 else {
192 return mRules->select(number);
193 }
194 }
195
196 StringEnumeration*
197 PluralRules::getKeywords(UErrorCode& status) const {
198 if (U_FAILURE(status)) return NULL;
199 StringEnumeration* nameEnumerator = new PluralKeywordEnumeration(mRules, status);
200 if (U_FAILURE(status)) {
201 delete nameEnumerator;
202 return NULL;
203 }
204
205 return nameEnumerator;
206 }
207
208 double
209 PluralRules::getUniqueKeywordValue(const UnicodeString& keyword) {
210 double val = 0.0;
211 UErrorCode status = U_ZERO_ERROR;
212 int32_t count = getSamplesInternal(keyword, &val, 1, FALSE, status);
213 return count == 1 ? val : UPLRULES_NO_UNIQUE_VALUE;
214 }
215
216 int32_t
217 PluralRules::getAllKeywordValues(const UnicodeString &keyword, double *dest,
218 int32_t destCapacity, UErrorCode& error) {
219 return getSamplesInternal(keyword, dest, destCapacity, FALSE, error);
220 }
221
222 int32_t
223 PluralRules::getSamples(const UnicodeString &keyword, double *dest,
224 int32_t destCapacity, UErrorCode& status) {
225 return getSamplesInternal(keyword, dest, destCapacity, TRUE, status);
226 }
227
228 int32_t
229 PluralRules::getSamplesInternal(const UnicodeString &keyword, double *dest,
230 int32_t destCapacity, UBool includeUnlimited,
231 UErrorCode& status) {
232 initSamples(status);
233 if (U_FAILURE(status)) {
234 return -1;
235 }
236 if (destCapacity < 0 || (dest == NULL && destCapacity > 0)) {
237 status = U_ILLEGAL_ARGUMENT_ERROR;
238 return -1;
239 }
240
241 int32_t index = getKeywordIndex(keyword, status);
242 if (index == -1) {
243 return 0;
244 }
245
246 const int32_t LIMIT_MASK = 0x1 << 31;
247
248 if (!includeUnlimited) {
249 if ((mSampleInfo[index] & LIMIT_MASK) == 0) {
250 return -1;
251 }
252 }
253
254 int32_t start = index == 0 ? 0 : mSampleInfo[index - 1] & ~LIMIT_MASK;
255 int32_t limit = mSampleInfo[index] & ~LIMIT_MASK;
256 int32_t len = limit - start;
257 if (len <= destCapacity) {
258 destCapacity = len;
259 } else if (includeUnlimited) {
260 len = destCapacity; // no overflow, and don't report more than we copy
261 } else {
262 status = U_BUFFER_OVERFLOW_ERROR;
263 return len;
264 }
265 for (int32_t i = 0; i < destCapacity; ++i, ++start) {
266 dest[i] = mSamples[start];
267 }
268 return len;
269 }
270
271
272 UBool
273 PluralRules::isKeyword(const UnicodeString& keyword) const {
274 if (0 == keyword.compare(PLURAL_KEYWORD_OTHER, 5)) {
275 return true;
276 }
277 else {
278 if (mRules==NULL) {
279 return false;
280 }
281 else {
282 return mRules->isKeyword(keyword);
283 }
284 }
285 }
286
287 UnicodeString
288 PluralRules::getKeywordOther() const {
289 return UnicodeString(TRUE, PLURAL_KEYWORD_OTHER, 5);
290 }
291
292 UBool
293 PluralRules::operator==(const PluralRules& other) const {
294 int32_t limit;
295 const UnicodeString *ptrKeyword;
296 UErrorCode status= U_ZERO_ERROR;
297
298 if ( this == &other ) {
299 return TRUE;
300 }
301 LocalPointer<StringEnumeration> myKeywordList(getKeywords(status));
302 LocalPointer<StringEnumeration> otherKeywordList(other.getKeywords(status));
303 if (U_FAILURE(status)) {
304 return FALSE;
305 }
306
307 if (myKeywordList->count(status)!=otherKeywordList->count(status)) {
308 return FALSE;
309 }
310 myKeywordList->reset(status);
311 while ((ptrKeyword=myKeywordList->snext(status))!=NULL) {
312 if (!other.isKeyword(*ptrKeyword)) {
313 return FALSE;
314 }
315 }
316 otherKeywordList->reset(status);
317 while ((ptrKeyword=otherKeywordList->snext(status))!=NULL) {
318 if (!this->isKeyword(*ptrKeyword)) {
319 return FALSE;
320 }
321 }
322 if (U_FAILURE(status)) {
323 return FALSE;
324 }
325
326 if ((limit=this->getRepeatLimit()) != other.getRepeatLimit()) {
327 return FALSE;
328 }
329 UnicodeString myKeyword, otherKeyword;
330 for (int32_t i=0; i<limit; ++i) {
331 myKeyword = this->select(i);
332 otherKeyword = other.select(i);
333 if (myKeyword!=otherKeyword) {
334 return FALSE;
335 }
336 }
337 return TRUE;
338 }
339
340 void
341 PluralRules::parseDescription(UnicodeString& data, RuleChain& rules, UErrorCode &status)
342 {
343 int32_t ruleIndex=0;
344 UnicodeString token;
345 tokenType type;
346 tokenType prevType=none;
347 RuleChain *ruleChain=NULL;
348 AndConstraint *curAndConstraint=NULL;
349 OrConstraint *orNode=NULL;
350 RuleChain *lastChain=NULL;
351
352 if (U_FAILURE(status)) {
353 return;
354 }
355 UnicodeString ruleData = data.toLower("");
356 while (ruleIndex< ruleData.length()) {
357 mParser->getNextToken(ruleData, &ruleIndex, token, type, status);
358 if (U_FAILURE(status)) {
359 return;
360 }
361 mParser->checkSyntax(prevType, type, status);
362 if (U_FAILURE(status)) {
363 return;
364 }
365 switch (type) {
366 case tAnd:
367 U_ASSERT(curAndConstraint != NULL);
368 curAndConstraint = curAndConstraint->add();
369 break;
370 case tOr:
371 lastChain = &rules;
372 while (lastChain->next !=NULL) {
373 lastChain = lastChain->next;
374 }
375 orNode=lastChain->ruleHeader;
376 while (orNode->next != NULL) {
377 orNode = orNode->next;
378 }
379 orNode->next= new OrConstraint();
380 orNode=orNode->next;
381 orNode->next=NULL;
382 curAndConstraint = orNode->add();
383 break;
384 case tIs:
385 U_ASSERT(curAndConstraint != NULL);
386 curAndConstraint->rangeHigh=-1;
387 break;
388 case tNot:
389 U_ASSERT(curAndConstraint != NULL);
390 curAndConstraint->notIn=TRUE;
391 break;
392 case tIn:
393 U_ASSERT(curAndConstraint != NULL);
394 curAndConstraint->rangeHigh=PLURAL_RANGE_HIGH;
395 curAndConstraint->integerOnly = TRUE;
396 break;
397 case tWithin:
398 U_ASSERT(curAndConstraint != NULL);
399 curAndConstraint->rangeHigh=PLURAL_RANGE_HIGH;
400 break;
401 case tNumber:
402 U_ASSERT(curAndConstraint != NULL);
403 if ( (curAndConstraint->op==AndConstraint::MOD)&&
404 (curAndConstraint->opNum == -1 ) ) {
405 curAndConstraint->opNum=getNumberValue(token);
406 }
407 else {
408 if (curAndConstraint->rangeLow == -1) {
409 curAndConstraint->rangeLow=getNumberValue(token);
410 }
411 else {
412 curAndConstraint->rangeHigh=getNumberValue(token);
413 }
414 }
415 break;
416 case tMod:
417 U_ASSERT(curAndConstraint != NULL);
418 curAndConstraint->op=AndConstraint::MOD;
419 break;
420 case tKeyword:
421 if (ruleChain==NULL) {
422 ruleChain = &rules;
423 }
424 else {
425 while (ruleChain->next!=NULL){
426 ruleChain=ruleChain->next;
427 }
428 ruleChain=ruleChain->next=new RuleChain();
429 }
430 if (ruleChain->ruleHeader != NULL) {
431 delete ruleChain->ruleHeader;
432 }
433 orNode = ruleChain->ruleHeader = new OrConstraint();
434 curAndConstraint = orNode->add();
435 ruleChain->keyword = token;
436 break;
437 default:
438 break;
439 }
440 prevType=type;
441 }
442 }
443
444 int32_t
445 PluralRules::getNumberValue(const UnicodeString& token) const {
446 int32_t i;
447 char digits[128];
448
449 i = token.extract(0, token.length(), digits, ARRAY_SIZE(digits), US_INV);
450 digits[i]='\0';
451
452 return((int32_t)atoi(digits));
453 }
454
455
456 void
457 PluralRules::getNextLocale(const UnicodeString& localeData, int32_t* curIndex, UnicodeString& localeName) {
458 int32_t i=*curIndex;
459
460 localeName.remove();
461 while (i< localeData.length()) {
462 if ( (localeData.charAt(i)!= SPACE) && (localeData.charAt(i)!= COMMA) ) {
463 break;
464 }
465 i++;
466 }
467
468 while (i< localeData.length()) {
469 if ( (localeData.charAt(i)== SPACE) || (localeData.charAt(i)== COMMA) ) {
470 break;
471 }
472 localeName+=localeData.charAt(i++);
473 }
474 *curIndex=i;
475 }
476
477
478 int32_t
479 PluralRules::getRepeatLimit() const {
480 if (mRules!=NULL) {
481 return mRules->getRepeatLimit();
482 }
483 else {
484 return 0;
485 }
486 }
487
488 int32_t
489 PluralRules::getKeywordIndex(const UnicodeString& keyword,
490 UErrorCode& status) const {
491 if (U_SUCCESS(status)) {
492 int32_t n = 0;
493 RuleChain* rc = mRules;
494 while (rc != NULL) {
495 if (rc->ruleHeader != NULL) {
496 if (rc->keyword == keyword) {
497 return n;
498 }
499 ++n;
500 }
501 rc = rc->next;
502 }
503 if (0 == keyword.compare(PLURAL_KEYWORD_OTHER, 5)) {
504 return n;
505 }
506 }
507 return -1;
508 }
509
510 typedef struct SampleRecord {
511 int32_t ruleIndex;
512 double value;
513 } SampleRecord;
514
515 void
516 PluralRules::initSamples(UErrorCode& status) {
517 if (U_FAILURE(status)) {
518 return;
519 }
520 Mutex lock(&pluralMutex);
521
522 if (mSamples) {
523 return;
524 }
525
526 // Note, the original design let you have multiple rules with the same keyword. But
527 // we don't use that in our data and existing functions in this implementation don't
528 // fully support it (for example, the returned keywords is a list and not a set).
529 //
530 // So I don't support this here either. If you ask for samples, or for all values,
531 // you will get information about the first rule with that keyword, not all rules with
532 // that keyword.
533
534 int32_t maxIndex = 0;
535 int32_t otherIndex = -1; // the value -1 will indicate we added 'other' at end
536 RuleChain* rc = mRules;
537 while (rc != NULL) {
538 if (rc->ruleHeader != NULL) {
539 if (otherIndex == -1 && 0 == rc->keyword.compare(PLURAL_KEYWORD_OTHER, 5)) {
540 otherIndex = maxIndex;
541 }
542 ++maxIndex;
543 }
544 rc = rc->next;
545 }
546 if (otherIndex == -1) {
547 ++maxIndex;
548 }
549
550 LocalMemory<int32_t> newSampleInfo;
551 if (NULL == newSampleInfo.allocateInsteadAndCopy(maxIndex)) {
552 status = U_MEMORY_ALLOCATION_ERROR;
553 return;
554 }
555
556 const int32_t LIMIT_MASK = 0x1 << 31;
557
558 rc = mRules;
559 int32_t n = 0;
560 while (rc != NULL) {
561 if (rc->ruleHeader != NULL) {
562 newSampleInfo[n++] = rc->ruleHeader->isLimited() ? LIMIT_MASK : 0;
563 }
564 rc = rc->next;
565 }
566 if (otherIndex == -1) {
567 newSampleInfo[maxIndex - 1] = 0; // unlimited
568 }
569
570 MaybeStackArray<SampleRecord, 10> newSamples;
571 int32_t sampleCount = 0;
572
573 int32_t limit = getRepeatLimit() * MAX_SAMPLES * 2;
574 if (limit < 10) {
575 limit = 10;
576 }
577
578 for (int i = 0, keywordsRemaining = maxIndex;
579 keywordsRemaining > 0 && i < limit;
580 ++i) {
581 double val = i / 2.0;
582
583 n = 0;
584 rc = mRules;
585 int32_t found = -1;
586 while (rc != NULL) {
587 if (rc->ruleHeader != NULL) {
588 if (rc->ruleHeader->isFulfilled(val)) {
589 found = n;
590 break;
591 }
592 ++n;
593 }
594 rc = rc->next;
595 }
596 if (found == -1) {
597 // 'other'. If there is an 'other' rule, the rule set is bad since nothing
598 // should leak through, but we don't bother to report that here.
599 found = otherIndex == -1 ? maxIndex - 1 : otherIndex;
600 }
601 if (newSampleInfo[found] == MAX_SAMPLES) { // limit flag not set
602 continue;
603 }
604 newSampleInfo[found] += 1; // won't impact limit flag
605
606 if (sampleCount == newSamples.getCapacity()) {
607 int32_t newCapacity = sampleCount < 20 ? 128 : sampleCount * 2;
608 if (NULL == newSamples.resize(newCapacity, sampleCount)) {
609 status = U_MEMORY_ALLOCATION_ERROR;
610 return;
611 }
612 }
613 newSamples[sampleCount].ruleIndex = found;
614 newSamples[sampleCount].value = val;
615 ++sampleCount;
616
617 if (newSampleInfo[found] == MAX_SAMPLES) { // limit flag not set
618 --keywordsRemaining;
619 }
620 }
621
622 // sort the values by index, leaving order otherwise unchanged
623 // this is just a selection sort for simplicity
624 LocalMemory<double> values;
625 if (NULL == values.allocateInsteadAndCopy(sampleCount)) {
626 status = U_MEMORY_ALLOCATION_ERROR;
627 return;
628 }
629 for (int i = 0, j = 0; i < maxIndex; ++i) {
630 for (int k = 0; k < sampleCount; ++k) {
631 if (newSamples[k].ruleIndex == i) {
632 values[j++] = newSamples[k].value;
633 }
634 }
635 }
636
637 // convert array of mask/lengths to array of mask/limits
638 limit = 0;
639 for (int i = 0; i < maxIndex; ++i) {
640 int32_t info = newSampleInfo[i];
641 int32_t len = info & ~LIMIT_MASK;
642 limit += len;
643 // if a rule is 'unlimited' but has fewer than MAX_SAMPLES samples,
644 // it's not really unlimited, so mark it as limited
645 int32_t mask = len < MAX_SAMPLES ? LIMIT_MASK : info & LIMIT_MASK;
646 newSampleInfo[i] = limit | mask;
647 }
648
649 // ok, we've got good data
650 mSamples = values.orphan();
651 mSampleInfo = newSampleInfo.orphan();
652 mSampleInfoCount = maxIndex;
653 }
654
655 void
656 PluralRules::addRules(RuleChain& rules) {
657 RuleChain *newRule = new RuleChain(rules);
658 this->mRules=newRule;
659 newRule->setRepeatLimit();
660 }
661
662 UnicodeString
663 PluralRules::getRuleFromResource(const Locale& locale, UErrorCode& errCode) {
664 UnicodeString emptyStr;
665
666 if (U_FAILURE(errCode)) {
667 return emptyStr;
668 }
669 UResourceBundle *rb=ures_openDirect(NULL, "plurals", &errCode);
670 if(U_FAILURE(errCode)) {
671 /* total failure, not even root could be opened */
672 return emptyStr;
673 }
674 UResourceBundle *locRes=ures_getByKey(rb, "locales", NULL, &errCode);
675 if(U_FAILURE(errCode)) {
676 ures_close(rb);
677 return emptyStr;
678 }
679 int32_t resLen=0;
680 const char *curLocaleName=locale.getName();
681 const UChar* s = ures_getStringByKey(locRes, curLocaleName, &resLen, &errCode);
682
683 if (s == NULL) {
684 // Check parent locales.
685 UErrorCode status = U_ZERO_ERROR;
686 char parentLocaleName[ULOC_FULLNAME_CAPACITY];
687 const char *curLocaleName=locale.getName();
688 int32_t localeNameLen=0;
689 uprv_strcpy(parentLocaleName, curLocaleName);
690
691 while ((localeNameLen=uloc_getParent(parentLocaleName, parentLocaleName,
692 ULOC_FULLNAME_CAPACITY, &status)) > 0) {
693 resLen=0;
694 s = ures_getStringByKey(locRes, parentLocaleName, &resLen, &status);
695 if (s != NULL) {
696 errCode = U_ZERO_ERROR;
697 break;
698 }
699 status = U_ZERO_ERROR;
700 }
701 }
702 if (s==NULL) {
703 ures_close(locRes);
704 ures_close(rb);
705 return emptyStr;
706 }
707
708 char setKey[256];
709 UChar result[256];
710 u_UCharsToChars(s, setKey, resLen + 1);
711 // printf("\n PluralRule: %s\n", setKey);
712
713
714 UResourceBundle *ruleRes=ures_getByKey(rb, "rules", NULL, &errCode);
715 if(U_FAILURE(errCode)) {
716 ures_close(locRes);
717 ures_close(rb);
718 return emptyStr;
719 }
720 resLen=0;
721 UResourceBundle *setRes = ures_getByKey(ruleRes, setKey, NULL, &errCode);
722 if (U_FAILURE(errCode)) {
723 ures_close(ruleRes);
724 ures_close(locRes);
725 ures_close(rb);
726 return emptyStr;
727 }
728
729 int32_t numberKeys = ures_getSize(setRes);
730 char *key=NULL;
731 int32_t len=0;
732 for(int32_t i=0; i<numberKeys; ++i) {
733 int32_t keyLen;
734 resLen=0;
735 s=ures_getNextString(setRes, &resLen, (const char**)&key, &errCode);
736 keyLen = (int32_t)uprv_strlen(key);
737 u_charsToUChars(key, result+len, keyLen);
738 len += keyLen;
739 result[len++]=COLON;
740 uprv_memcpy(result+len, s, resLen*sizeof(UChar));
741 len += resLen;
742 result[len++]=SEMI_COLON;
743 }
744 result[len++]=0;
745 u_UCharsToChars(result, setKey, len);
746 // printf(" Rule: %s\n", setKey);
747
748 ures_close(setRes);
749 ures_close(ruleRes);
750 ures_close(locRes);
751 ures_close(rb);
752 return UnicodeString(result);
753 }
754
755 AndConstraint::AndConstraint() {
756 op = AndConstraint::NONE;
757 opNum=-1;
758 rangeLow=-1;
759 rangeHigh=-1;
760 notIn=FALSE;
761 integerOnly=FALSE;
762 next=NULL;
763 }
764
765
766 AndConstraint::AndConstraint(const AndConstraint& other) {
767 this->op = other.op;
768 this->opNum=other.opNum;
769 this->rangeLow=other.rangeLow;
770 this->rangeHigh=other.rangeHigh;
771 this->integerOnly=other.integerOnly;
772 this->notIn=other.notIn;
773 if (other.next==NULL) {
774 this->next=NULL;
775 }
776 else {
777 this->next = new AndConstraint(*other.next);
778 }
779 }
780
781 AndConstraint::~AndConstraint() {
782 if (next!=NULL) {
783 delete next;
784 }
785 }
786
787
788 UBool
789 AndConstraint::isFulfilled(double number) {
790 UBool result=TRUE;
791 double value=number;
792
793 // arrrrrrgh
794 if ((rangeHigh == -1 || integerOnly) && number != uprv_floor(number)) {
795 return notIn;
796 }
797
798 if ( op == MOD ) {
799 value = (int32_t)value % opNum;
800 }
801 if ( rangeHigh == -1 ) {
802 if ( rangeLow == -1 ) {
803 result = TRUE; // empty rule
804 }
805 else {
806 if ( value == rangeLow ) {
807 result = TRUE;
808 }
809 else {
810 result = FALSE;
811 }
812 }
813 }
814 else {
815 if ((rangeLow <= value) && (value <= rangeHigh)) {
816 if (integerOnly) {
817 if ( value != (int32_t)value) {
818 result = FALSE;
819 }
820 else {
821 result = TRUE;
822 }
823 }
824 else {
825 result = TRUE;
826 }
827 }
828 else {
829 result = FALSE;
830 }
831 }
832 if (notIn) {
833 return !result;
834 }
835 else {
836 return result;
837 }
838 }
839
840 UBool
841 AndConstraint::isLimited() {
842 return (rangeHigh == -1 || integerOnly) && !notIn && op != MOD;
843 }
844
845 int32_t
846 AndConstraint::updateRepeatLimit(int32_t maxLimit) {
847
848 if ( op == MOD ) {
849 return uprv_max(opNum, maxLimit);
850 }
851 else {
852 if ( rangeHigh == -1 ) {
853 return uprv_max(rangeLow, maxLimit);
854 }
855 else{
856 return uprv_max(rangeHigh, maxLimit);
857 }
858 }
859 }
860
861
862 AndConstraint*
863 AndConstraint::add()
864 {
865 this->next = new AndConstraint();
866 return this->next;
867 }
868
869 OrConstraint::OrConstraint() {
870 childNode=NULL;
871 next=NULL;
872 }
873
874 OrConstraint::OrConstraint(const OrConstraint& other) {
875 if ( other.childNode == NULL ) {
876 this->childNode = NULL;
877 }
878 else {
879 this->childNode = new AndConstraint(*(other.childNode));
880 }
881 if (other.next == NULL ) {
882 this->next = NULL;
883 }
884 else {
885 this->next = new OrConstraint(*(other.next));
886 }
887 }
888
889 OrConstraint::~OrConstraint() {
890 if (childNode!=NULL) {
891 delete childNode;
892 }
893 if (next!=NULL) {
894 delete next;
895 }
896 }
897
898 AndConstraint*
899 OrConstraint::add()
900 {
901 OrConstraint *curOrConstraint=this;
902 {
903 while (curOrConstraint->next!=NULL) {
904 curOrConstraint = curOrConstraint->next;
905 }
906 curOrConstraint->next = NULL;
907 curOrConstraint->childNode = new AndConstraint();
908 }
909 return curOrConstraint->childNode;
910 }
911
912 UBool
913 OrConstraint::isFulfilled(double number) {
914 OrConstraint* orRule=this;
915 UBool result=FALSE;
916
917 while (orRule!=NULL && !result) {
918 result=TRUE;
919 AndConstraint* andRule = orRule->childNode;
920 while (andRule!=NULL && result) {
921 result = andRule->isFulfilled(number);
922 andRule=andRule->next;
923 }
924 orRule = orRule->next;
925 }
926
927 return result;
928 }
929
930 UBool
931 OrConstraint::isLimited() {
932 for (OrConstraint *orc = this; orc != NULL; orc = orc->next) {
933 UBool result = FALSE;
934 for (AndConstraint *andc = orc->childNode; andc != NULL; andc = andc->next) {
935 if (andc->isLimited()) {
936 result = TRUE;
937 break;
938 }
939 }
940 if (result == FALSE) {
941 return FALSE;
942 }
943 }
944 return TRUE;
945 }
946
947 RuleChain::RuleChain() {
948 ruleHeader=NULL;
949 next = NULL;
950 repeatLimit=0;
951 }
952
953 RuleChain::RuleChain(const RuleChain& other) {
954 this->repeatLimit = other.repeatLimit;
955 this->keyword=other.keyword;
956 if (other.ruleHeader != NULL) {
957 this->ruleHeader = new OrConstraint(*(other.ruleHeader));
958 }
959 else {
960 this->ruleHeader = NULL;
961 }
962 if (other.next != NULL ) {
963 this->next = new RuleChain(*other.next);
964 }
965 else
966 {
967 this->next = NULL;
968 }
969 }
970
971 RuleChain::~RuleChain() {
972 if (next != NULL) {
973 delete next;
974 }
975 if ( ruleHeader != NULL ) {
976 delete ruleHeader;
977 }
978 }
979
980 UnicodeString
981 RuleChain::select(double number) const {
982
983 if ( ruleHeader != NULL ) {
984 if (ruleHeader->isFulfilled(number)) {
985 return keyword;
986 }
987 }
988 if ( next != NULL ) {
989 return next->select(number);
990 }
991 else {
992 return UnicodeString(TRUE, PLURAL_KEYWORD_OTHER, 5);
993 }
994
995 }
996
997 void
998 RuleChain::dumpRules(UnicodeString& result) {
999 UChar digitString[16];
1000
1001 if ( ruleHeader != NULL ) {
1002 result += keyword;
1003 OrConstraint* orRule=ruleHeader;
1004 while ( orRule != NULL ) {
1005 AndConstraint* andRule=orRule->childNode;
1006 while ( andRule != NULL ) {
1007 if ( (andRule->op==AndConstraint::NONE) && (andRule->rangeHigh==-1) ) {
1008 result += UNICODE_STRING_SIMPLE(" n is ");
1009 if (andRule->notIn) {
1010 result += UNICODE_STRING_SIMPLE("not ");
1011 }
1012 uprv_itou(digitString,16, andRule->rangeLow,10,0);
1013 result += UnicodeString(digitString);
1014 }
1015 else {
1016 if (andRule->op==AndConstraint::MOD) {
1017 result += UNICODE_STRING_SIMPLE(" n mod ");
1018 uprv_itou(digitString,16, andRule->opNum,10,0);
1019 result += UnicodeString(digitString);
1020 }
1021 else {
1022 result += UNICODE_STRING_SIMPLE(" n ");
1023 }
1024 if (andRule->rangeHigh==-1) {
1025 if (andRule->notIn) {
1026 result += UNICODE_STRING_SIMPLE(" is not ");
1027 uprv_itou(digitString,16, andRule->rangeLow,10,0);
1028 result += UnicodeString(digitString);
1029 }
1030 else {
1031 result += UNICODE_STRING_SIMPLE(" is ");
1032 uprv_itou(digitString,16, andRule->rangeLow,10,0);
1033 result += UnicodeString(digitString);
1034 }
1035 }
1036 else {
1037 if (andRule->notIn) {
1038 if ( andRule->integerOnly ) {
1039 result += UNICODE_STRING_SIMPLE(" not in ");
1040 }
1041 else {
1042 result += UNICODE_STRING_SIMPLE(" not within ");
1043 }
1044 uprv_itou(digitString,16, andRule->rangeLow,10,0);
1045 result += UnicodeString(digitString);
1046 result += UNICODE_STRING_SIMPLE(" .. ");
1047 uprv_itou(digitString,16, andRule->rangeHigh,10,0);
1048 result += UnicodeString(digitString);
1049 }
1050 else {
1051 if ( andRule->integerOnly ) {
1052 result += UNICODE_STRING_SIMPLE(" in ");
1053 }
1054 else {
1055 result += UNICODE_STRING_SIMPLE(" within ");
1056 }
1057 uprv_itou(digitString,16, andRule->rangeLow,10,0);
1058 result += UnicodeString(digitString);
1059 result += UNICODE_STRING_SIMPLE(" .. ");
1060 uprv_itou(digitString,16, andRule->rangeHigh,10,0);
1061 }
1062 }
1063 }
1064 if ( (andRule=andRule->next) != NULL) {
1065 result.append(PK_AND, 3);
1066 }
1067 }
1068 if ( (orRule = orRule->next) != NULL ) {
1069 result.append(PK_OR, 2);
1070 }
1071 }
1072 }
1073 if ( next != NULL ) {
1074 next->dumpRules(result);
1075 }
1076 }
1077
1078 int32_t
1079 RuleChain::getRepeatLimit () {
1080 return repeatLimit;
1081 }
1082
1083 void
1084 RuleChain::setRepeatLimit () {
1085 int32_t limit=0;
1086
1087 if ( next != NULL ) {
1088 next->setRepeatLimit();
1089 limit = next->repeatLimit;
1090 }
1091
1092 if ( ruleHeader != NULL ) {
1093 OrConstraint* orRule=ruleHeader;
1094 while ( orRule != NULL ) {
1095 AndConstraint* andRule=orRule->childNode;
1096 while ( andRule != NULL ) {
1097 limit = andRule->updateRepeatLimit(limit);
1098 andRule = andRule->next;
1099 }
1100 orRule = orRule->next;
1101 }
1102 }
1103 repeatLimit = limit;
1104 }
1105
1106 UErrorCode
1107 RuleChain::getKeywords(int32_t capacityOfKeywords, UnicodeString* keywords, int32_t& arraySize) const {
1108 if ( arraySize < capacityOfKeywords-1 ) {
1109 keywords[arraySize++]=keyword;
1110 }
1111 else {
1112 return U_BUFFER_OVERFLOW_ERROR;
1113 }
1114
1115 if ( next != NULL ) {
1116 return next->getKeywords(capacityOfKeywords, keywords, arraySize);
1117 }
1118 else {
1119 return U_ZERO_ERROR;
1120 }
1121 }
1122
1123 UBool
1124 RuleChain::isKeyword(const UnicodeString& keywordParam) const {
1125 if ( keyword == keywordParam ) {
1126 return TRUE;
1127 }
1128
1129 if ( next != NULL ) {
1130 return next->isKeyword(keywordParam);
1131 }
1132 else {
1133 return FALSE;
1134 }
1135 }
1136
1137
1138 RuleParser::RuleParser() {
1139 }
1140
1141 RuleParser::~RuleParser() {
1142 }
1143
1144 void
1145 RuleParser::checkSyntax(tokenType prevType, tokenType curType, UErrorCode &status)
1146 {
1147 if (U_FAILURE(status)) {
1148 return;
1149 }
1150 switch(prevType) {
1151 case none:
1152 case tSemiColon:
1153 if (curType!=tKeyword) {
1154 status = U_UNEXPECTED_TOKEN;
1155 }
1156 break;
1157 case tVariableN :
1158 if (curType != tIs && curType != tMod && curType != tIn &&
1159 curType != tNot && curType != tWithin) {
1160 status = U_UNEXPECTED_TOKEN;
1161 }
1162 break;
1163 case tZero:
1164 case tOne:
1165 case tTwo:
1166 case tFew:
1167 case tMany:
1168 case tOther:
1169 case tKeyword:
1170 if (curType != tColon) {
1171 status = U_UNEXPECTED_TOKEN;
1172 }
1173 break;
1174 case tColon :
1175 if (curType != tVariableN) {
1176 status = U_UNEXPECTED_TOKEN;
1177 }
1178 break;
1179 case tIs:
1180 if ( curType != tNumber && curType != tNot) {
1181 status = U_UNEXPECTED_TOKEN;
1182 }
1183 break;
1184 case tNot:
1185 if (curType != tNumber && curType != tIn && curType != tWithin) {
1186 status = U_UNEXPECTED_TOKEN;
1187 }
1188 break;
1189 case tMod:
1190 case tDot:
1191 case tIn:
1192 case tWithin:
1193 case tAnd:
1194 case tOr:
1195 if (curType != tNumber && curType != tVariableN) {
1196 status = U_UNEXPECTED_TOKEN;
1197 }
1198 break;
1199 case tNumber:
1200 if (curType != tDot && curType != tSemiColon && curType != tIs && curType != tNot &&
1201 curType != tIn && curType != tWithin && curType != tAnd && curType != tOr)
1202 {
1203 status = U_UNEXPECTED_TOKEN;
1204 }
1205 break;
1206 default:
1207 status = U_UNEXPECTED_TOKEN;
1208 break;
1209 }
1210 }
1211
1212 void
1213 RuleParser::getNextToken(const UnicodeString& ruleData,
1214 int32_t *ruleIndex,
1215 UnicodeString& token,
1216 tokenType& type,
1217 UErrorCode &status)
1218 {
1219 int32_t curIndex= *ruleIndex;
1220 UChar ch;
1221 tokenType prevType=none;
1222
1223 if (U_FAILURE(status)) {
1224 return;
1225 }
1226 while (curIndex<ruleData.length()) {
1227 ch = ruleData.charAt(curIndex);
1228 if ( !inRange(ch, type) ) {
1229 status = U_ILLEGAL_CHARACTER;
1230 return;
1231 }
1232 switch (type) {
1233 case tSpace:
1234 if ( *ruleIndex != curIndex ) { // letter
1235 token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
1236 *ruleIndex=curIndex;
1237 type=prevType;
1238 getKeyType(token, type, status);
1239 return;
1240 }
1241 else {
1242 *ruleIndex=*ruleIndex+1;
1243 }
1244 break; // consective space
1245 case tColon:
1246 case tSemiColon:
1247 if ( *ruleIndex != curIndex ) {
1248 token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
1249 *ruleIndex=curIndex;
1250 type=prevType;
1251 getKeyType(token, type, status);
1252 return;
1253 }
1254 else {
1255 *ruleIndex=curIndex+1;
1256 return;
1257 }
1258 case tLetter:
1259 if ((type==prevType)||(prevType==none)) {
1260 prevType=type;
1261 break;
1262 }
1263 break;
1264 case tNumber:
1265 if ((type==prevType)||(prevType==none)) {
1266 prevType=type;
1267 break;
1268 }
1269 else {
1270 *ruleIndex=curIndex+1;
1271 return;
1272 }
1273 case tDot:
1274 if (prevType==none) { // first dot
1275 prevType=type;
1276 continue;
1277 }
1278 else {
1279 if ( *ruleIndex != curIndex ) {
1280 token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
1281 *ruleIndex=curIndex; // letter
1282 type=prevType;
1283 getKeyType(token, type, status);
1284 return;
1285 }
1286 else { // two consective dots
1287 *ruleIndex=curIndex+2;
1288 return;
1289 }
1290 }
1291 break;
1292 default:
1293 status = U_UNEXPECTED_TOKEN;
1294 return;
1295 }
1296 curIndex++;
1297 }
1298 if ( curIndex>=ruleData.length() ) {
1299 if ( (type == tLetter)||(type == tNumber) ) {
1300 token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
1301 getKeyType(token, type, status);
1302 if (U_FAILURE(status)) {
1303 return;
1304 }
1305 }
1306 *ruleIndex = ruleData.length();
1307 }
1308 }
1309
1310 UBool
1311 RuleParser::inRange(UChar ch, tokenType& type) {
1312 if ((ch>=CAP_A) && (ch<=CAP_Z)) {
1313 // we assume all characters are in lower case already.
1314 return FALSE;
1315 }
1316 if ((ch>=LOW_A) && (ch<=LOW_Z)) {
1317 type = tLetter;
1318 return TRUE;
1319 }
1320 if ((ch>=U_ZERO) && (ch<=U_NINE)) {
1321 type = tNumber;
1322 return TRUE;
1323 }
1324 switch (ch) {
1325 case COLON:
1326 type = tColon;
1327 return TRUE;
1328 case SPACE:
1329 type = tSpace;
1330 return TRUE;
1331 case SEMI_COLON:
1332 type = tSemiColon;
1333 return TRUE;
1334 case DOT:
1335 type = tDot;
1336 return TRUE;
1337 default :
1338 type = none;
1339 return FALSE;
1340 }
1341 }
1342
1343
1344 void
1345 RuleParser::getKeyType(const UnicodeString& token, tokenType& keyType, UErrorCode &status)
1346 {
1347 if (U_FAILURE(status)) {
1348 return;
1349 }
1350 if ( keyType==tNumber) {
1351 }
1352 else if (0 == token.compare(PK_VAR_N, 1)) {
1353 keyType = tVariableN;
1354 }
1355 else if (0 == token.compare(PK_IS, 2)) {
1356 keyType = tIs;
1357 }
1358 else if (0 == token.compare(PK_AND, 3)) {
1359 keyType = tAnd;
1360 }
1361 else if (0 == token.compare(PK_IN, 2)) {
1362 keyType = tIn;
1363 }
1364 else if (0 == token.compare(PK_WITHIN, 6)) {
1365 keyType = tWithin;
1366 }
1367 else if (0 == token.compare(PK_NOT, 3)) {
1368 keyType = tNot;
1369 }
1370 else if (0 == token.compare(PK_MOD, 3)) {
1371 keyType = tMod;
1372 }
1373 else if (0 == token.compare(PK_OR, 2)) {
1374 keyType = tOr;
1375 }
1376 else if ( isValidKeyword(token) ) {
1377 keyType = tKeyword;
1378 }
1379 else {
1380 status = U_UNEXPECTED_TOKEN;
1381 }
1382 }
1383
1384 UBool
1385 RuleParser::isValidKeyword(const UnicodeString& token) {
1386 return PatternProps::isIdentifier(token.getBuffer(), token.length());
1387 }
1388
1389 PluralKeywordEnumeration::PluralKeywordEnumeration(RuleChain *header, UErrorCode& status)
1390 : pos(0), fKeywordNames(status) {
1391 if (U_FAILURE(status)) {
1392 return;
1393 }
1394 fKeywordNames.setDeleter(uprv_deleteUObject);
1395 UBool addKeywordOther=TRUE;
1396 RuleChain *node=header;
1397 while(node!=NULL) {
1398 fKeywordNames.addElement(new UnicodeString(node->keyword), status);
1399 if (U_FAILURE(status)) {
1400 return;
1401 }
1402 if (0 == node->keyword.compare(PLURAL_KEYWORD_OTHER, 5)) {
1403 addKeywordOther= FALSE;
1404 }
1405 node=node->next;
1406 }
1407
1408 if (addKeywordOther) {
1409 fKeywordNames.addElement(new UnicodeString(PLURAL_KEYWORD_OTHER), status);
1410 }
1411 }
1412
1413 const UnicodeString*
1414 PluralKeywordEnumeration::snext(UErrorCode& status) {
1415 if (U_SUCCESS(status) && pos < fKeywordNames.size()) {
1416 return (const UnicodeString*)fKeywordNames.elementAt(pos++);
1417 }
1418 return NULL;
1419 }
1420
1421 void
1422 PluralKeywordEnumeration::reset(UErrorCode& /*status*/) {
1423 pos=0;
1424 }
1425
1426 int32_t
1427 PluralKeywordEnumeration::count(UErrorCode& /*status*/) const {
1428 return fKeywordNames.size();
1429 }
1430
1431 PluralKeywordEnumeration::~PluralKeywordEnumeration() {
1432 }
1433
1434 U_NAMESPACE_END
1435
1436
1437 #endif /* #if !UCONFIG_NO_FORMATTING */
1438
1439 //eof