]> git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/plurrule.cpp
ICU-511.35.tar.gz
[apple/icu.git] / icuSources / i18n / plurrule.cpp
1 /*
2 *******************************************************************************
3 * Copyright (C) 2007-2012, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 *******************************************************************************
6 *
7 * File plurrule.cpp
8 */
9
10 #include "unicode/utypes.h"
11 #include "unicode/localpointer.h"
12 #include "unicode/plurrule.h"
13 #include "unicode/upluralrules.h"
14 #include "unicode/ures.h"
15 #include "cmemory.h"
16 #include "cstring.h"
17 #include "hash.h"
18 #include "mutex.h"
19 #include "patternprops.h"
20 #include "plurrule_impl.h"
21 #include "putilimp.h"
22 #include "ucln_in.h"
23 #include "ustrfmt.h"
24 #include "locutil.h"
25 #include "uassert.h"
26
27 #if !UCONFIG_NO_FORMATTING
28
29 U_NAMESPACE_BEGIN
30
31 // shared by all instances when lazy-initializing samples
32 static UMutex pluralMutex = U_MUTEX_INITIALIZER;
33
34 #define ARRAY_SIZE(array) (int32_t)(sizeof array / sizeof array[0])
35
36 static const UChar PLURAL_KEYWORD_OTHER[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,0};
37 static const UChar PLURAL_DEFAULT_RULE[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,COLON,SPACE,LOW_N,0};
38 static const UChar PK_IN[]={LOW_I,LOW_N,0};
39 static const UChar PK_NOT[]={LOW_N,LOW_O,LOW_T,0};
40 static const UChar PK_IS[]={LOW_I,LOW_S,0};
41 static const UChar PK_MOD[]={LOW_M,LOW_O,LOW_D,0};
42 static const UChar PK_AND[]={LOW_A,LOW_N,LOW_D,0};
43 static const UChar PK_OR[]={LOW_O,LOW_R,0};
44 static const UChar PK_VAR_N[]={LOW_N,0};
45 static const UChar PK_WITHIN[]={LOW_W,LOW_I,LOW_T,LOW_H,LOW_I,LOW_N,0};
46
47 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralRules)
48 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralKeywordEnumeration)
49
50 PluralRules::PluralRules(UErrorCode& status)
51 : UObject(),
52 mRules(NULL),
53 mParser(NULL),
54 mSamples(NULL),
55 mSampleInfo(NULL),
56 mSampleInfoCount(0)
57 {
58 if (U_FAILURE(status)) {
59 return;
60 }
61 mParser = new RuleParser();
62 if (mParser==NULL) {
63 status = U_MEMORY_ALLOCATION_ERROR;
64 }
65 }
66
67 PluralRules::PluralRules(const PluralRules& other)
68 : UObject(other),
69 mRules(NULL),
70 mParser(NULL),
71 mSamples(NULL),
72 mSampleInfo(NULL),
73 mSampleInfoCount(0)
74 {
75 *this=other;
76 }
77
78 PluralRules::~PluralRules() {
79 delete mRules;
80 delete mParser;
81 uprv_free(mSamples);
82 uprv_free(mSampleInfo);
83 }
84
85 PluralRules*
86 PluralRules::clone() const {
87 return new PluralRules(*this);
88 }
89
90 PluralRules&
91 PluralRules::operator=(const PluralRules& other) {
92 if (this != &other) {
93 delete mRules;
94 if (other.mRules==NULL) {
95 mRules = NULL;
96 }
97 else {
98 mRules = new RuleChain(*other.mRules);
99 }
100 delete mParser;
101 mParser = new RuleParser();
102
103 uprv_free(mSamples);
104 mSamples = NULL;
105
106 uprv_free(mSampleInfo);
107 mSampleInfo = NULL;
108 mSampleInfoCount = 0;
109 }
110
111 return *this;
112 }
113
114 PluralRules* U_EXPORT2
115 PluralRules::createRules(const UnicodeString& description, UErrorCode& status) {
116 RuleChain rules;
117
118 if (U_FAILURE(status)) {
119 return NULL;
120 }
121 PluralRules *newRules = new PluralRules(status);
122 if ( (newRules != NULL)&& U_SUCCESS(status) ) {
123 newRules->parseDescription((UnicodeString &)description, rules, status);
124 if (U_SUCCESS(status)) {
125 newRules->addRules(rules);
126 }
127 }
128 if (U_FAILURE(status)) {
129 delete newRules;
130 return NULL;
131 }
132 else {
133 return newRules;
134 }
135 }
136
137 PluralRules* U_EXPORT2
138 PluralRules::createDefaultRules(UErrorCode& status) {
139 return createRules(UnicodeString(TRUE, PLURAL_DEFAULT_RULE, -1), status);
140 }
141
142 PluralRules* U_EXPORT2
143 PluralRules::forLocale(const Locale& locale, UErrorCode& status) {
144 return forLocale(locale, UPLURAL_TYPE_CARDINAL, status);
145 }
146
147 PluralRules* U_EXPORT2
148 PluralRules::forLocale(const Locale& locale, UPluralType type, UErrorCode& status) {
149 RuleChain rChain;
150 if (U_FAILURE(status)) {
151 return NULL;
152 }
153 if (type >= UPLURAL_TYPE_COUNT) {
154 status = U_ILLEGAL_ARGUMENT_ERROR;
155 return NULL;
156 }
157 PluralRules *newObj = new PluralRules(status);
158 if (newObj==NULL || U_FAILURE(status)) {
159 delete newObj;
160 return NULL;
161 }
162 UnicodeString locRule = newObj->getRuleFromResource(locale, type, status);
163 if ((locRule.length() != 0) && U_SUCCESS(status)) {
164 newObj->parseDescription(locRule, rChain, status);
165 if (U_SUCCESS(status)) {
166 newObj->addRules(rChain);
167 }
168 }
169 if (U_FAILURE(status)||(locRule.length() == 0)) {
170 // use default plural rule
171 status = U_ZERO_ERROR;
172 UnicodeString defRule = UnicodeString(PLURAL_DEFAULT_RULE);
173 newObj->parseDescription(defRule, rChain, status);
174 newObj->addRules(rChain);
175 }
176
177 return newObj;
178 }
179
180 UnicodeString
181 PluralRules::select(int32_t number) const {
182 if (mRules == NULL) {
183 return UnicodeString(TRUE, PLURAL_DEFAULT_RULE, -1);
184 }
185 else {
186 return mRules->select(number);
187 }
188 }
189
190 UnicodeString
191 PluralRules::select(double number) const {
192 if (mRules == NULL) {
193 return UnicodeString(TRUE, PLURAL_DEFAULT_RULE, -1);
194 }
195 else {
196 return mRules->select(number);
197 }
198 }
199
200 StringEnumeration*
201 PluralRules::getKeywords(UErrorCode& status) const {
202 if (U_FAILURE(status)) return NULL;
203 StringEnumeration* nameEnumerator = new PluralKeywordEnumeration(mRules, status);
204 if (U_FAILURE(status)) {
205 delete nameEnumerator;
206 return NULL;
207 }
208
209 return nameEnumerator;
210 }
211
212 double
213 PluralRules::getUniqueKeywordValue(const UnicodeString& keyword) {
214 double val = 0.0;
215 UErrorCode status = U_ZERO_ERROR;
216 int32_t count = getSamplesInternal(keyword, &val, 1, FALSE, status);
217 return count == 1 ? val : UPLRULES_NO_UNIQUE_VALUE;
218 }
219
220 int32_t
221 PluralRules::getAllKeywordValues(const UnicodeString &keyword, double *dest,
222 int32_t destCapacity, UErrorCode& error) {
223 return getSamplesInternal(keyword, dest, destCapacity, FALSE, error);
224 }
225
226 int32_t
227 PluralRules::getSamples(const UnicodeString &keyword, double *dest,
228 int32_t destCapacity, UErrorCode& status) {
229 return getSamplesInternal(keyword, dest, destCapacity, TRUE, status);
230 }
231
232 int32_t
233 PluralRules::getSamplesInternal(const UnicodeString &keyword, double *dest,
234 int32_t destCapacity, UBool includeUnlimited,
235 UErrorCode& status) {
236 initSamples(status);
237 if (U_FAILURE(status)) {
238 return -1;
239 }
240 if (destCapacity < 0 || (dest == NULL && destCapacity > 0)) {
241 status = U_ILLEGAL_ARGUMENT_ERROR;
242 return -1;
243 }
244
245 int32_t index = getKeywordIndex(keyword, status);
246 if (index == -1) {
247 return 0;
248 }
249
250 const int32_t LIMIT_MASK = 0x1 << 31;
251
252 if (!includeUnlimited) {
253 if ((mSampleInfo[index] & LIMIT_MASK) == 0) {
254 return -1;
255 }
256 }
257
258 int32_t start = index == 0 ? 0 : mSampleInfo[index - 1] & ~LIMIT_MASK;
259 int32_t limit = mSampleInfo[index] & ~LIMIT_MASK;
260 int32_t len = limit - start;
261 if (len <= destCapacity) {
262 destCapacity = len;
263 } else if (includeUnlimited) {
264 len = destCapacity; // no overflow, and don't report more than we copy
265 } else {
266 status = U_BUFFER_OVERFLOW_ERROR;
267 return len;
268 }
269 for (int32_t i = 0; i < destCapacity; ++i, ++start) {
270 dest[i] = mSamples[start];
271 }
272 return len;
273 }
274
275
276 UBool
277 PluralRules::isKeyword(const UnicodeString& keyword) const {
278 if (0 == keyword.compare(PLURAL_KEYWORD_OTHER, 5)) {
279 return true;
280 }
281 else {
282 if (mRules==NULL) {
283 return false;
284 }
285 else {
286 return mRules->isKeyword(keyword);
287 }
288 }
289 }
290
291 UnicodeString
292 PluralRules::getKeywordOther() const {
293 return UnicodeString(TRUE, PLURAL_KEYWORD_OTHER, 5);
294 }
295
296 UBool
297 PluralRules::operator==(const PluralRules& other) const {
298 int32_t limit;
299 const UnicodeString *ptrKeyword;
300 UErrorCode status= U_ZERO_ERROR;
301
302 if ( this == &other ) {
303 return TRUE;
304 }
305 LocalPointer<StringEnumeration> myKeywordList(getKeywords(status));
306 LocalPointer<StringEnumeration> otherKeywordList(other.getKeywords(status));
307 if (U_FAILURE(status)) {
308 return FALSE;
309 }
310
311 if (myKeywordList->count(status)!=otherKeywordList->count(status)) {
312 return FALSE;
313 }
314 myKeywordList->reset(status);
315 while ((ptrKeyword=myKeywordList->snext(status))!=NULL) {
316 if (!other.isKeyword(*ptrKeyword)) {
317 return FALSE;
318 }
319 }
320 otherKeywordList->reset(status);
321 while ((ptrKeyword=otherKeywordList->snext(status))!=NULL) {
322 if (!this->isKeyword(*ptrKeyword)) {
323 return FALSE;
324 }
325 }
326 if (U_FAILURE(status)) {
327 return FALSE;
328 }
329
330 if ((limit=this->getRepeatLimit()) != other.getRepeatLimit()) {
331 return FALSE;
332 }
333 UnicodeString myKeyword, otherKeyword;
334 for (int32_t i=0; i<limit; ++i) {
335 myKeyword = this->select(i);
336 otherKeyword = other.select(i);
337 if (myKeyword!=otherKeyword) {
338 return FALSE;
339 }
340 }
341 return TRUE;
342 }
343
344 void
345 PluralRules::parseDescription(UnicodeString& data, RuleChain& rules, UErrorCode &status)
346 {
347 int32_t ruleIndex=0;
348 UnicodeString token;
349 tokenType type;
350 tokenType prevType=none;
351 RuleChain *ruleChain=NULL;
352 AndConstraint *curAndConstraint=NULL;
353 OrConstraint *orNode=NULL;
354 RuleChain *lastChain=NULL;
355
356 if (U_FAILURE(status)) {
357 return;
358 }
359 UnicodeString ruleData = data.toLower("");
360 while (ruleIndex< ruleData.length()) {
361 mParser->getNextToken(ruleData, &ruleIndex, token, type, status);
362 if (U_FAILURE(status)) {
363 return;
364 }
365 mParser->checkSyntax(prevType, type, status);
366 if (U_FAILURE(status)) {
367 return;
368 }
369 switch (type) {
370 case tAnd:
371 U_ASSERT(curAndConstraint != NULL);
372 curAndConstraint = curAndConstraint->add();
373 break;
374 case tOr:
375 lastChain = &rules;
376 while (lastChain->next !=NULL) {
377 lastChain = lastChain->next;
378 }
379 orNode=lastChain->ruleHeader;
380 while (orNode->next != NULL) {
381 orNode = orNode->next;
382 }
383 orNode->next= new OrConstraint();
384 orNode=orNode->next;
385 orNode->next=NULL;
386 curAndConstraint = orNode->add();
387 break;
388 case tIs:
389 U_ASSERT(curAndConstraint != NULL);
390 curAndConstraint->rangeHigh=-1;
391 break;
392 case tNot:
393 U_ASSERT(curAndConstraint != NULL);
394 curAndConstraint->notIn=TRUE;
395 break;
396 case tIn:
397 U_ASSERT(curAndConstraint != NULL);
398 curAndConstraint->rangeHigh=PLURAL_RANGE_HIGH;
399 curAndConstraint->integerOnly = TRUE;
400 break;
401 case tWithin:
402 U_ASSERT(curAndConstraint != NULL);
403 curAndConstraint->rangeHigh=PLURAL_RANGE_HIGH;
404 break;
405 case tNumber:
406 U_ASSERT(curAndConstraint != NULL);
407 if ( (curAndConstraint->op==AndConstraint::MOD)&&
408 (curAndConstraint->opNum == -1 ) ) {
409 curAndConstraint->opNum=getNumberValue(token);
410 }
411 else {
412 if (curAndConstraint->rangeLow == -1) {
413 curAndConstraint->rangeLow=getNumberValue(token);
414 }
415 else {
416 curAndConstraint->rangeHigh=getNumberValue(token);
417 }
418 }
419 break;
420 case tMod:
421 U_ASSERT(curAndConstraint != NULL);
422 curAndConstraint->op=AndConstraint::MOD;
423 break;
424 case tKeyword:
425 if (ruleChain==NULL) {
426 ruleChain = &rules;
427 }
428 else {
429 while (ruleChain->next!=NULL){
430 ruleChain=ruleChain->next;
431 }
432 ruleChain=ruleChain->next=new RuleChain();
433 }
434 if (ruleChain->ruleHeader != NULL) {
435 delete ruleChain->ruleHeader;
436 }
437 orNode = ruleChain->ruleHeader = new OrConstraint();
438 curAndConstraint = orNode->add();
439 ruleChain->keyword = token;
440 break;
441 default:
442 break;
443 }
444 prevType=type;
445 }
446 }
447
448 int32_t
449 PluralRules::getNumberValue(const UnicodeString& token) const {
450 int32_t i;
451 char digits[128];
452
453 i = token.extract(0, token.length(), digits, ARRAY_SIZE(digits), US_INV);
454 digits[i]='\0';
455
456 return((int32_t)atoi(digits));
457 }
458
459
460 void
461 PluralRules::getNextLocale(const UnicodeString& localeData, int32_t* curIndex, UnicodeString& localeName) {
462 int32_t i=*curIndex;
463
464 localeName.remove();
465 while (i< localeData.length()) {
466 if ( (localeData.charAt(i)!= SPACE) && (localeData.charAt(i)!= COMMA) ) {
467 break;
468 }
469 i++;
470 }
471
472 while (i< localeData.length()) {
473 if ( (localeData.charAt(i)== SPACE) || (localeData.charAt(i)== COMMA) ) {
474 break;
475 }
476 localeName+=localeData.charAt(i++);
477 }
478 *curIndex=i;
479 }
480
481
482 int32_t
483 PluralRules::getRepeatLimit() const {
484 if (mRules!=NULL) {
485 return mRules->getRepeatLimit();
486 }
487 else {
488 return 0;
489 }
490 }
491
492 int32_t
493 PluralRules::getKeywordIndex(const UnicodeString& keyword,
494 UErrorCode& status) const {
495 if (U_SUCCESS(status)) {
496 int32_t n = 0;
497 RuleChain* rc = mRules;
498 while (rc != NULL) {
499 if (rc->ruleHeader != NULL) {
500 if (rc->keyword == keyword) {
501 return n;
502 }
503 ++n;
504 }
505 rc = rc->next;
506 }
507 if (0 == keyword.compare(PLURAL_KEYWORD_OTHER, 5)) {
508 return n;
509 }
510 }
511 return -1;
512 }
513
514 typedef struct SampleRecord {
515 int32_t ruleIndex;
516 double value;
517 } SampleRecord;
518
519 void
520 PluralRules::initSamples(UErrorCode& status) {
521 if (U_FAILURE(status)) {
522 return;
523 }
524 Mutex lock(&pluralMutex);
525
526 if (mSamples) {
527 return;
528 }
529
530 // Note, the original design let you have multiple rules with the same keyword. But
531 // we don't use that in our data and existing functions in this implementation don't
532 // fully support it (for example, the returned keywords is a list and not a set).
533 //
534 // So I don't support this here either. If you ask for samples, or for all values,
535 // you will get information about the first rule with that keyword, not all rules with
536 // that keyword.
537
538 int32_t maxIndex = 0;
539 int32_t otherIndex = -1; // the value -1 will indicate we added 'other' at end
540 RuleChain* rc = mRules;
541 while (rc != NULL) {
542 if (rc->ruleHeader != NULL) {
543 if (otherIndex == -1 && 0 == rc->keyword.compare(PLURAL_KEYWORD_OTHER, 5)) {
544 otherIndex = maxIndex;
545 }
546 ++maxIndex;
547 }
548 rc = rc->next;
549 }
550 if (otherIndex == -1) {
551 ++maxIndex;
552 }
553
554 LocalMemory<int32_t> newSampleInfo;
555 if (NULL == newSampleInfo.allocateInsteadAndCopy(maxIndex)) {
556 status = U_MEMORY_ALLOCATION_ERROR;
557 return;
558 }
559
560 const int32_t LIMIT_MASK = 0x1 << 31;
561
562 rc = mRules;
563 int32_t n = 0;
564 while (rc != NULL) {
565 if (rc->ruleHeader != NULL) {
566 newSampleInfo[n++] = rc->ruleHeader->isLimited() ? LIMIT_MASK : 0;
567 }
568 rc = rc->next;
569 }
570 if (otherIndex == -1) {
571 newSampleInfo[maxIndex - 1] = 0; // unlimited
572 }
573
574 MaybeStackArray<SampleRecord, 10> newSamples;
575 int32_t sampleCount = 0;
576
577 int32_t limit = getRepeatLimit() * MAX_SAMPLES * 2;
578 if (limit < 10) {
579 limit = 10;
580 }
581
582 for (int i = 0, keywordsRemaining = maxIndex;
583 keywordsRemaining > 0 && i < limit;
584 ++i) {
585 double val = i / 2.0;
586
587 n = 0;
588 rc = mRules;
589 int32_t found = -1;
590 while (rc != NULL) {
591 if (rc->ruleHeader != NULL) {
592 if (rc->ruleHeader->isFulfilled(val)) {
593 found = n;
594 break;
595 }
596 ++n;
597 }
598 rc = rc->next;
599 }
600 if (found == -1) {
601 // 'other'. If there is an 'other' rule, the rule set is bad since nothing
602 // should leak through, but we don't bother to report that here.
603 found = otherIndex == -1 ? maxIndex - 1 : otherIndex;
604 }
605 if (newSampleInfo[found] == MAX_SAMPLES) { // limit flag not set
606 continue;
607 }
608 newSampleInfo[found] += 1; // won't impact limit flag
609
610 if (sampleCount == newSamples.getCapacity()) {
611 int32_t newCapacity = sampleCount < 20 ? 128 : sampleCount * 2;
612 if (NULL == newSamples.resize(newCapacity, sampleCount)) {
613 status = U_MEMORY_ALLOCATION_ERROR;
614 return;
615 }
616 }
617 newSamples[sampleCount].ruleIndex = found;
618 newSamples[sampleCount].value = val;
619 ++sampleCount;
620
621 if (newSampleInfo[found] == MAX_SAMPLES) { // limit flag not set
622 --keywordsRemaining;
623 }
624 }
625
626 // sort the values by index, leaving order otherwise unchanged
627 // this is just a selection sort for simplicity
628 LocalMemory<double> values;
629 if (NULL == values.allocateInsteadAndCopy(sampleCount)) {
630 status = U_MEMORY_ALLOCATION_ERROR;
631 return;
632 }
633 for (int i = 0, j = 0; i < maxIndex; ++i) {
634 for (int k = 0; k < sampleCount; ++k) {
635 if (newSamples[k].ruleIndex == i) {
636 values[j++] = newSamples[k].value;
637 }
638 }
639 }
640
641 // convert array of mask/lengths to array of mask/limits
642 limit = 0;
643 for (int i = 0; i < maxIndex; ++i) {
644 int32_t info = newSampleInfo[i];
645 int32_t len = info & ~LIMIT_MASK;
646 limit += len;
647 // if a rule is 'unlimited' but has fewer than MAX_SAMPLES samples,
648 // it's not really unlimited, so mark it as limited
649 int32_t mask = len < MAX_SAMPLES ? LIMIT_MASK : info & LIMIT_MASK;
650 newSampleInfo[i] = limit | mask;
651 }
652
653 // ok, we've got good data
654 mSamples = values.orphan();
655 mSampleInfo = newSampleInfo.orphan();
656 mSampleInfoCount = maxIndex;
657 }
658
659 void
660 PluralRules::addRules(RuleChain& rules) {
661 RuleChain *newRule = new RuleChain(rules);
662 this->mRules=newRule;
663 newRule->setRepeatLimit();
664 }
665
666 UnicodeString
667 PluralRules::getRuleFromResource(const Locale& locale, UPluralType type, UErrorCode& errCode) {
668 UnicodeString emptyStr;
669
670 if (U_FAILURE(errCode)) {
671 return emptyStr;
672 }
673 LocalUResourceBundlePointer rb(ures_openDirect(NULL, "plurals", &errCode));
674 if(U_FAILURE(errCode)) {
675 return emptyStr;
676 }
677 const char *typeKey;
678 switch (type) {
679 case UPLURAL_TYPE_CARDINAL:
680 typeKey = "locales";
681 break;
682 case UPLURAL_TYPE_ORDINAL:
683 typeKey = "locales_ordinals";
684 break;
685 default:
686 // Must not occur: The caller should have checked for valid types.
687 errCode = U_ILLEGAL_ARGUMENT_ERROR;
688 return emptyStr;
689 }
690 LocalUResourceBundlePointer locRes(ures_getByKey(rb.getAlias(), typeKey, NULL, &errCode));
691 if(U_FAILURE(errCode)) {
692 return emptyStr;
693 }
694 int32_t resLen=0;
695 const char *curLocaleName=locale.getName();
696 const UChar* s = ures_getStringByKey(locRes.getAlias(), curLocaleName, &resLen, &errCode);
697
698 if (s == NULL) {
699 // Check parent locales.
700 UErrorCode status = U_ZERO_ERROR;
701 char parentLocaleName[ULOC_FULLNAME_CAPACITY];
702 const char *curLocaleName=locale.getName();
703 uprv_strcpy(parentLocaleName, curLocaleName);
704
705 while (uloc_getParent(parentLocaleName, parentLocaleName,
706 ULOC_FULLNAME_CAPACITY, &status) > 0) {
707 resLen=0;
708 s = ures_getStringByKey(locRes.getAlias(), parentLocaleName, &resLen, &status);
709 if (s != NULL) {
710 errCode = U_ZERO_ERROR;
711 break;
712 }
713 status = U_ZERO_ERROR;
714 }
715 }
716 if (s==NULL) {
717 return emptyStr;
718 }
719
720 char setKey[256];
721 UChar result[256];
722 u_UCharsToChars(s, setKey, resLen + 1);
723 // printf("\n PluralRule: %s\n", setKey);
724
725
726 LocalUResourceBundlePointer ruleRes(ures_getByKey(rb.getAlias(), "rules", NULL, &errCode));
727 if(U_FAILURE(errCode)) {
728 return emptyStr;
729 }
730 resLen=0;
731 LocalUResourceBundlePointer setRes(ures_getByKey(ruleRes.getAlias(), setKey, NULL, &errCode));
732 if (U_FAILURE(errCode)) {
733 return emptyStr;
734 }
735
736 int32_t numberKeys = ures_getSize(setRes.getAlias());
737 char *key=NULL;
738 int32_t len=0;
739 for(int32_t i=0; i<numberKeys; ++i) {
740 int32_t keyLen;
741 resLen=0;
742 s=ures_getNextString(setRes.getAlias(), &resLen, (const char**)&key, &errCode);
743 keyLen = (int32_t)uprv_strlen(key);
744 u_charsToUChars(key, result+len, keyLen);
745 len += keyLen;
746 result[len++]=COLON;
747 uprv_memcpy(result+len, s, resLen*sizeof(UChar));
748 len += resLen;
749 result[len++]=SEMI_COLON;
750 }
751 result[len++]=0;
752 u_UCharsToChars(result, setKey, len);
753 // printf(" Rule: %s\n", setKey);
754
755 return UnicodeString(result);
756 }
757
758 AndConstraint::AndConstraint() {
759 op = AndConstraint::NONE;
760 opNum=-1;
761 rangeLow=-1;
762 rangeHigh=-1;
763 notIn=FALSE;
764 integerOnly=FALSE;
765 next=NULL;
766 }
767
768
769 AndConstraint::AndConstraint(const AndConstraint& other) {
770 this->op = other.op;
771 this->opNum=other.opNum;
772 this->rangeLow=other.rangeLow;
773 this->rangeHigh=other.rangeHigh;
774 this->integerOnly=other.integerOnly;
775 this->notIn=other.notIn;
776 if (other.next==NULL) {
777 this->next=NULL;
778 }
779 else {
780 this->next = new AndConstraint(*other.next);
781 }
782 }
783
784 AndConstraint::~AndConstraint() {
785 if (next!=NULL) {
786 delete next;
787 }
788 }
789
790
791 UBool
792 AndConstraint::isFulfilled(double number) {
793 UBool result=TRUE;
794 double value=number;
795
796 // arrrrrrgh
797 if ((rangeHigh == -1 || integerOnly) && number != uprv_floor(number)) {
798 return notIn;
799 }
800
801 if ( op == MOD ) {
802 value = (int32_t)value % opNum;
803 }
804 if ( rangeHigh == -1 ) {
805 if ( rangeLow == -1 ) {
806 result = TRUE; // empty rule
807 }
808 else {
809 if ( value == rangeLow ) {
810 result = TRUE;
811 }
812 else {
813 result = FALSE;
814 }
815 }
816 }
817 else {
818 if ((rangeLow <= value) && (value <= rangeHigh)) {
819 if (integerOnly) {
820 if ( value != (int32_t)value) {
821 result = FALSE;
822 }
823 else {
824 result = TRUE;
825 }
826 }
827 else {
828 result = TRUE;
829 }
830 }
831 else {
832 result = FALSE;
833 }
834 }
835 if (notIn) {
836 return !result;
837 }
838 else {
839 return result;
840 }
841 }
842
843 UBool
844 AndConstraint::isLimited() {
845 return (rangeHigh == -1 || integerOnly) && !notIn && op != MOD;
846 }
847
848 int32_t
849 AndConstraint::updateRepeatLimit(int32_t maxLimit) {
850
851 if ( op == MOD ) {
852 return uprv_max(opNum, maxLimit);
853 }
854 else {
855 if ( rangeHigh == -1 ) {
856 return uprv_max(rangeLow, maxLimit);
857 }
858 else{
859 return uprv_max(rangeHigh, maxLimit);
860 }
861 }
862 }
863
864
865 AndConstraint*
866 AndConstraint::add()
867 {
868 this->next = new AndConstraint();
869 return this->next;
870 }
871
872 OrConstraint::OrConstraint() {
873 childNode=NULL;
874 next=NULL;
875 }
876
877 OrConstraint::OrConstraint(const OrConstraint& other) {
878 if ( other.childNode == NULL ) {
879 this->childNode = NULL;
880 }
881 else {
882 this->childNode = new AndConstraint(*(other.childNode));
883 }
884 if (other.next == NULL ) {
885 this->next = NULL;
886 }
887 else {
888 this->next = new OrConstraint(*(other.next));
889 }
890 }
891
892 OrConstraint::~OrConstraint() {
893 if (childNode!=NULL) {
894 delete childNode;
895 }
896 if (next!=NULL) {
897 delete next;
898 }
899 }
900
901 AndConstraint*
902 OrConstraint::add()
903 {
904 OrConstraint *curOrConstraint=this;
905 {
906 while (curOrConstraint->next!=NULL) {
907 curOrConstraint = curOrConstraint->next;
908 }
909 curOrConstraint->next = NULL;
910 curOrConstraint->childNode = new AndConstraint();
911 }
912 return curOrConstraint->childNode;
913 }
914
915 UBool
916 OrConstraint::isFulfilled(double number) {
917 OrConstraint* orRule=this;
918 UBool result=FALSE;
919
920 while (orRule!=NULL && !result) {
921 result=TRUE;
922 AndConstraint* andRule = orRule->childNode;
923 while (andRule!=NULL && result) {
924 result = andRule->isFulfilled(number);
925 andRule=andRule->next;
926 }
927 orRule = orRule->next;
928 }
929
930 return result;
931 }
932
933 UBool
934 OrConstraint::isLimited() {
935 for (OrConstraint *orc = this; orc != NULL; orc = orc->next) {
936 UBool result = FALSE;
937 for (AndConstraint *andc = orc->childNode; andc != NULL; andc = andc->next) {
938 if (andc->isLimited()) {
939 result = TRUE;
940 break;
941 }
942 }
943 if (result == FALSE) {
944 return FALSE;
945 }
946 }
947 return TRUE;
948 }
949
950 RuleChain::RuleChain() {
951 ruleHeader=NULL;
952 next = NULL;
953 repeatLimit=0;
954 }
955
956 RuleChain::RuleChain(const RuleChain& other) {
957 this->repeatLimit = other.repeatLimit;
958 this->keyword=other.keyword;
959 if (other.ruleHeader != NULL) {
960 this->ruleHeader = new OrConstraint(*(other.ruleHeader));
961 }
962 else {
963 this->ruleHeader = NULL;
964 }
965 if (other.next != NULL ) {
966 this->next = new RuleChain(*other.next);
967 }
968 else
969 {
970 this->next = NULL;
971 }
972 }
973
974 RuleChain::~RuleChain() {
975 if (next != NULL) {
976 delete next;
977 }
978 if ( ruleHeader != NULL ) {
979 delete ruleHeader;
980 }
981 }
982
983 UnicodeString
984 RuleChain::select(double number) const {
985
986 if ( ruleHeader != NULL ) {
987 if (ruleHeader->isFulfilled(number)) {
988 return keyword;
989 }
990 }
991 if ( next != NULL ) {
992 return next->select(number);
993 }
994 else {
995 return UnicodeString(TRUE, PLURAL_KEYWORD_OTHER, 5);
996 }
997
998 }
999
1000 void
1001 RuleChain::dumpRules(UnicodeString& result) {
1002 UChar digitString[16];
1003
1004 if ( ruleHeader != NULL ) {
1005 result += keyword;
1006 OrConstraint* orRule=ruleHeader;
1007 while ( orRule != NULL ) {
1008 AndConstraint* andRule=orRule->childNode;
1009 while ( andRule != NULL ) {
1010 if ( (andRule->op==AndConstraint::NONE) && (andRule->rangeHigh==-1) ) {
1011 result += UNICODE_STRING_SIMPLE(" n is ");
1012 if (andRule->notIn) {
1013 result += UNICODE_STRING_SIMPLE("not ");
1014 }
1015 uprv_itou(digitString,16, andRule->rangeLow,10,0);
1016 result += UnicodeString(digitString);
1017 }
1018 else {
1019 if (andRule->op==AndConstraint::MOD) {
1020 result += UNICODE_STRING_SIMPLE(" n mod ");
1021 uprv_itou(digitString,16, andRule->opNum,10,0);
1022 result += UnicodeString(digitString);
1023 }
1024 else {
1025 result += UNICODE_STRING_SIMPLE(" n ");
1026 }
1027 if (andRule->rangeHigh==-1) {
1028 if (andRule->notIn) {
1029 result += UNICODE_STRING_SIMPLE(" is not ");
1030 uprv_itou(digitString,16, andRule->rangeLow,10,0);
1031 result += UnicodeString(digitString);
1032 }
1033 else {
1034 result += UNICODE_STRING_SIMPLE(" is ");
1035 uprv_itou(digitString,16, andRule->rangeLow,10,0);
1036 result += UnicodeString(digitString);
1037 }
1038 }
1039 else {
1040 if (andRule->notIn) {
1041 if ( andRule->integerOnly ) {
1042 result += UNICODE_STRING_SIMPLE(" not in ");
1043 }
1044 else {
1045 result += UNICODE_STRING_SIMPLE(" not within ");
1046 }
1047 uprv_itou(digitString,16, andRule->rangeLow,10,0);
1048 result += UnicodeString(digitString);
1049 result += UNICODE_STRING_SIMPLE(" .. ");
1050 uprv_itou(digitString,16, andRule->rangeHigh,10,0);
1051 result += UnicodeString(digitString);
1052 }
1053 else {
1054 if ( andRule->integerOnly ) {
1055 result += UNICODE_STRING_SIMPLE(" in ");
1056 }
1057 else {
1058 result += UNICODE_STRING_SIMPLE(" within ");
1059 }
1060 uprv_itou(digitString,16, andRule->rangeLow,10,0);
1061 result += UnicodeString(digitString);
1062 result += UNICODE_STRING_SIMPLE(" .. ");
1063 uprv_itou(digitString,16, andRule->rangeHigh,10,0);
1064 }
1065 }
1066 }
1067 if ( (andRule=andRule->next) != NULL) {
1068 result.append(PK_AND, 3);
1069 }
1070 }
1071 if ( (orRule = orRule->next) != NULL ) {
1072 result.append(PK_OR, 2);
1073 }
1074 }
1075 }
1076 if ( next != NULL ) {
1077 next->dumpRules(result);
1078 }
1079 }
1080
1081 int32_t
1082 RuleChain::getRepeatLimit () {
1083 return repeatLimit;
1084 }
1085
1086 void
1087 RuleChain::setRepeatLimit () {
1088 int32_t limit=0;
1089
1090 if ( next != NULL ) {
1091 next->setRepeatLimit();
1092 limit = next->repeatLimit;
1093 }
1094
1095 if ( ruleHeader != NULL ) {
1096 OrConstraint* orRule=ruleHeader;
1097 while ( orRule != NULL ) {
1098 AndConstraint* andRule=orRule->childNode;
1099 while ( andRule != NULL ) {
1100 limit = andRule->updateRepeatLimit(limit);
1101 andRule = andRule->next;
1102 }
1103 orRule = orRule->next;
1104 }
1105 }
1106 repeatLimit = limit;
1107 }
1108
1109 UErrorCode
1110 RuleChain::getKeywords(int32_t capacityOfKeywords, UnicodeString* keywords, int32_t& arraySize) const {
1111 if ( arraySize < capacityOfKeywords-1 ) {
1112 keywords[arraySize++]=keyword;
1113 }
1114 else {
1115 return U_BUFFER_OVERFLOW_ERROR;
1116 }
1117
1118 if ( next != NULL ) {
1119 return next->getKeywords(capacityOfKeywords, keywords, arraySize);
1120 }
1121 else {
1122 return U_ZERO_ERROR;
1123 }
1124 }
1125
1126 UBool
1127 RuleChain::isKeyword(const UnicodeString& keywordParam) const {
1128 if ( keyword == keywordParam ) {
1129 return TRUE;
1130 }
1131
1132 if ( next != NULL ) {
1133 return next->isKeyword(keywordParam);
1134 }
1135 else {
1136 return FALSE;
1137 }
1138 }
1139
1140
1141 RuleParser::RuleParser() {
1142 }
1143
1144 RuleParser::~RuleParser() {
1145 }
1146
1147 void
1148 RuleParser::checkSyntax(tokenType prevType, tokenType curType, UErrorCode &status)
1149 {
1150 if (U_FAILURE(status)) {
1151 return;
1152 }
1153 switch(prevType) {
1154 case none:
1155 case tSemiColon:
1156 if (curType!=tKeyword) {
1157 status = U_UNEXPECTED_TOKEN;
1158 }
1159 break;
1160 case tVariableN :
1161 if (curType != tIs && curType != tMod && curType != tIn &&
1162 curType != tNot && curType != tWithin) {
1163 status = U_UNEXPECTED_TOKEN;
1164 }
1165 break;
1166 case tZero:
1167 case tOne:
1168 case tTwo:
1169 case tFew:
1170 case tMany:
1171 case tOther:
1172 case tKeyword:
1173 if (curType != tColon) {
1174 status = U_UNEXPECTED_TOKEN;
1175 }
1176 break;
1177 case tColon :
1178 if (curType != tVariableN) {
1179 status = U_UNEXPECTED_TOKEN;
1180 }
1181 break;
1182 case tIs:
1183 if ( curType != tNumber && curType != tNot) {
1184 status = U_UNEXPECTED_TOKEN;
1185 }
1186 break;
1187 case tNot:
1188 if (curType != tNumber && curType != tIn && curType != tWithin) {
1189 status = U_UNEXPECTED_TOKEN;
1190 }
1191 break;
1192 case tMod:
1193 case tDot:
1194 case tIn:
1195 case tWithin:
1196 case tAnd:
1197 case tOr:
1198 if (curType != tNumber && curType != tVariableN) {
1199 status = U_UNEXPECTED_TOKEN;
1200 }
1201 break;
1202 case tNumber:
1203 if (curType != tDot && curType != tSemiColon && curType != tIs && curType != tNot &&
1204 curType != tIn && curType != tWithin && curType != tAnd && curType != tOr)
1205 {
1206 status = U_UNEXPECTED_TOKEN;
1207 }
1208 break;
1209 default:
1210 status = U_UNEXPECTED_TOKEN;
1211 break;
1212 }
1213 }
1214
1215 void
1216 RuleParser::getNextToken(const UnicodeString& ruleData,
1217 int32_t *ruleIndex,
1218 UnicodeString& token,
1219 tokenType& type,
1220 UErrorCode &status)
1221 {
1222 int32_t curIndex= *ruleIndex;
1223 UChar ch;
1224 tokenType prevType=none;
1225
1226 if (U_FAILURE(status)) {
1227 return;
1228 }
1229 while (curIndex<ruleData.length()) {
1230 ch = ruleData.charAt(curIndex);
1231 if ( !inRange(ch, type) ) {
1232 status = U_ILLEGAL_CHARACTER;
1233 return;
1234 }
1235 switch (type) {
1236 case tSpace:
1237 if ( *ruleIndex != curIndex ) { // letter
1238 token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
1239 *ruleIndex=curIndex;
1240 type=prevType;
1241 getKeyType(token, type, status);
1242 return;
1243 }
1244 else {
1245 *ruleIndex=*ruleIndex+1;
1246 }
1247 break; // consective space
1248 case tColon:
1249 case tSemiColon:
1250 if ( *ruleIndex != curIndex ) {
1251 token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
1252 *ruleIndex=curIndex;
1253 type=prevType;
1254 getKeyType(token, type, status);
1255 return;
1256 }
1257 else {
1258 *ruleIndex=curIndex+1;
1259 return;
1260 }
1261 case tLetter:
1262 if ((type==prevType)||(prevType==none)) {
1263 prevType=type;
1264 break;
1265 }
1266 break;
1267 case tNumber:
1268 if ((type==prevType)||(prevType==none)) {
1269 prevType=type;
1270 break;
1271 }
1272 else {
1273 *ruleIndex=curIndex+1;
1274 return;
1275 }
1276 case tDot:
1277 if (prevType==none) { // first dot
1278 prevType=type;
1279 continue;
1280 }
1281 else {
1282 if ( *ruleIndex != curIndex ) {
1283 token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
1284 *ruleIndex=curIndex; // letter
1285 type=prevType;
1286 getKeyType(token, type, status);
1287 return;
1288 }
1289 else { // two consective dots
1290 *ruleIndex=curIndex+2;
1291 return;
1292 }
1293 }
1294 default:
1295 status = U_UNEXPECTED_TOKEN;
1296 return;
1297 }
1298 curIndex++;
1299 }
1300 if ( curIndex>=ruleData.length() ) {
1301 if ( (type == tLetter)||(type == tNumber) ) {
1302 token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
1303 getKeyType(token, type, status);
1304 if (U_FAILURE(status)) {
1305 return;
1306 }
1307 }
1308 *ruleIndex = ruleData.length();
1309 }
1310 }
1311
1312 UBool
1313 RuleParser::inRange(UChar ch, tokenType& type) {
1314 if ((ch>=CAP_A) && (ch<=CAP_Z)) {
1315 // we assume all characters are in lower case already.
1316 return FALSE;
1317 }
1318 if ((ch>=LOW_A) && (ch<=LOW_Z)) {
1319 type = tLetter;
1320 return TRUE;
1321 }
1322 if ((ch>=U_ZERO) && (ch<=U_NINE)) {
1323 type = tNumber;
1324 return TRUE;
1325 }
1326 switch (ch) {
1327 case COLON:
1328 type = tColon;
1329 return TRUE;
1330 case SPACE:
1331 type = tSpace;
1332 return TRUE;
1333 case SEMI_COLON:
1334 type = tSemiColon;
1335 return TRUE;
1336 case DOT:
1337 type = tDot;
1338 return TRUE;
1339 default :
1340 type = none;
1341 return FALSE;
1342 }
1343 }
1344
1345
1346 void
1347 RuleParser::getKeyType(const UnicodeString& token, tokenType& keyType, UErrorCode &status)
1348 {
1349 if (U_FAILURE(status)) {
1350 return;
1351 }
1352 if ( keyType==tNumber) {
1353 }
1354 else if (0 == token.compare(PK_VAR_N, 1)) {
1355 keyType = tVariableN;
1356 }
1357 else if (0 == token.compare(PK_IS, 2)) {
1358 keyType = tIs;
1359 }
1360 else if (0 == token.compare(PK_AND, 3)) {
1361 keyType = tAnd;
1362 }
1363 else if (0 == token.compare(PK_IN, 2)) {
1364 keyType = tIn;
1365 }
1366 else if (0 == token.compare(PK_WITHIN, 6)) {
1367 keyType = tWithin;
1368 }
1369 else if (0 == token.compare(PK_NOT, 3)) {
1370 keyType = tNot;
1371 }
1372 else if (0 == token.compare(PK_MOD, 3)) {
1373 keyType = tMod;
1374 }
1375 else if (0 == token.compare(PK_OR, 2)) {
1376 keyType = tOr;
1377 }
1378 else if ( isValidKeyword(token) ) {
1379 keyType = tKeyword;
1380 }
1381 else {
1382 status = U_UNEXPECTED_TOKEN;
1383 }
1384 }
1385
1386 UBool
1387 RuleParser::isValidKeyword(const UnicodeString& token) {
1388 return PatternProps::isIdentifier(token.getBuffer(), token.length());
1389 }
1390
1391 PluralKeywordEnumeration::PluralKeywordEnumeration(RuleChain *header, UErrorCode& status)
1392 : pos(0), fKeywordNames(status) {
1393 if (U_FAILURE(status)) {
1394 return;
1395 }
1396 fKeywordNames.setDeleter(uprv_deleteUObject);
1397 UBool addKeywordOther=TRUE;
1398 RuleChain *node=header;
1399 while(node!=NULL) {
1400 fKeywordNames.addElement(new UnicodeString(node->keyword), status);
1401 if (U_FAILURE(status)) {
1402 return;
1403 }
1404 if (0 == node->keyword.compare(PLURAL_KEYWORD_OTHER, 5)) {
1405 addKeywordOther= FALSE;
1406 }
1407 node=node->next;
1408 }
1409
1410 if (addKeywordOther) {
1411 fKeywordNames.addElement(new UnicodeString(PLURAL_KEYWORD_OTHER), status);
1412 }
1413 }
1414
1415 const UnicodeString*
1416 PluralKeywordEnumeration::snext(UErrorCode& status) {
1417 if (U_SUCCESS(status) && pos < fKeywordNames.size()) {
1418 return (const UnicodeString*)fKeywordNames.elementAt(pos++);
1419 }
1420 return NULL;
1421 }
1422
1423 void
1424 PluralKeywordEnumeration::reset(UErrorCode& /*status*/) {
1425 pos=0;
1426 }
1427
1428 int32_t
1429 PluralKeywordEnumeration::count(UErrorCode& /*status*/) const {
1430 return fKeywordNames.size();
1431 }
1432
1433 PluralKeywordEnumeration::~PluralKeywordEnumeration() {
1434 }
1435
1436 U_NAMESPACE_END
1437
1438
1439 #endif /* #if !UCONFIG_NO_FORMATTING */
1440
1441 //eof