]>
Commit | Line | Data |
---|---|---|
46f4442e A |
1 | /* |
2 | ******************************************************************************* | |
51004dcb | 3 | * Copyright (C) 2007-2012, International Business Machines Corporation and |
46f4442e A |
4 | * others. All Rights Reserved. |
5 | ******************************************************************************* | |
6 | * | |
51004dcb | 7 | * File plurrule.cpp |
46f4442e A |
8 | */ |
9 | ||
46f4442e | 10 | #include "unicode/utypes.h" |
4388f060 | 11 | #include "unicode/localpointer.h" |
46f4442e | 12 | #include "unicode/plurrule.h" |
51004dcb | 13 | #include "unicode/upluralrules.h" |
4388f060 | 14 | #include "unicode/ures.h" |
46f4442e A |
15 | #include "cmemory.h" |
16 | #include "cstring.h" | |
17 | #include "hash.h" | |
18 | #include "mutex.h" | |
4388f060 | 19 | #include "patternprops.h" |
46f4442e A |
20 | #include "plurrule_impl.h" |
21 | #include "putilimp.h" | |
22 | #include "ucln_in.h" | |
23 | #include "ustrfmt.h" | |
24 | #include "locutil.h" | |
4388f060 | 25 | #include "uassert.h" |
46f4442e A |
26 | |
27 | #if !UCONFIG_NO_FORMATTING | |
28 | ||
29 | U_NAMESPACE_BEGIN | |
30 | ||
4388f060 | 31 | // shared by all instances when lazy-initializing samples |
51004dcb | 32 | static UMutex pluralMutex = U_MUTEX_INITIALIZER; |
46f4442e A |
33 | |
// Element count of a true array (not a pointer), as an int32_t.
// Both the argument and the whole expansion are parenthesized so the macro
// stays a single primary expression wherever it is used.
#define ARRAY_SIZE(array) ((int32_t)(sizeof(array) / sizeof((array)[0])))
35 | ||
46f4442e A |
36 | static const UChar PLURAL_KEYWORD_OTHER[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,0}; |
37 | static const UChar PLURAL_DEFAULT_RULE[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,COLON,SPACE,LOW_N,0}; | |
38 | static const UChar PK_IN[]={LOW_I,LOW_N,0}; | |
39 | static const UChar PK_NOT[]={LOW_N,LOW_O,LOW_T,0}; | |
40 | static const UChar PK_IS[]={LOW_I,LOW_S,0}; | |
41 | static const UChar PK_MOD[]={LOW_M,LOW_O,LOW_D,0}; | |
42 | static const UChar PK_AND[]={LOW_A,LOW_N,LOW_D,0}; | |
43 | static const UChar PK_OR[]={LOW_O,LOW_R,0}; | |
44 | static const UChar PK_VAR_N[]={LOW_N,0}; | |
45 | static const UChar PK_WITHIN[]={LOW_W,LOW_I,LOW_T,LOW_H,LOW_I,LOW_N,0}; | |
46 | ||
47 | UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralRules) | |
48 | UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralKeywordEnumeration) | |
49 | ||
50 | PluralRules::PluralRules(UErrorCode& status) | |
51 | : UObject(), | |
4388f060 A |
52 | mRules(NULL), |
53 | mParser(NULL), | |
54 | mSamples(NULL), | |
55 | mSampleInfo(NULL), | |
56 | mSampleInfoCount(0) | |
46f4442e | 57 | { |
729e4ab9 A |
58 | if (U_FAILURE(status)) { |
59 | return; | |
60 | } | |
61 | mParser = new RuleParser(); | |
46f4442e A |
62 | if (mParser==NULL) { |
63 | status = U_MEMORY_ALLOCATION_ERROR; | |
64 | } | |
65 | } | |
66 | ||
67 | PluralRules::PluralRules(const PluralRules& other) | |
68 | : UObject(other), | |
69 | mRules(NULL), | |
4388f060 A |
70 | mParser(NULL), |
71 | mSamples(NULL), | |
72 | mSampleInfo(NULL), | |
73 | mSampleInfoCount(0) | |
46f4442e A |
74 | { |
75 | *this=other; | |
76 | } | |
77 | ||
78 | PluralRules::~PluralRules() { | |
79 | delete mRules; | |
80 | delete mParser; | |
4388f060 A |
81 | uprv_free(mSamples); |
82 | uprv_free(mSampleInfo); | |
46f4442e A |
83 | } |
84 | ||
85 | PluralRules* | |
86 | PluralRules::clone() const { | |
87 | return new PluralRules(*this); | |
88 | } | |
89 | ||
90 | PluralRules& | |
91 | PluralRules::operator=(const PluralRules& other) { | |
92 | if (this != &other) { | |
93 | delete mRules; | |
94 | if (other.mRules==NULL) { | |
95 | mRules = NULL; | |
96 | } | |
97 | else { | |
98 | mRules = new RuleChain(*other.mRules); | |
99 | } | |
100 | delete mParser; | |
101 | mParser = new RuleParser(); | |
4388f060 A |
102 | |
103 | uprv_free(mSamples); | |
104 | mSamples = NULL; | |
105 | ||
106 | uprv_free(mSampleInfo); | |
107 | mSampleInfo = NULL; | |
108 | mSampleInfoCount = 0; | |
46f4442e A |
109 | } |
110 | ||
111 | return *this; | |
112 | } | |
113 | ||
114 | PluralRules* U_EXPORT2 | |
115 | PluralRules::createRules(const UnicodeString& description, UErrorCode& status) { | |
116 | RuleChain rules; | |
117 | ||
729e4ab9 A |
118 | if (U_FAILURE(status)) { |
119 | return NULL; | |
120 | } | |
46f4442e A |
121 | PluralRules *newRules = new PluralRules(status); |
122 | if ( (newRules != NULL)&& U_SUCCESS(status) ) { | |
123 | newRules->parseDescription((UnicodeString &)description, rules, status); | |
124 | if (U_SUCCESS(status)) { | |
125 | newRules->addRules(rules); | |
126 | } | |
127 | } | |
128 | if (U_FAILURE(status)) { | |
129 | delete newRules; | |
130 | return NULL; | |
131 | } | |
132 | else { | |
133 | return newRules; | |
134 | } | |
135 | } | |
136 | ||
137 | PluralRules* U_EXPORT2 | |
138 | PluralRules::createDefaultRules(UErrorCode& status) { | |
4388f060 | 139 | return createRules(UnicodeString(TRUE, PLURAL_DEFAULT_RULE, -1), status); |
46f4442e A |
140 | } |
141 | ||
142 | PluralRules* U_EXPORT2 | |
143 | PluralRules::forLocale(const Locale& locale, UErrorCode& status) { | |
51004dcb A |
144 | return forLocale(locale, UPLURAL_TYPE_CARDINAL, status); |
145 | } | |
146 | ||
147 | PluralRules* U_EXPORT2 | |
148 | PluralRules::forLocale(const Locale& locale, UPluralType type, UErrorCode& status) { | |
46f4442e | 149 | RuleChain rChain; |
729e4ab9 A |
150 | if (U_FAILURE(status)) { |
151 | return NULL; | |
152 | } | |
51004dcb A |
153 | if (type >= UPLURAL_TYPE_COUNT) { |
154 | status = U_ILLEGAL_ARGUMENT_ERROR; | |
155 | return NULL; | |
156 | } | |
46f4442e | 157 | PluralRules *newObj = new PluralRules(status); |
729e4ab9 | 158 | if (newObj==NULL || U_FAILURE(status)) { |
4388f060 | 159 | delete newObj; |
46f4442e A |
160 | return NULL; |
161 | } | |
51004dcb | 162 | UnicodeString locRule = newObj->getRuleFromResource(locale, type, status); |
46f4442e A |
163 | if ((locRule.length() != 0) && U_SUCCESS(status)) { |
164 | newObj->parseDescription(locRule, rChain, status); | |
165 | if (U_SUCCESS(status)) { | |
166 | newObj->addRules(rChain); | |
167 | } | |
168 | } | |
169 | if (U_FAILURE(status)||(locRule.length() == 0)) { | |
170 | // use default plural rule | |
171 | status = U_ZERO_ERROR; | |
172 | UnicodeString defRule = UnicodeString(PLURAL_DEFAULT_RULE); | |
173 | newObj->parseDescription(defRule, rChain, status); | |
174 | newObj->addRules(rChain); | |
175 | } | |
4388f060 | 176 | |
46f4442e A |
177 | return newObj; |
178 | } | |
179 | ||
180 | UnicodeString | |
181 | PluralRules::select(int32_t number) const { | |
182 | if (mRules == NULL) { | |
4388f060 | 183 | return UnicodeString(TRUE, PLURAL_DEFAULT_RULE, -1); |
46f4442e A |
184 | } |
185 | else { | |
186 | return mRules->select(number); | |
187 | } | |
188 | } | |
189 | ||
190 | UnicodeString | |
191 | PluralRules::select(double number) const { | |
192 | if (mRules == NULL) { | |
4388f060 | 193 | return UnicodeString(TRUE, PLURAL_DEFAULT_RULE, -1); |
46f4442e A |
194 | } |
195 | else { | |
196 | return mRules->select(number); | |
197 | } | |
198 | } | |
199 | ||
200 | StringEnumeration* | |
201 | PluralRules::getKeywords(UErrorCode& status) const { | |
202 | if (U_FAILURE(status)) return NULL; | |
203 | StringEnumeration* nameEnumerator = new PluralKeywordEnumeration(mRules, status); | |
4388f060 A |
204 | if (U_FAILURE(status)) { |
205 | delete nameEnumerator; | |
206 | return NULL; | |
207 | } | |
729e4ab9 | 208 | |
46f4442e A |
209 | return nameEnumerator; |
210 | } | |
211 | ||
4388f060 A |
212 | double |
213 | PluralRules::getUniqueKeywordValue(const UnicodeString& keyword) { | |
214 | double val = 0.0; | |
215 | UErrorCode status = U_ZERO_ERROR; | |
216 | int32_t count = getSamplesInternal(keyword, &val, 1, FALSE, status); | |
217 | return count == 1 ? val : UPLRULES_NO_UNIQUE_VALUE; | |
218 | } | |
219 | ||
220 | int32_t | |
221 | PluralRules::getAllKeywordValues(const UnicodeString &keyword, double *dest, | |
222 | int32_t destCapacity, UErrorCode& error) { | |
223 | return getSamplesInternal(keyword, dest, destCapacity, FALSE, error); | |
224 | } | |
225 | ||
226 | int32_t | |
227 | PluralRules::getSamples(const UnicodeString &keyword, double *dest, | |
228 | int32_t destCapacity, UErrorCode& status) { | |
229 | return getSamplesInternal(keyword, dest, destCapacity, TRUE, status); | |
230 | } | |
231 | ||
232 | int32_t | |
233 | PluralRules::getSamplesInternal(const UnicodeString &keyword, double *dest, | |
234 | int32_t destCapacity, UBool includeUnlimited, | |
235 | UErrorCode& status) { | |
236 | initSamples(status); | |
237 | if (U_FAILURE(status)) { | |
238 | return -1; | |
239 | } | |
240 | if (destCapacity < 0 || (dest == NULL && destCapacity > 0)) { | |
241 | status = U_ILLEGAL_ARGUMENT_ERROR; | |
242 | return -1; | |
243 | } | |
244 | ||
245 | int32_t index = getKeywordIndex(keyword, status); | |
246 | if (index == -1) { | |
247 | return 0; | |
248 | } | |
249 | ||
250 | const int32_t LIMIT_MASK = 0x1 << 31; | |
251 | ||
252 | if (!includeUnlimited) { | |
253 | if ((mSampleInfo[index] & LIMIT_MASK) == 0) { | |
254 | return -1; | |
255 | } | |
256 | } | |
257 | ||
258 | int32_t start = index == 0 ? 0 : mSampleInfo[index - 1] & ~LIMIT_MASK; | |
259 | int32_t limit = mSampleInfo[index] & ~LIMIT_MASK; | |
260 | int32_t len = limit - start; | |
261 | if (len <= destCapacity) { | |
262 | destCapacity = len; | |
263 | } else if (includeUnlimited) { | |
264 | len = destCapacity; // no overflow, and don't report more than we copy | |
265 | } else { | |
266 | status = U_BUFFER_OVERFLOW_ERROR; | |
267 | return len; | |
268 | } | |
269 | for (int32_t i = 0; i < destCapacity; ++i, ++start) { | |
270 | dest[i] = mSamples[start]; | |
271 | } | |
272 | return len; | |
273 | } | |
274 | ||
46f4442e A |
275 | |
276 | UBool | |
277 | PluralRules::isKeyword(const UnicodeString& keyword) const { | |
4388f060 | 278 | if (0 == keyword.compare(PLURAL_KEYWORD_OTHER, 5)) { |
46f4442e A |
279 | return true; |
280 | } | |
281 | else { | |
282 | if (mRules==NULL) { | |
283 | return false; | |
284 | } | |
285 | else { | |
286 | return mRules->isKeyword(keyword); | |
287 | } | |
288 | } | |
289 | } | |
290 | ||
291 | UnicodeString | |
292 | PluralRules::getKeywordOther() const { | |
4388f060 | 293 | return UnicodeString(TRUE, PLURAL_KEYWORD_OTHER, 5); |
46f4442e A |
294 | } |
295 | ||
296 | UBool | |
297 | PluralRules::operator==(const PluralRules& other) const { | |
298 | int32_t limit; | |
46f4442e A |
299 | const UnicodeString *ptrKeyword; |
300 | UErrorCode status= U_ZERO_ERROR; | |
301 | ||
302 | if ( this == &other ) { | |
303 | return TRUE; | |
304 | } | |
4388f060 A |
305 | LocalPointer<StringEnumeration> myKeywordList(getKeywords(status)); |
306 | LocalPointer<StringEnumeration> otherKeywordList(other.getKeywords(status)); | |
729e4ab9 A |
307 | if (U_FAILURE(status)) { |
308 | return FALSE; | |
309 | } | |
46f4442e | 310 | |
4388f060 A |
311 | if (myKeywordList->count(status)!=otherKeywordList->count(status)) { |
312 | return FALSE; | |
46f4442e | 313 | } |
4388f060 A |
314 | myKeywordList->reset(status); |
315 | while ((ptrKeyword=myKeywordList->snext(status))!=NULL) { | |
316 | if (!other.isKeyword(*ptrKeyword)) { | |
729e4ab9 A |
317 | return FALSE; |
318 | } | |
4388f060 A |
319 | } |
320 | otherKeywordList->reset(status); | |
321 | while ((ptrKeyword=otherKeywordList->snext(status))!=NULL) { | |
322 | if (!this->isKeyword(*ptrKeyword)) { | |
46f4442e A |
323 | return FALSE; |
324 | } | |
325 | } | |
4388f060 A |
326 | if (U_FAILURE(status)) { |
327 | return FALSE; | |
328 | } | |
46f4442e A |
329 | |
330 | if ((limit=this->getRepeatLimit()) != other.getRepeatLimit()) { | |
331 | return FALSE; | |
332 | } | |
333 | UnicodeString myKeyword, otherKeyword; | |
334 | for (int32_t i=0; i<limit; ++i) { | |
335 | myKeyword = this->select(i); | |
336 | otherKeyword = other.select(i); | |
337 | if (myKeyword!=otherKeyword) { | |
338 | return FALSE; | |
339 | } | |
340 | } | |
341 | return TRUE; | |
342 | } | |
343 | ||
344 | void | |
345 | PluralRules::parseDescription(UnicodeString& data, RuleChain& rules, UErrorCode &status) | |
346 | { | |
347 | int32_t ruleIndex=0; | |
348 | UnicodeString token; | |
349 | tokenType type; | |
350 | tokenType prevType=none; | |
351 | RuleChain *ruleChain=NULL; | |
352 | AndConstraint *curAndConstraint=NULL; | |
353 | OrConstraint *orNode=NULL; | |
354 | RuleChain *lastChain=NULL; | |
355 | ||
729e4ab9 A |
356 | if (U_FAILURE(status)) { |
357 | return; | |
358 | } | |
4388f060 | 359 | UnicodeString ruleData = data.toLower(""); |
46f4442e A |
360 | while (ruleIndex< ruleData.length()) { |
361 | mParser->getNextToken(ruleData, &ruleIndex, token, type, status); | |
362 | if (U_FAILURE(status)) { | |
363 | return; | |
364 | } | |
365 | mParser->checkSyntax(prevType, type, status); | |
366 | if (U_FAILURE(status)) { | |
367 | return; | |
368 | } | |
369 | switch (type) { | |
370 | case tAnd: | |
4388f060 | 371 | U_ASSERT(curAndConstraint != NULL); |
46f4442e A |
372 | curAndConstraint = curAndConstraint->add(); |
373 | break; | |
374 | case tOr: | |
375 | lastChain = &rules; | |
376 | while (lastChain->next !=NULL) { | |
377 | lastChain = lastChain->next; | |
378 | } | |
379 | orNode=lastChain->ruleHeader; | |
380 | while (orNode->next != NULL) { | |
381 | orNode = orNode->next; | |
382 | } | |
383 | orNode->next= new OrConstraint(); | |
384 | orNode=orNode->next; | |
385 | orNode->next=NULL; | |
386 | curAndConstraint = orNode->add(); | |
387 | break; | |
388 | case tIs: | |
4388f060 | 389 | U_ASSERT(curAndConstraint != NULL); |
46f4442e A |
390 | curAndConstraint->rangeHigh=-1; |
391 | break; | |
392 | case tNot: | |
4388f060 | 393 | U_ASSERT(curAndConstraint != NULL); |
46f4442e A |
394 | curAndConstraint->notIn=TRUE; |
395 | break; | |
396 | case tIn: | |
4388f060 | 397 | U_ASSERT(curAndConstraint != NULL); |
46f4442e A |
398 | curAndConstraint->rangeHigh=PLURAL_RANGE_HIGH; |
399 | curAndConstraint->integerOnly = TRUE; | |
400 | break; | |
401 | case tWithin: | |
4388f060 | 402 | U_ASSERT(curAndConstraint != NULL); |
46f4442e A |
403 | curAndConstraint->rangeHigh=PLURAL_RANGE_HIGH; |
404 | break; | |
405 | case tNumber: | |
4388f060 | 406 | U_ASSERT(curAndConstraint != NULL); |
46f4442e A |
407 | if ( (curAndConstraint->op==AndConstraint::MOD)&& |
408 | (curAndConstraint->opNum == -1 ) ) { | |
409 | curAndConstraint->opNum=getNumberValue(token); | |
410 | } | |
411 | else { | |
412 | if (curAndConstraint->rangeLow == -1) { | |
413 | curAndConstraint->rangeLow=getNumberValue(token); | |
414 | } | |
415 | else { | |
416 | curAndConstraint->rangeHigh=getNumberValue(token); | |
417 | } | |
418 | } | |
419 | break; | |
420 | case tMod: | |
4388f060 | 421 | U_ASSERT(curAndConstraint != NULL); |
46f4442e A |
422 | curAndConstraint->op=AndConstraint::MOD; |
423 | break; | |
424 | case tKeyword: | |
425 | if (ruleChain==NULL) { | |
426 | ruleChain = &rules; | |
427 | } | |
428 | else { | |
429 | while (ruleChain->next!=NULL){ | |
430 | ruleChain=ruleChain->next; | |
431 | } | |
432 | ruleChain=ruleChain->next=new RuleChain(); | |
433 | } | |
4388f060 A |
434 | if (ruleChain->ruleHeader != NULL) { |
435 | delete ruleChain->ruleHeader; | |
436 | } | |
46f4442e A |
437 | orNode = ruleChain->ruleHeader = new OrConstraint(); |
438 | curAndConstraint = orNode->add(); | |
439 | ruleChain->keyword = token; | |
440 | break; | |
441 | default: | |
442 | break; | |
443 | } | |
444 | prevType=type; | |
445 | } | |
446 | } | |
447 | ||
448 | int32_t | |
449 | PluralRules::getNumberValue(const UnicodeString& token) const { | |
450 | int32_t i; | |
451 | char digits[128]; | |
452 | ||
453 | i = token.extract(0, token.length(), digits, ARRAY_SIZE(digits), US_INV); | |
454 | digits[i]='\0'; | |
455 | ||
456 | return((int32_t)atoi(digits)); | |
457 | } | |
458 | ||
459 | ||
460 | void | |
461 | PluralRules::getNextLocale(const UnicodeString& localeData, int32_t* curIndex, UnicodeString& localeName) { | |
462 | int32_t i=*curIndex; | |
463 | ||
464 | localeName.remove(); | |
465 | while (i< localeData.length()) { | |
466 | if ( (localeData.charAt(i)!= SPACE) && (localeData.charAt(i)!= COMMA) ) { | |
467 | break; | |
468 | } | |
469 | i++; | |
470 | } | |
471 | ||
472 | while (i< localeData.length()) { | |
473 | if ( (localeData.charAt(i)== SPACE) || (localeData.charAt(i)== COMMA) ) { | |
474 | break; | |
475 | } | |
476 | localeName+=localeData.charAt(i++); | |
477 | } | |
478 | *curIndex=i; | |
479 | } | |
480 | ||
481 | ||
482 | int32_t | |
483 | PluralRules::getRepeatLimit() const { | |
484 | if (mRules!=NULL) { | |
485 | return mRules->getRepeatLimit(); | |
486 | } | |
487 | else { | |
488 | return 0; | |
489 | } | |
490 | } | |
491 | ||
4388f060 A |
492 | int32_t |
493 | PluralRules::getKeywordIndex(const UnicodeString& keyword, | |
494 | UErrorCode& status) const { | |
495 | if (U_SUCCESS(status)) { | |
496 | int32_t n = 0; | |
497 | RuleChain* rc = mRules; | |
498 | while (rc != NULL) { | |
499 | if (rc->ruleHeader != NULL) { | |
500 | if (rc->keyword == keyword) { | |
501 | return n; | |
502 | } | |
503 | ++n; | |
504 | } | |
505 | rc = rc->next; | |
506 | } | |
507 | if (0 == keyword.compare(PLURAL_KEYWORD_OTHER, 5)) { | |
508 | return n; | |
509 | } | |
510 | } | |
511 | return -1; | |
512 | } | |
513 | ||
// One sample collected by initSamples(): a value and the index of the rule
// (keyword) it was attributed to.
struct SampleRecord {
    int32_t ruleIndex;
    double  value;
};
518 | ||
519 | void | |
520 | PluralRules::initSamples(UErrorCode& status) { | |
521 | if (U_FAILURE(status)) { | |
522 | return; | |
523 | } | |
524 | Mutex lock(&pluralMutex); | |
525 | ||
526 | if (mSamples) { | |
527 | return; | |
528 | } | |
529 | ||
530 | // Note, the original design let you have multiple rules with the same keyword. But | |
531 | // we don't use that in our data and existing functions in this implementation don't | |
532 | // fully support it (for example, the returned keywords is a list and not a set). | |
533 | // | |
534 | // So I don't support this here either. If you ask for samples, or for all values, | |
535 | // you will get information about the first rule with that keyword, not all rules with | |
536 | // that keyword. | |
537 | ||
538 | int32_t maxIndex = 0; | |
539 | int32_t otherIndex = -1; // the value -1 will indicate we added 'other' at end | |
540 | RuleChain* rc = mRules; | |
541 | while (rc != NULL) { | |
542 | if (rc->ruleHeader != NULL) { | |
543 | if (otherIndex == -1 && 0 == rc->keyword.compare(PLURAL_KEYWORD_OTHER, 5)) { | |
544 | otherIndex = maxIndex; | |
545 | } | |
546 | ++maxIndex; | |
547 | } | |
548 | rc = rc->next; | |
549 | } | |
550 | if (otherIndex == -1) { | |
551 | ++maxIndex; | |
552 | } | |
553 | ||
554 | LocalMemory<int32_t> newSampleInfo; | |
555 | if (NULL == newSampleInfo.allocateInsteadAndCopy(maxIndex)) { | |
556 | status = U_MEMORY_ALLOCATION_ERROR; | |
557 | return; | |
558 | } | |
559 | ||
560 | const int32_t LIMIT_MASK = 0x1 << 31; | |
561 | ||
562 | rc = mRules; | |
563 | int32_t n = 0; | |
564 | while (rc != NULL) { | |
565 | if (rc->ruleHeader != NULL) { | |
566 | newSampleInfo[n++] = rc->ruleHeader->isLimited() ? LIMIT_MASK : 0; | |
567 | } | |
568 | rc = rc->next; | |
569 | } | |
570 | if (otherIndex == -1) { | |
571 | newSampleInfo[maxIndex - 1] = 0; // unlimited | |
572 | } | |
573 | ||
574 | MaybeStackArray<SampleRecord, 10> newSamples; | |
575 | int32_t sampleCount = 0; | |
576 | ||
577 | int32_t limit = getRepeatLimit() * MAX_SAMPLES * 2; | |
578 | if (limit < 10) { | |
579 | limit = 10; | |
580 | } | |
581 | ||
582 | for (int i = 0, keywordsRemaining = maxIndex; | |
583 | keywordsRemaining > 0 && i < limit; | |
584 | ++i) { | |
585 | double val = i / 2.0; | |
586 | ||
587 | n = 0; | |
588 | rc = mRules; | |
589 | int32_t found = -1; | |
590 | while (rc != NULL) { | |
591 | if (rc->ruleHeader != NULL) { | |
592 | if (rc->ruleHeader->isFulfilled(val)) { | |
593 | found = n; | |
594 | break; | |
595 | } | |
596 | ++n; | |
597 | } | |
598 | rc = rc->next; | |
599 | } | |
600 | if (found == -1) { | |
601 | // 'other'. If there is an 'other' rule, the rule set is bad since nothing | |
602 | // should leak through, but we don't bother to report that here. | |
603 | found = otherIndex == -1 ? maxIndex - 1 : otherIndex; | |
604 | } | |
605 | if (newSampleInfo[found] == MAX_SAMPLES) { // limit flag not set | |
606 | continue; | |
607 | } | |
608 | newSampleInfo[found] += 1; // won't impact limit flag | |
609 | ||
610 | if (sampleCount == newSamples.getCapacity()) { | |
611 | int32_t newCapacity = sampleCount < 20 ? 128 : sampleCount * 2; | |
612 | if (NULL == newSamples.resize(newCapacity, sampleCount)) { | |
613 | status = U_MEMORY_ALLOCATION_ERROR; | |
614 | return; | |
615 | } | |
616 | } | |
617 | newSamples[sampleCount].ruleIndex = found; | |
618 | newSamples[sampleCount].value = val; | |
619 | ++sampleCount; | |
620 | ||
621 | if (newSampleInfo[found] == MAX_SAMPLES) { // limit flag not set | |
622 | --keywordsRemaining; | |
623 | } | |
624 | } | |
625 | ||
626 | // sort the values by index, leaving order otherwise unchanged | |
627 | // this is just a selection sort for simplicity | |
628 | LocalMemory<double> values; | |
629 | if (NULL == values.allocateInsteadAndCopy(sampleCount)) { | |
630 | status = U_MEMORY_ALLOCATION_ERROR; | |
631 | return; | |
632 | } | |
633 | for (int i = 0, j = 0; i < maxIndex; ++i) { | |
634 | for (int k = 0; k < sampleCount; ++k) { | |
635 | if (newSamples[k].ruleIndex == i) { | |
636 | values[j++] = newSamples[k].value; | |
637 | } | |
638 | } | |
639 | } | |
640 | ||
641 | // convert array of mask/lengths to array of mask/limits | |
642 | limit = 0; | |
643 | for (int i = 0; i < maxIndex; ++i) { | |
644 | int32_t info = newSampleInfo[i]; | |
645 | int32_t len = info & ~LIMIT_MASK; | |
646 | limit += len; | |
647 | // if a rule is 'unlimited' but has fewer than MAX_SAMPLES samples, | |
648 | // it's not really unlimited, so mark it as limited | |
649 | int32_t mask = len < MAX_SAMPLES ? LIMIT_MASK : info & LIMIT_MASK; | |
650 | newSampleInfo[i] = limit | mask; | |
651 | } | |
652 | ||
653 | // ok, we've got good data | |
654 | mSamples = values.orphan(); | |
655 | mSampleInfo = newSampleInfo.orphan(); | |
656 | mSampleInfoCount = maxIndex; | |
657 | } | |
46f4442e A |
658 | |
659 | void | |
660 | PluralRules::addRules(RuleChain& rules) { | |
661 | RuleChain *newRule = new RuleChain(rules); | |
662 | this->mRules=newRule; | |
663 | newRule->setRepeatLimit(); | |
664 | } | |
665 | ||
666 | UnicodeString | |
51004dcb | 667 | PluralRules::getRuleFromResource(const Locale& locale, UPluralType type, UErrorCode& errCode) { |
46f4442e | 668 | UnicodeString emptyStr; |
4388f060 | 669 | |
729e4ab9 A |
670 | if (U_FAILURE(errCode)) { |
671 | return emptyStr; | |
672 | } | |
51004dcb | 673 | LocalUResourceBundlePointer rb(ures_openDirect(NULL, "plurals", &errCode)); |
46f4442e | 674 | if(U_FAILURE(errCode)) { |
46f4442e A |
675 | return emptyStr; |
676 | } | |
51004dcb A |
677 | const char *typeKey; |
678 | switch (type) { | |
679 | case UPLURAL_TYPE_CARDINAL: | |
680 | typeKey = "locales"; | |
681 | break; | |
682 | case UPLURAL_TYPE_ORDINAL: | |
683 | typeKey = "locales_ordinals"; | |
684 | break; | |
685 | default: | |
686 | // Must not occur: The caller should have checked for valid types. | |
687 | errCode = U_ILLEGAL_ARGUMENT_ERROR; | |
688 | return emptyStr; | |
689 | } | |
690 | LocalUResourceBundlePointer locRes(ures_getByKey(rb.getAlias(), typeKey, NULL, &errCode)); | |
46f4442e | 691 | if(U_FAILURE(errCode)) { |
46f4442e | 692 | return emptyStr; |
4388f060 | 693 | } |
46f4442e A |
694 | int32_t resLen=0; |
695 | const char *curLocaleName=locale.getName(); | |
51004dcb | 696 | const UChar* s = ures_getStringByKey(locRes.getAlias(), curLocaleName, &resLen, &errCode); |
46f4442e A |
697 | |
698 | if (s == NULL) { | |
699 | // Check parent locales. | |
700 | UErrorCode status = U_ZERO_ERROR; | |
701 | char parentLocaleName[ULOC_FULLNAME_CAPACITY]; | |
702 | const char *curLocaleName=locale.getName(); | |
46f4442e | 703 | uprv_strcpy(parentLocaleName, curLocaleName); |
4388f060 | 704 | |
51004dcb A |
705 | while (uloc_getParent(parentLocaleName, parentLocaleName, |
706 | ULOC_FULLNAME_CAPACITY, &status) > 0) { | |
46f4442e | 707 | resLen=0; |
51004dcb | 708 | s = ures_getStringByKey(locRes.getAlias(), parentLocaleName, &resLen, &status); |
46f4442e A |
709 | if (s != NULL) { |
710 | errCode = U_ZERO_ERROR; | |
711 | break; | |
712 | } | |
713 | status = U_ZERO_ERROR; | |
714 | } | |
715 | } | |
716 | if (s==NULL) { | |
46f4442e A |
717 | return emptyStr; |
718 | } | |
4388f060 | 719 | |
46f4442e A |
720 | char setKey[256]; |
721 | UChar result[256]; | |
722 | u_UCharsToChars(s, setKey, resLen + 1); | |
723 | // printf("\n PluralRule: %s\n", setKey); | |
4388f060 | 724 | |
46f4442e | 725 | |
51004dcb | 726 | LocalUResourceBundlePointer ruleRes(ures_getByKey(rb.getAlias(), "rules", NULL, &errCode)); |
46f4442e | 727 | if(U_FAILURE(errCode)) { |
46f4442e A |
728 | return emptyStr; |
729 | } | |
730 | resLen=0; | |
51004dcb | 731 | LocalUResourceBundlePointer setRes(ures_getByKey(ruleRes.getAlias(), setKey, NULL, &errCode)); |
46f4442e | 732 | if (U_FAILURE(errCode)) { |
46f4442e A |
733 | return emptyStr; |
734 | } | |
735 | ||
51004dcb | 736 | int32_t numberKeys = ures_getSize(setRes.getAlias()); |
46f4442e A |
737 | char *key=NULL; |
738 | int32_t len=0; | |
739 | for(int32_t i=0; i<numberKeys; ++i) { | |
740 | int32_t keyLen; | |
741 | resLen=0; | |
51004dcb | 742 | s=ures_getNextString(setRes.getAlias(), &resLen, (const char**)&key, &errCode); |
729e4ab9 | 743 | keyLen = (int32_t)uprv_strlen(key); |
46f4442e A |
744 | u_charsToUChars(key, result+len, keyLen); |
745 | len += keyLen; | |
746 | result[len++]=COLON; | |
747 | uprv_memcpy(result+len, s, resLen*sizeof(UChar)); | |
748 | len += resLen; | |
749 | result[len++]=SEMI_COLON; | |
750 | } | |
751 | result[len++]=0; | |
752 | u_UCharsToChars(result, setKey, len); | |
753 | // printf(" Rule: %s\n", setKey); | |
754 | ||
46f4442e | 755 | return UnicodeString(result); |
46f4442e A |
756 | } |
757 | ||
758 | AndConstraint::AndConstraint() { | |
759 | op = AndConstraint::NONE; | |
760 | opNum=-1; | |
761 | rangeLow=-1; | |
762 | rangeHigh=-1; | |
763 | notIn=FALSE; | |
764 | integerOnly=FALSE; | |
765 | next=NULL; | |
766 | } | |
767 | ||
768 | ||
769 | AndConstraint::AndConstraint(const AndConstraint& other) { | |
770 | this->op = other.op; | |
771 | this->opNum=other.opNum; | |
772 | this->rangeLow=other.rangeLow; | |
773 | this->rangeHigh=other.rangeHigh; | |
774 | this->integerOnly=other.integerOnly; | |
775 | this->notIn=other.notIn; | |
776 | if (other.next==NULL) { | |
777 | this->next=NULL; | |
778 | } | |
779 | else { | |
780 | this->next = new AndConstraint(*other.next); | |
781 | } | |
782 | } | |
783 | ||
784 | AndConstraint::~AndConstraint() { | |
785 | if (next!=NULL) { | |
786 | delete next; | |
787 | } | |
788 | } | |
789 | ||
790 | ||
791 | UBool | |
792 | AndConstraint::isFulfilled(double number) { | |
793 | UBool result=TRUE; | |
794 | double value=number; | |
4388f060 A |
795 | |
796 | // arrrrrrgh | |
797 | if ((rangeHigh == -1 || integerOnly) && number != uprv_floor(number)) { | |
798 | return notIn; | |
799 | } | |
800 | ||
46f4442e A |
801 | if ( op == MOD ) { |
802 | value = (int32_t)value % opNum; | |
803 | } | |
804 | if ( rangeHigh == -1 ) { | |
805 | if ( rangeLow == -1 ) { | |
806 | result = TRUE; // empty rule | |
807 | } | |
808 | else { | |
809 | if ( value == rangeLow ) { | |
810 | result = TRUE; | |
811 | } | |
812 | else { | |
813 | result = FALSE; | |
814 | } | |
815 | } | |
816 | } | |
817 | else { | |
818 | if ((rangeLow <= value) && (value <= rangeHigh)) { | |
819 | if (integerOnly) { | |
820 | if ( value != (int32_t)value) { | |
821 | result = FALSE; | |
822 | } | |
823 | else { | |
824 | result = TRUE; | |
825 | } | |
826 | } | |
827 | else { | |
828 | result = TRUE; | |
829 | } | |
830 | } | |
831 | else { | |
832 | result = FALSE; | |
833 | } | |
834 | } | |
835 | if (notIn) { | |
836 | return !result; | |
837 | } | |
838 | else { | |
839 | return result; | |
840 | } | |
841 | } | |
842 | ||
4388f060 A |
843 | UBool |
844 | AndConstraint::isLimited() { | |
845 | return (rangeHigh == -1 || integerOnly) && !notIn && op != MOD; | |
846 | } | |
847 | ||
46f4442e A |
848 | int32_t |
849 | AndConstraint::updateRepeatLimit(int32_t maxLimit) { | |
4388f060 | 850 | |
46f4442e A |
851 | if ( op == MOD ) { |
852 | return uprv_max(opNum, maxLimit); | |
853 | } | |
854 | else { | |
855 | if ( rangeHigh == -1 ) { | |
46f4442e A |
856 | return uprv_max(rangeLow, maxLimit); |
857 | } | |
858 | else{ | |
859 | return uprv_max(rangeHigh, maxLimit); | |
860 | } | |
861 | } | |
862 | } | |
863 | ||
864 | ||
865 | AndConstraint* | |
866 | AndConstraint::add() | |
867 | { | |
868 | this->next = new AndConstraint(); | |
869 | return this->next; | |
870 | } | |
871 | ||
872 | OrConstraint::OrConstraint() { | |
873 | childNode=NULL; | |
874 | next=NULL; | |
875 | } | |
876 | ||
877 | OrConstraint::OrConstraint(const OrConstraint& other) { | |
878 | if ( other.childNode == NULL ) { | |
879 | this->childNode = NULL; | |
880 | } | |
881 | else { | |
882 | this->childNode = new AndConstraint(*(other.childNode)); | |
883 | } | |
884 | if (other.next == NULL ) { | |
885 | this->next = NULL; | |
886 | } | |
887 | else { | |
888 | this->next = new OrConstraint(*(other.next)); | |
889 | } | |
890 | } | |
891 | ||
892 | OrConstraint::~OrConstraint() { | |
893 | if (childNode!=NULL) { | |
894 | delete childNode; | |
895 | } | |
896 | if (next!=NULL) { | |
897 | delete next; | |
898 | } | |
899 | } | |
900 | ||
901 | AndConstraint* | |
902 | OrConstraint::add() | |
903 | { | |
904 | OrConstraint *curOrConstraint=this; | |
905 | { | |
906 | while (curOrConstraint->next!=NULL) { | |
907 | curOrConstraint = curOrConstraint->next; | |
908 | } | |
909 | curOrConstraint->next = NULL; | |
910 | curOrConstraint->childNode = new AndConstraint(); | |
911 | } | |
912 | return curOrConstraint->childNode; | |
913 | } | |
914 | ||
915 | UBool | |
916 | OrConstraint::isFulfilled(double number) { | |
917 | OrConstraint* orRule=this; | |
918 | UBool result=FALSE; | |
4388f060 | 919 | |
46f4442e A |
920 | while (orRule!=NULL && !result) { |
921 | result=TRUE; | |
922 | AndConstraint* andRule = orRule->childNode; | |
923 | while (andRule!=NULL && result) { | |
924 | result = andRule->isFulfilled(number); | |
925 | andRule=andRule->next; | |
926 | } | |
927 | orRule = orRule->next; | |
928 | } | |
4388f060 | 929 | |
46f4442e A |
930 | return result; |
931 | } | |
932 | ||
4388f060 A |
933 | UBool |
934 | OrConstraint::isLimited() { | |
935 | for (OrConstraint *orc = this; orc != NULL; orc = orc->next) { | |
936 | UBool result = FALSE; | |
937 | for (AndConstraint *andc = orc->childNode; andc != NULL; andc = andc->next) { | |
938 | if (andc->isLimited()) { | |
939 | result = TRUE; | |
940 | break; | |
941 | } | |
942 | } | |
943 | if (result == FALSE) { | |
944 | return FALSE; | |
945 | } | |
946 | } | |
947 | return TRUE; | |
948 | } | |
46f4442e A |
949 | |
950 | RuleChain::RuleChain() { | |
951 | ruleHeader=NULL; | |
952 | next = NULL; | |
953 | repeatLimit=0; | |
954 | } | |
955 | ||
956 | RuleChain::RuleChain(const RuleChain& other) { | |
957 | this->repeatLimit = other.repeatLimit; | |
958 | this->keyword=other.keyword; | |
959 | if (other.ruleHeader != NULL) { | |
960 | this->ruleHeader = new OrConstraint(*(other.ruleHeader)); | |
961 | } | |
962 | else { | |
963 | this->ruleHeader = NULL; | |
964 | } | |
965 | if (other.next != NULL ) { | |
966 | this->next = new RuleChain(*other.next); | |
967 | } | |
968 | else | |
969 | { | |
970 | this->next = NULL; | |
971 | } | |
972 | } | |
973 | ||
974 | RuleChain::~RuleChain() { | |
975 | if (next != NULL) { | |
976 | delete next; | |
977 | } | |
978 | if ( ruleHeader != NULL ) { | |
979 | delete ruleHeader; | |
980 | } | |
981 | } | |
982 | ||
983 | UnicodeString | |
984 | RuleChain::select(double number) const { | |
4388f060 | 985 | |
46f4442e A |
986 | if ( ruleHeader != NULL ) { |
987 | if (ruleHeader->isFulfilled(number)) { | |
988 | return keyword; | |
989 | } | |
990 | } | |
991 | if ( next != NULL ) { | |
992 | return next->select(number); | |
993 | } | |
994 | else { | |
4388f060 | 995 | return UnicodeString(TRUE, PLURAL_KEYWORD_OTHER, 5); |
46f4442e A |
996 | } |
997 | ||
998 | } | |
999 | ||
1000 | void | |
1001 | RuleChain::dumpRules(UnicodeString& result) { | |
1002 | UChar digitString[16]; | |
4388f060 | 1003 | |
46f4442e A |
1004 | if ( ruleHeader != NULL ) { |
1005 | result += keyword; | |
1006 | OrConstraint* orRule=ruleHeader; | |
1007 | while ( orRule != NULL ) { | |
1008 | AndConstraint* andRule=orRule->childNode; | |
1009 | while ( andRule != NULL ) { | |
1010 | if ( (andRule->op==AndConstraint::NONE) && (andRule->rangeHigh==-1) ) { | |
1011 | result += UNICODE_STRING_SIMPLE(" n is "); | |
1012 | if (andRule->notIn) { | |
1013 | result += UNICODE_STRING_SIMPLE("not "); | |
1014 | } | |
1015 | uprv_itou(digitString,16, andRule->rangeLow,10,0); | |
1016 | result += UnicodeString(digitString); | |
1017 | } | |
1018 | else { | |
1019 | if (andRule->op==AndConstraint::MOD) { | |
1020 | result += UNICODE_STRING_SIMPLE(" n mod "); | |
1021 | uprv_itou(digitString,16, andRule->opNum,10,0); | |
1022 | result += UnicodeString(digitString); | |
1023 | } | |
1024 | else { | |
1025 | result += UNICODE_STRING_SIMPLE(" n "); | |
1026 | } | |
1027 | if (andRule->rangeHigh==-1) { | |
1028 | if (andRule->notIn) { | |
1029 | result += UNICODE_STRING_SIMPLE(" is not "); | |
1030 | uprv_itou(digitString,16, andRule->rangeLow,10,0); | |
1031 | result += UnicodeString(digitString); | |
1032 | } | |
1033 | else { | |
1034 | result += UNICODE_STRING_SIMPLE(" is "); | |
1035 | uprv_itou(digitString,16, andRule->rangeLow,10,0); | |
1036 | result += UnicodeString(digitString); | |
1037 | } | |
1038 | } | |
1039 | else { | |
1040 | if (andRule->notIn) { | |
1041 | if ( andRule->integerOnly ) { | |
1042 | result += UNICODE_STRING_SIMPLE(" not in "); | |
1043 | } | |
1044 | else { | |
1045 | result += UNICODE_STRING_SIMPLE(" not within "); | |
1046 | } | |
1047 | uprv_itou(digitString,16, andRule->rangeLow,10,0); | |
1048 | result += UnicodeString(digitString); | |
1049 | result += UNICODE_STRING_SIMPLE(" .. "); | |
1050 | uprv_itou(digitString,16, andRule->rangeHigh,10,0); | |
1051 | result += UnicodeString(digitString); | |
1052 | } | |
1053 | else { | |
1054 | if ( andRule->integerOnly ) { | |
1055 | result += UNICODE_STRING_SIMPLE(" in "); | |
1056 | } | |
1057 | else { | |
1058 | result += UNICODE_STRING_SIMPLE(" within "); | |
1059 | } | |
1060 | uprv_itou(digitString,16, andRule->rangeLow,10,0); | |
1061 | result += UnicodeString(digitString); | |
1062 | result += UNICODE_STRING_SIMPLE(" .. "); | |
1063 | uprv_itou(digitString,16, andRule->rangeHigh,10,0); | |
1064 | } | |
1065 | } | |
1066 | } | |
1067 | if ( (andRule=andRule->next) != NULL) { | |
4388f060 | 1068 | result.append(PK_AND, 3); |
46f4442e A |
1069 | } |
1070 | } | |
1071 | if ( (orRule = orRule->next) != NULL ) { | |
4388f060 | 1072 | result.append(PK_OR, 2); |
46f4442e A |
1073 | } |
1074 | } | |
1075 | } | |
1076 | if ( next != NULL ) { | |
1077 | next->dumpRules(result); | |
1078 | } | |
1079 | } | |
1080 | ||
1081 | int32_t | |
1082 | RuleChain::getRepeatLimit () { | |
1083 | return repeatLimit; | |
1084 | } | |
1085 | ||
1086 | void | |
1087 | RuleChain::setRepeatLimit () { | |
1088 | int32_t limit=0; | |
1089 | ||
1090 | if ( next != NULL ) { | |
1091 | next->setRepeatLimit(); | |
1092 | limit = next->repeatLimit; | |
1093 | } | |
1094 | ||
1095 | if ( ruleHeader != NULL ) { | |
1096 | OrConstraint* orRule=ruleHeader; | |
1097 | while ( orRule != NULL ) { | |
1098 | AndConstraint* andRule=orRule->childNode; | |
1099 | while ( andRule != NULL ) { | |
1100 | limit = andRule->updateRepeatLimit(limit); | |
1101 | andRule = andRule->next; | |
1102 | } | |
1103 | orRule = orRule->next; | |
1104 | } | |
1105 | } | |
1106 | repeatLimit = limit; | |
1107 | } | |
1108 | ||
1109 | UErrorCode | |
1110 | RuleChain::getKeywords(int32_t capacityOfKeywords, UnicodeString* keywords, int32_t& arraySize) const { | |
1111 | if ( arraySize < capacityOfKeywords-1 ) { | |
1112 | keywords[arraySize++]=keyword; | |
1113 | } | |
1114 | else { | |
1115 | return U_BUFFER_OVERFLOW_ERROR; | |
1116 | } | |
1117 | ||
1118 | if ( next != NULL ) { | |
1119 | return next->getKeywords(capacityOfKeywords, keywords, arraySize); | |
1120 | } | |
1121 | else { | |
1122 | return U_ZERO_ERROR; | |
1123 | } | |
1124 | } | |
1125 | ||
1126 | UBool | |
1127 | RuleChain::isKeyword(const UnicodeString& keywordParam) const { | |
1128 | if ( keyword == keywordParam ) { | |
1129 | return TRUE; | |
1130 | } | |
1131 | ||
1132 | if ( next != NULL ) { | |
1133 | return next->isKeyword(keywordParam); | |
1134 | } | |
1135 | else { | |
1136 | return FALSE; | |
1137 | } | |
1138 | } | |
1139 | ||
1140 | ||
1141 | RuleParser::RuleParser() { | |
46f4442e A |
1142 | } |
1143 | ||
1144 | RuleParser::~RuleParser() { | |
46f4442e A |
1145 | } |
1146 | ||
1147 | void | |
1148 | RuleParser::checkSyntax(tokenType prevType, tokenType curType, UErrorCode &status) | |
1149 | { | |
1150 | if (U_FAILURE(status)) { | |
1151 | return; | |
1152 | } | |
1153 | switch(prevType) { | |
1154 | case none: | |
1155 | case tSemiColon: | |
1156 | if (curType!=tKeyword) { | |
1157 | status = U_UNEXPECTED_TOKEN; | |
1158 | } | |
1159 | break; | |
1160 | case tVariableN : | |
4388f060 | 1161 | if (curType != tIs && curType != tMod && curType != tIn && |
46f4442e A |
1162 | curType != tNot && curType != tWithin) { |
1163 | status = U_UNEXPECTED_TOKEN; | |
1164 | } | |
1165 | break; | |
1166 | case tZero: | |
1167 | case tOne: | |
1168 | case tTwo: | |
1169 | case tFew: | |
1170 | case tMany: | |
1171 | case tOther: | |
1172 | case tKeyword: | |
1173 | if (curType != tColon) { | |
1174 | status = U_UNEXPECTED_TOKEN; | |
1175 | } | |
1176 | break; | |
1177 | case tColon : | |
1178 | if (curType != tVariableN) { | |
1179 | status = U_UNEXPECTED_TOKEN; | |
1180 | } | |
1181 | break; | |
1182 | case tIs: | |
1183 | if ( curType != tNumber && curType != tNot) { | |
1184 | status = U_UNEXPECTED_TOKEN; | |
1185 | } | |
1186 | break; | |
1187 | case tNot: | |
1188 | if (curType != tNumber && curType != tIn && curType != tWithin) { | |
1189 | status = U_UNEXPECTED_TOKEN; | |
1190 | } | |
1191 | break; | |
1192 | case tMod: | |
1193 | case tDot: | |
1194 | case tIn: | |
1195 | case tWithin: | |
1196 | case tAnd: | |
1197 | case tOr: | |
1198 | if (curType != tNumber && curType != tVariableN) { | |
1199 | status = U_UNEXPECTED_TOKEN; | |
1200 | } | |
1201 | break; | |
1202 | case tNumber: | |
1203 | if (curType != tDot && curType != tSemiColon && curType != tIs && curType != tNot && | |
1204 | curType != tIn && curType != tWithin && curType != tAnd && curType != tOr) | |
1205 | { | |
1206 | status = U_UNEXPECTED_TOKEN; | |
1207 | } | |
1208 | break; | |
1209 | default: | |
1210 | status = U_UNEXPECTED_TOKEN; | |
1211 | break; | |
1212 | } | |
1213 | } | |
1214 | ||
1215 | void | |
1216 | RuleParser::getNextToken(const UnicodeString& ruleData, | |
1217 | int32_t *ruleIndex, | |
1218 | UnicodeString& token, | |
1219 | tokenType& type, | |
1220 | UErrorCode &status) | |
1221 | { | |
1222 | int32_t curIndex= *ruleIndex; | |
1223 | UChar ch; | |
1224 | tokenType prevType=none; | |
1225 | ||
729e4ab9 A |
1226 | if (U_FAILURE(status)) { |
1227 | return; | |
1228 | } | |
46f4442e A |
1229 | while (curIndex<ruleData.length()) { |
1230 | ch = ruleData.charAt(curIndex); | |
1231 | if ( !inRange(ch, type) ) { | |
1232 | status = U_ILLEGAL_CHARACTER; | |
1233 | return; | |
1234 | } | |
1235 | switch (type) { | |
1236 | case tSpace: | |
1237 | if ( *ruleIndex != curIndex ) { // letter | |
1238 | token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex); | |
1239 | *ruleIndex=curIndex; | |
1240 | type=prevType; | |
1241 | getKeyType(token, type, status); | |
1242 | return; | |
1243 | } | |
1244 | else { | |
1245 | *ruleIndex=*ruleIndex+1; | |
1246 | } | |
1247 | break; // consective space | |
1248 | case tColon: | |
1249 | case tSemiColon: | |
1250 | if ( *ruleIndex != curIndex ) { | |
1251 | token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex); | |
1252 | *ruleIndex=curIndex; | |
1253 | type=prevType; | |
1254 | getKeyType(token, type, status); | |
1255 | return; | |
1256 | } | |
1257 | else { | |
1258 | *ruleIndex=curIndex+1; | |
1259 | return; | |
1260 | } | |
1261 | case tLetter: | |
1262 | if ((type==prevType)||(prevType==none)) { | |
1263 | prevType=type; | |
1264 | break; | |
1265 | } | |
1266 | break; | |
1267 | case tNumber: | |
1268 | if ((type==prevType)||(prevType==none)) { | |
1269 | prevType=type; | |
1270 | break; | |
1271 | } | |
1272 | else { | |
1273 | *ruleIndex=curIndex+1; | |
1274 | return; | |
1275 | } | |
1276 | case tDot: | |
1277 | if (prevType==none) { // first dot | |
1278 | prevType=type; | |
1279 | continue; | |
1280 | } | |
1281 | else { | |
1282 | if ( *ruleIndex != curIndex ) { | |
1283 | token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex); | |
1284 | *ruleIndex=curIndex; // letter | |
1285 | type=prevType; | |
1286 | getKeyType(token, type, status); | |
1287 | return; | |
1288 | } | |
1289 | else { // two consective dots | |
1290 | *ruleIndex=curIndex+2; | |
1291 | return; | |
1292 | } | |
1293 | } | |
46f4442e A |
1294 | default: |
1295 | status = U_UNEXPECTED_TOKEN; | |
1296 | return; | |
1297 | } | |
1298 | curIndex++; | |
1299 | } | |
1300 | if ( curIndex>=ruleData.length() ) { | |
1301 | if ( (type == tLetter)||(type == tNumber) ) { | |
1302 | token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex); | |
1303 | getKeyType(token, type, status); | |
729e4ab9 A |
1304 | if (U_FAILURE(status)) { |
1305 | return; | |
1306 | } | |
46f4442e A |
1307 | } |
1308 | *ruleIndex = ruleData.length(); | |
1309 | } | |
1310 | } | |
1311 | ||
1312 | UBool | |
1313 | RuleParser::inRange(UChar ch, tokenType& type) { | |
1314 | if ((ch>=CAP_A) && (ch<=CAP_Z)) { | |
1315 | // we assume all characters are in lower case already. | |
1316 | return FALSE; | |
1317 | } | |
1318 | if ((ch>=LOW_A) && (ch<=LOW_Z)) { | |
1319 | type = tLetter; | |
1320 | return TRUE; | |
1321 | } | |
1322 | if ((ch>=U_ZERO) && (ch<=U_NINE)) { | |
1323 | type = tNumber; | |
1324 | return TRUE; | |
1325 | } | |
1326 | switch (ch) { | |
1327 | case COLON: | |
1328 | type = tColon; | |
1329 | return TRUE; | |
1330 | case SPACE: | |
1331 | type = tSpace; | |
1332 | return TRUE; | |
1333 | case SEMI_COLON: | |
1334 | type = tSemiColon; | |
1335 | return TRUE; | |
1336 | case DOT: | |
1337 | type = tDot; | |
1338 | return TRUE; | |
1339 | default : | |
1340 | type = none; | |
1341 | return FALSE; | |
1342 | } | |
1343 | } | |
1344 | ||
1345 | ||
1346 | void | |
1347 | RuleParser::getKeyType(const UnicodeString& token, tokenType& keyType, UErrorCode &status) | |
1348 | { | |
729e4ab9 A |
1349 | if (U_FAILURE(status)) { |
1350 | return; | |
1351 | } | |
46f4442e A |
1352 | if ( keyType==tNumber) { |
1353 | } | |
4388f060 | 1354 | else if (0 == token.compare(PK_VAR_N, 1)) { |
46f4442e A |
1355 | keyType = tVariableN; |
1356 | } | |
4388f060 | 1357 | else if (0 == token.compare(PK_IS, 2)) { |
46f4442e A |
1358 | keyType = tIs; |
1359 | } | |
4388f060 | 1360 | else if (0 == token.compare(PK_AND, 3)) { |
46f4442e A |
1361 | keyType = tAnd; |
1362 | } | |
4388f060 | 1363 | else if (0 == token.compare(PK_IN, 2)) { |
46f4442e A |
1364 | keyType = tIn; |
1365 | } | |
4388f060 | 1366 | else if (0 == token.compare(PK_WITHIN, 6)) { |
46f4442e A |
1367 | keyType = tWithin; |
1368 | } | |
4388f060 | 1369 | else if (0 == token.compare(PK_NOT, 3)) { |
46f4442e A |
1370 | keyType = tNot; |
1371 | } | |
4388f060 | 1372 | else if (0 == token.compare(PK_MOD, 3)) { |
46f4442e A |
1373 | keyType = tMod; |
1374 | } | |
4388f060 | 1375 | else if (0 == token.compare(PK_OR, 2)) { |
46f4442e A |
1376 | keyType = tOr; |
1377 | } | |
1378 | else if ( isValidKeyword(token) ) { | |
1379 | keyType = tKeyword; | |
1380 | } | |
1381 | else { | |
1382 | status = U_UNEXPECTED_TOKEN; | |
1383 | } | |
1384 | } | |
1385 | ||
1386 | UBool | |
1387 | RuleParser::isValidKeyword(const UnicodeString& token) { | |
4388f060 | 1388 | return PatternProps::isIdentifier(token.getBuffer(), token.length()); |
46f4442e A |
1389 | } |
1390 | ||
4388f060 A |
1391 | PluralKeywordEnumeration::PluralKeywordEnumeration(RuleChain *header, UErrorCode& status) |
1392 | : pos(0), fKeywordNames(status) { | |
729e4ab9 A |
1393 | if (U_FAILURE(status)) { |
1394 | return; | |
1395 | } | |
4388f060 A |
1396 | fKeywordNames.setDeleter(uprv_deleteUObject); |
1397 | UBool addKeywordOther=TRUE; | |
1398 | RuleChain *node=header; | |
46f4442e A |
1399 | while(node!=NULL) { |
1400 | fKeywordNames.addElement(new UnicodeString(node->keyword), status); | |
729e4ab9 A |
1401 | if (U_FAILURE(status)) { |
1402 | return; | |
1403 | } | |
4388f060 A |
1404 | if (0 == node->keyword.compare(PLURAL_KEYWORD_OTHER, 5)) { |
1405 | addKeywordOther= FALSE; | |
46f4442e A |
1406 | } |
1407 | node=node->next; | |
1408 | } | |
4388f060 | 1409 | |
46f4442e A |
1410 | if (addKeywordOther) { |
1411 | fKeywordNames.addElement(new UnicodeString(PLURAL_KEYWORD_OTHER), status); | |
1412 | } | |
1413 | } | |
1414 | ||
1415 | const UnicodeString* | |
1416 | PluralKeywordEnumeration::snext(UErrorCode& status) { | |
1417 | if (U_SUCCESS(status) && pos < fKeywordNames.size()) { | |
1418 | return (const UnicodeString*)fKeywordNames.elementAt(pos++); | |
1419 | } | |
1420 | return NULL; | |
1421 | } | |
1422 | ||
1423 | void | |
1424 | PluralKeywordEnumeration::reset(UErrorCode& /*status*/) { | |
1425 | pos=0; | |
1426 | } | |
1427 | ||
1428 | int32_t | |
1429 | PluralKeywordEnumeration::count(UErrorCode& /*status*/) const { | |
1430 | return fKeywordNames.size(); | |
1431 | } | |
1432 | ||
1433 | PluralKeywordEnumeration::~PluralKeywordEnumeration() { | |
46f4442e A |
1434 | } |
1435 | ||
1436 | U_NAMESPACE_END | |
1437 | ||
1438 | ||
1439 | #endif /* #if !UCONFIG_NO_FORMATTING */ | |
1440 | ||
1441 | //eof |