]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | ******************************************************************************* | |
3 | * Copyright (C) 2007-2014, International Business Machines Corporation and | |
4 | * others. All Rights Reserved. | |
5 | ******************************************************************************* | |
6 | * | |
7 | * File plurrule.cpp | |
8 | */ | |
9 | ||
10 | #include <math.h> | |
11 | #include <stdio.h> | |
12 | ||
13 | #include "unicode/utypes.h" | |
14 | #include "unicode/localpointer.h" | |
15 | #include "unicode/plurrule.h" | |
16 | #include "unicode/upluralrules.h" | |
17 | #include "unicode/ures.h" | |
18 | #include "charstr.h" | |
19 | #include "cmemory.h" | |
20 | #include "cstring.h" | |
21 | #include "digitlst.h" | |
22 | #include "hash.h" | |
23 | #include "locutil.h" | |
24 | #include "mutex.h" | |
25 | #include "patternprops.h" | |
26 | #include "plurrule_impl.h" | |
27 | #include "putilimp.h" | |
28 | #include "ucln_in.h" | |
29 | #include "ustrfmt.h" | |
30 | #include "uassert.h" | |
31 | #include "uvectr32.h" | |
32 | #include "sharedpluralrules.h" | |
33 | #include "lrucache.h" | |
34 | ||
35 | #if !UCONFIG_NO_FORMATTING | |
36 | ||
37 | static icu::LRUCache *gPluralRulesCache = NULL; | |
38 | static UMutex gPluralRulesCacheMutex = U_MUTEX_INITIALIZER; | |
39 | static icu::UInitOnce gPluralRulesCacheInitOnce = U_INITONCE_INITIALIZER; | |
40 | ||
41 | U_CDECL_BEGIN | |
42 | static UBool U_CALLCONV plurrules_cleanup(void) { | |
43 | gPluralRulesCacheInitOnce.reset(); | |
44 | if (gPluralRulesCache) { | |
45 | delete gPluralRulesCache; | |
46 | gPluralRulesCache = NULL; | |
47 | } | |
48 | return TRUE; | |
49 | } | |
50 | U_CDECL_END | |
51 | ||
52 | U_NAMESPACE_BEGIN | |
53 | ||
// Number of elements in a C array, as an int32_t.
#define ARRAY_SIZE(array) (int32_t)(sizeof array / sizeof array[0])

// Zero-terminated UChar strings used by this file. The element constants
// (LOW_O, COLON, SPACE, ...) are defined outside this file; from their names
// the arrays presumably spell the text suggested by each identifier.

// Keyword "other"; callers in this file pass an explicit length of 5.
static const UChar PLURAL_KEYWORD_OTHER[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,0};
// Rule description used when a locale has no rules of its own.
static const UChar PLURAL_DEFAULT_RULE[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,COLON,SPACE,LOW_N,0};
// Rule-syntax words and variable names (not referenced in the visible part of the file).
static const UChar PK_IN[]={LOW_I,LOW_N,0};
static const UChar PK_NOT[]={LOW_N,LOW_O,LOW_T,0};
static const UChar PK_IS[]={LOW_I,LOW_S,0};
static const UChar PK_MOD[]={LOW_M,LOW_O,LOW_D,0};
static const UChar PK_AND[]={LOW_A,LOW_N,LOW_D,0};
static const UChar PK_OR[]={LOW_O,LOW_R,0};
static const UChar PK_VAR_N[]={LOW_N,0};
static const UChar PK_VAR_I[]={LOW_I,0};
static const UChar PK_VAR_F[]={LOW_F,0};
static const UChar PK_VAR_T[]={LOW_T,0};
static const UChar PK_VAR_V[]={LOW_V,0};
static const UChar PK_WITHIN[]={LOW_W,LOW_I,LOW_T,LOW_H,LOW_I,LOW_N,0};
static const UChar PK_DECIMAL[]={LOW_D,LOW_E,LOW_C,LOW_I,LOW_M,LOW_A,LOW_L,0};
static const UChar PK_INTEGER[]={LOW_I,LOW_N,LOW_T,LOW_E,LOW_G,LOW_E,LOW_R,0};

// RTTI boilerplate for the two UObject subclasses implemented in this file.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralRules)
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralKeywordEnumeration)
75 | ||
76 | PluralRules::PluralRules(UErrorCode& /*status*/) | |
77 | : UObject(), | |
78 | mRules(NULL) | |
79 | { | |
80 | } | |
81 | ||
82 | PluralRules::PluralRules(const PluralRules& other) | |
83 | : UObject(other), | |
84 | mRules(NULL) | |
85 | { | |
86 | *this=other; | |
87 | } | |
88 | ||
89 | PluralRules::~PluralRules() { | |
90 | delete mRules; | |
91 | } | |
92 | ||
93 | SharedPluralRules::~SharedPluralRules() { | |
94 | delete ptr; | |
95 | } | |
96 | ||
97 | PluralRules* | |
98 | PluralRules::clone() const { | |
99 | return new PluralRules(*this); | |
100 | } | |
101 | ||
102 | PluralRules& | |
103 | PluralRules::operator=(const PluralRules& other) { | |
104 | if (this != &other) { | |
105 | delete mRules; | |
106 | if (other.mRules==NULL) { | |
107 | mRules = NULL; | |
108 | } | |
109 | else { | |
110 | mRules = new RuleChain(*other.mRules); | |
111 | } | |
112 | } | |
113 | ||
114 | return *this; | |
115 | } | |
116 | ||
117 | StringEnumeration* PluralRules::getAvailableLocales(UErrorCode &status) { | |
118 | StringEnumeration *result = new PluralAvailableLocalesEnumeration(status); | |
119 | if (result == NULL && U_SUCCESS(status)) { | |
120 | status = U_MEMORY_ALLOCATION_ERROR; | |
121 | } | |
122 | if (U_FAILURE(status)) { | |
123 | delete result; | |
124 | result = NULL; | |
125 | } | |
126 | return result; | |
127 | } | |
128 | ||
129 | ||
130 | PluralRules* U_EXPORT2 | |
131 | PluralRules::createRules(const UnicodeString& description, UErrorCode& status) { | |
132 | if (U_FAILURE(status)) { | |
133 | return NULL; | |
134 | } | |
135 | ||
136 | PluralRuleParser parser; | |
137 | PluralRules *newRules = new PluralRules(status); | |
138 | if (U_SUCCESS(status) && newRules == NULL) { | |
139 | status = U_MEMORY_ALLOCATION_ERROR; | |
140 | } | |
141 | parser.parse(description, newRules, status); | |
142 | if (U_FAILURE(status)) { | |
143 | delete newRules; | |
144 | newRules = NULL; | |
145 | } | |
146 | return newRules; | |
147 | } | |
148 | ||
149 | ||
150 | PluralRules* U_EXPORT2 | |
151 | PluralRules::createDefaultRules(UErrorCode& status) { | |
152 | return createRules(UnicodeString(TRUE, PLURAL_DEFAULT_RULE, -1), status); | |
153 | } | |
154 | ||
155 | /******************************************************************************/ | |
156 | /* Create PluralRules cache */ | |
157 | ||
158 | static SharedObject *U_CALLCONV createSharedPluralRules( | |
159 | const char *localeId, UErrorCode &status) { | |
160 | if (U_FAILURE(status)) { | |
161 | return NULL; | |
162 | } | |
163 | PluralRules *pr = PluralRules::internalForLocale( | |
164 | localeId, UPLURAL_TYPE_CARDINAL, status); | |
165 | if (U_FAILURE(status)) { | |
166 | return NULL; | |
167 | } | |
168 | SharedObject *result = new SharedPluralRules(pr); | |
169 | if (result == NULL) { | |
170 | status = U_MEMORY_ALLOCATION_ERROR; | |
171 | delete pr; | |
172 | return NULL; | |
173 | } | |
174 | return result; | |
175 | } | |
176 | ||
177 | static void U_CALLCONV pluralRulesCacheInit(UErrorCode &status) { | |
178 | U_ASSERT(gPluralRulesCache == NULL); | |
179 | ucln_i18n_registerCleanup(UCLN_I18N_PLURAL_RULE, plurrules_cleanup); | |
180 | gPluralRulesCache = new SimpleLRUCache(100, &createSharedPluralRules, status); | |
181 | if (U_FAILURE(status)) { | |
182 | delete gPluralRulesCache; | |
183 | gPluralRulesCache = NULL; | |
184 | } | |
185 | } | |
186 | ||
187 | static void getSharedPluralRulesFromCache( | |
188 | const char *locale, | |
189 | const SharedPluralRules *&ptr, | |
190 | UErrorCode &status) { | |
191 | umtx_initOnce(gPluralRulesCacheInitOnce, &pluralRulesCacheInit, status); | |
192 | if (U_FAILURE(status)) { | |
193 | return; | |
194 | } | |
195 | Mutex lock(&gPluralRulesCacheMutex); | |
196 | gPluralRulesCache->get(locale, ptr, status); | |
197 | } | |
198 | ||
199 | ||
200 | ||
201 | ||
202 | /* end plural rules cache */ | |
203 | /******************************************************************************/ | |
204 | ||
205 | const SharedPluralRules* U_EXPORT2 | |
206 | PluralRules::createSharedInstance( | |
207 | const Locale& locale, UPluralType type, UErrorCode& status) { | |
208 | if (U_FAILURE(status)) { | |
209 | return NULL; | |
210 | } | |
211 | if (type != UPLURAL_TYPE_CARDINAL) { | |
212 | status = U_UNSUPPORTED_ERROR; | |
213 | return NULL; | |
214 | } | |
215 | const SharedPluralRules *result = NULL; | |
216 | getSharedPluralRulesFromCache(locale.getName(), result, status); | |
217 | return result; | |
218 | } | |
219 | ||
220 | PluralRules* U_EXPORT2 | |
221 | PluralRules::forLocale(const Locale& locale, UErrorCode& status) { | |
222 | return forLocale(locale, UPLURAL_TYPE_CARDINAL, status); | |
223 | } | |
224 | ||
225 | PluralRules* U_EXPORT2 | |
226 | PluralRules::forLocale(const Locale& locale, UPluralType type, UErrorCode& status) { | |
227 | if (type != UPLURAL_TYPE_CARDINAL) { | |
228 | return internalForLocale(locale, type, status); | |
229 | } | |
230 | const SharedPluralRules *shared = createSharedInstance( | |
231 | locale, type, status); | |
232 | if (U_FAILURE(status)) { | |
233 | return NULL; | |
234 | } | |
235 | PluralRules *result = (*shared)->clone(); | |
236 | shared->removeRef(); | |
237 | if (result == NULL) { | |
238 | status = U_MEMORY_ALLOCATION_ERROR; | |
239 | } | |
240 | return result; | |
241 | } | |
242 | ||
243 | PluralRules* U_EXPORT2 | |
244 | PluralRules::internalForLocale(const Locale& locale, UPluralType type, UErrorCode& status) { | |
245 | if (U_FAILURE(status)) { | |
246 | return NULL; | |
247 | } | |
248 | if (type >= UPLURAL_TYPE_COUNT) { | |
249 | status = U_ILLEGAL_ARGUMENT_ERROR; | |
250 | return NULL; | |
251 | } | |
252 | PluralRules *newObj = new PluralRules(status); | |
253 | if (newObj==NULL || U_FAILURE(status)) { | |
254 | delete newObj; | |
255 | return NULL; | |
256 | } | |
257 | UnicodeString locRule = newObj->getRuleFromResource(locale, type, status); | |
258 | // TODO: which errors, if any, should be returned? | |
259 | if (locRule.length() == 0) { | |
260 | // Locales with no specific rules (all numbers have the "other" category | |
261 | // will return a U_MISSING_RESOURCE_ERROR at this point. This is not | |
262 | // an error. | |
263 | locRule = UnicodeString(PLURAL_DEFAULT_RULE); | |
264 | status = U_ZERO_ERROR; | |
265 | } | |
266 | PluralRuleParser parser; | |
267 | parser.parse(locRule, newObj, status); | |
268 | // TODO: should rule parse errors be returned, or | |
269 | // should we silently use default rules? | |
270 | // Original impl used default rules. | |
271 | // Ask the question to ICU Core. | |
272 | ||
273 | return newObj; | |
274 | } | |
275 | ||
276 | UnicodeString | |
277 | PluralRules::select(int32_t number) const { | |
278 | return select(FixedDecimal(number)); | |
279 | } | |
280 | ||
281 | UnicodeString | |
282 | PluralRules::select(double number) const { | |
283 | return select(FixedDecimal(number)); | |
284 | } | |
285 | ||
286 | UnicodeString | |
287 | PluralRules::select(const FixedDecimal &number) const { | |
288 | if (mRules == NULL) { | |
289 | return UnicodeString(TRUE, PLURAL_DEFAULT_RULE, -1); | |
290 | } | |
291 | else { | |
292 | return mRules->select(number); | |
293 | } | |
294 | } | |
295 | ||
296 | StringEnumeration* | |
297 | PluralRules::getKeywords(UErrorCode& status) const { | |
298 | if (U_FAILURE(status)) return NULL; | |
299 | StringEnumeration* nameEnumerator = new PluralKeywordEnumeration(mRules, status); | |
300 | if (U_FAILURE(status)) { | |
301 | delete nameEnumerator; | |
302 | return NULL; | |
303 | } | |
304 | ||
305 | return nameEnumerator; | |
306 | } | |
307 | ||
308 | double | |
309 | PluralRules::getUniqueKeywordValue(const UnicodeString& /* keyword */) { | |
310 | // Not Implemented. | |
311 | return UPLRULES_NO_UNIQUE_VALUE; | |
312 | } | |
313 | ||
314 | int32_t | |
315 | PluralRules::getAllKeywordValues(const UnicodeString & /* keyword */, double * /* dest */, | |
316 | int32_t /* destCapacity */, UErrorCode& error) { | |
317 | error = U_UNSUPPORTED_ERROR; | |
318 | return 0; | |
319 | } | |
320 | ||
321 | ||
// Returns the smallest power of ten that, multiplied repeatedly into d,
// makes d an integral value (1.0 if d is already integral).
// NaN has no such scale: it never compares equal to its own floor, so
// without the guard below the loop would never terminate. 1.0 is returned.
static double scaleForInt(double d) {
    double scale = 1.0;
    if (d != d) {   // true only for NaN
        return scale;
    }
    while (d != floor(d)) {
        d = d * 10.0;
        scale = scale * 10.0;
    }
    return scale;
}
330 | ||
331 | static int32_t | |
332 | getSamplesFromString(const UnicodeString &samples, double *dest, | |
333 | int32_t destCapacity, UErrorCode& status) { | |
334 | int32_t sampleCount = 0; | |
335 | int32_t sampleStartIdx = 0; | |
336 | int32_t sampleEndIdx = 0; | |
337 | ||
338 | //std::string ss; // TODO: debugging. | |
339 | // std::cout << "PluralRules::getSamples(), samples = \"" << samples.toUTF8String(ss) << "\"\n"; | |
340 | for (sampleCount = 0; sampleCount < destCapacity && sampleStartIdx < samples.length(); ) { | |
341 | sampleEndIdx = samples.indexOf(COMMA, sampleStartIdx); | |
342 | if (sampleEndIdx == -1) { | |
343 | sampleEndIdx = samples.length(); | |
344 | } | |
345 | const UnicodeString &sampleRange = samples.tempSubStringBetween(sampleStartIdx, sampleEndIdx); | |
346 | // ss.erase(); | |
347 | // std::cout << "PluralRules::getSamples(), samplesRange = \"" << sampleRange.toUTF8String(ss) << "\"\n"; | |
348 | int32_t tildeIndex = sampleRange.indexOf(TILDE); | |
349 | if (tildeIndex < 0) { | |
350 | FixedDecimal fixed(sampleRange, status); | |
351 | double sampleValue = fixed.source; | |
352 | if (fixed.visibleDecimalDigitCount == 0 || sampleValue != floor(sampleValue)) { | |
353 | dest[sampleCount++] = sampleValue; | |
354 | } | |
355 | } else { | |
356 | ||
357 | FixedDecimal fixedLo(sampleRange.tempSubStringBetween(0, tildeIndex), status); | |
358 | FixedDecimal fixedHi(sampleRange.tempSubStringBetween(tildeIndex+1), status); | |
359 | double rangeLo = fixedLo.source; | |
360 | double rangeHi = fixedHi.source; | |
361 | if (U_FAILURE(status)) { | |
362 | break; | |
363 | } | |
364 | if (rangeHi < rangeLo) { | |
365 | status = U_INVALID_FORMAT_ERROR; | |
366 | break; | |
367 | } | |
368 | ||
369 | // For ranges of samples with fraction decimal digits, scale the number up so that we | |
370 | // are adding one in the units place. Avoids roundoffs from repetitive adds of tenths. | |
371 | ||
372 | double scale = scaleForInt(rangeLo); | |
373 | double t = scaleForInt(rangeHi); | |
374 | if (t > scale) { | |
375 | scale = t; | |
376 | } | |
377 | rangeLo *= scale; | |
378 | rangeHi *= scale; | |
379 | for (double n=rangeLo; n<=rangeHi; n+=1) { | |
380 | // Hack Alert: don't return any decimal samples with integer values that | |
381 | // originated from a format with trailing decimals. | |
382 | // This API is returning doubles, which can't distinguish having displayed | |
383 | // zeros to the right of the decimal. | |
384 | // This results in test failures with values mapping back to a different keyword. | |
385 | double sampleValue = n/scale; | |
386 | if (!(sampleValue == floor(sampleValue) && fixedLo.visibleDecimalDigitCount > 0)) { | |
387 | dest[sampleCount++] = sampleValue; | |
388 | } | |
389 | if (sampleCount >= destCapacity) { | |
390 | break; | |
391 | } | |
392 | } | |
393 | } | |
394 | sampleStartIdx = sampleEndIdx + 1; | |
395 | } | |
396 | return sampleCount; | |
397 | } | |
398 | ||
399 | ||
400 | int32_t | |
401 | PluralRules::getSamples(const UnicodeString &keyword, double *dest, | |
402 | int32_t destCapacity, UErrorCode& status) { | |
403 | RuleChain *rc = rulesForKeyword(keyword); | |
404 | if (rc == NULL || destCapacity == 0 || U_FAILURE(status)) { | |
405 | return 0; | |
406 | } | |
407 | int32_t numSamples = getSamplesFromString(rc->fIntegerSamples, dest, destCapacity, status); | |
408 | if (numSamples == 0) { | |
409 | numSamples = getSamplesFromString(rc->fDecimalSamples, dest, destCapacity, status); | |
410 | } | |
411 | return numSamples; | |
412 | } | |
413 | ||
414 | ||
415 | RuleChain *PluralRules::rulesForKeyword(const UnicodeString &keyword) const { | |
416 | RuleChain *rc; | |
417 | for (rc = mRules; rc != NULL; rc = rc->fNext) { | |
418 | if (rc->fKeyword == keyword) { | |
419 | break; | |
420 | } | |
421 | } | |
422 | return rc; | |
423 | } | |
424 | ||
425 | ||
426 | UBool | |
427 | PluralRules::isKeyword(const UnicodeString& keyword) const { | |
428 | if (0 == keyword.compare(PLURAL_KEYWORD_OTHER, 5)) { | |
429 | return true; | |
430 | } | |
431 | return rulesForKeyword(keyword) != NULL; | |
432 | } | |
433 | ||
434 | UnicodeString | |
435 | PluralRules::getKeywordOther() const { | |
436 | return UnicodeString(TRUE, PLURAL_KEYWORD_OTHER, 5); | |
437 | } | |
438 | ||
439 | UBool | |
440 | PluralRules::operator==(const PluralRules& other) const { | |
441 | const UnicodeString *ptrKeyword; | |
442 | UErrorCode status= U_ZERO_ERROR; | |
443 | ||
444 | if ( this == &other ) { | |
445 | return TRUE; | |
446 | } | |
447 | LocalPointer<StringEnumeration> myKeywordList(getKeywords(status)); | |
448 | LocalPointer<StringEnumeration> otherKeywordList(other.getKeywords(status)); | |
449 | if (U_FAILURE(status)) { | |
450 | return FALSE; | |
451 | } | |
452 | ||
453 | if (myKeywordList->count(status)!=otherKeywordList->count(status)) { | |
454 | return FALSE; | |
455 | } | |
456 | myKeywordList->reset(status); | |
457 | while ((ptrKeyword=myKeywordList->snext(status))!=NULL) { | |
458 | if (!other.isKeyword(*ptrKeyword)) { | |
459 | return FALSE; | |
460 | } | |
461 | } | |
462 | otherKeywordList->reset(status); | |
463 | while ((ptrKeyword=otherKeywordList->snext(status))!=NULL) { | |
464 | if (!this->isKeyword(*ptrKeyword)) { | |
465 | return FALSE; | |
466 | } | |
467 | } | |
468 | if (U_FAILURE(status)) { | |
469 | return FALSE; | |
470 | } | |
471 | ||
472 | return TRUE; | |
473 | } | |
474 | ||
475 | ||
476 | void | |
477 | PluralRuleParser::parse(const UnicodeString& ruleData, PluralRules *prules, UErrorCode &status) | |
478 | { | |
479 | if (U_FAILURE(status)) { | |
480 | return; | |
481 | } | |
482 | U_ASSERT(ruleIndex == 0); // Parsers are good for a single use only! | |
483 | ruleSrc = &ruleData; | |
484 | ||
485 | while (ruleIndex< ruleSrc->length()) { | |
486 | getNextToken(status); | |
487 | if (U_FAILURE(status)) { | |
488 | return; | |
489 | } | |
490 | checkSyntax(status); | |
491 | if (U_FAILURE(status)) { | |
492 | return; | |
493 | } | |
494 | switch (type) { | |
495 | case tAnd: | |
496 | U_ASSERT(curAndConstraint != NULL); | |
497 | curAndConstraint = curAndConstraint->add(); | |
498 | break; | |
499 | case tOr: | |
500 | { | |
501 | U_ASSERT(currentChain != NULL); | |
502 | OrConstraint *orNode=currentChain->ruleHeader; | |
503 | while (orNode->next != NULL) { | |
504 | orNode = orNode->next; | |
505 | } | |
506 | orNode->next= new OrConstraint(); | |
507 | orNode=orNode->next; | |
508 | orNode->next=NULL; | |
509 | curAndConstraint = orNode->add(); | |
510 | } | |
511 | break; | |
512 | case tIs: | |
513 | U_ASSERT(curAndConstraint != NULL); | |
514 | U_ASSERT(curAndConstraint->value == -1); | |
515 | U_ASSERT(curAndConstraint->rangeList == NULL); | |
516 | break; | |
517 | case tNot: | |
518 | U_ASSERT(curAndConstraint != NULL); | |
519 | curAndConstraint->negated=TRUE; | |
520 | break; | |
521 | ||
522 | case tNotEqual: | |
523 | curAndConstraint->negated=TRUE; | |
524 | case tIn: | |
525 | case tWithin: | |
526 | case tEqual: | |
527 | U_ASSERT(curAndConstraint != NULL); | |
528 | curAndConstraint->rangeList = new UVector32(status); | |
529 | curAndConstraint->rangeList->addElement(-1, status); // range Low | |
530 | curAndConstraint->rangeList->addElement(-1, status); // range Hi | |
531 | rangeLowIdx = 0; | |
532 | rangeHiIdx = 1; | |
533 | curAndConstraint->value=PLURAL_RANGE_HIGH; | |
534 | curAndConstraint->integerOnly = (type != tWithin); | |
535 | break; | |
536 | case tNumber: | |
537 | U_ASSERT(curAndConstraint != NULL); | |
538 | if ( (curAndConstraint->op==AndConstraint::MOD)&& | |
539 | (curAndConstraint->opNum == -1 ) ) { | |
540 | curAndConstraint->opNum=getNumberValue(token); | |
541 | } | |
542 | else { | |
543 | if (curAndConstraint->rangeList == NULL) { | |
544 | // this is for an 'is' rule | |
545 | curAndConstraint->value = getNumberValue(token); | |
546 | } else { | |
547 | // this is for an 'in' or 'within' rule | |
548 | if (curAndConstraint->rangeList->elementAti(rangeLowIdx) == -1) { | |
549 | curAndConstraint->rangeList->setElementAt(getNumberValue(token), rangeLowIdx); | |
550 | curAndConstraint->rangeList->setElementAt(getNumberValue(token), rangeHiIdx); | |
551 | } | |
552 | else { | |
553 | curAndConstraint->rangeList->setElementAt(getNumberValue(token), rangeHiIdx); | |
554 | if (curAndConstraint->rangeList->elementAti(rangeLowIdx) > | |
555 | curAndConstraint->rangeList->elementAti(rangeHiIdx)) { | |
556 | // Range Lower bound > Range Upper bound. | |
557 | // U_UNEXPECTED_TOKEN seems a little funny, but it is consistently | |
558 | // used for all plural rule parse errors. | |
559 | status = U_UNEXPECTED_TOKEN; | |
560 | break; | |
561 | } | |
562 | } | |
563 | } | |
564 | } | |
565 | break; | |
566 | case tComma: | |
567 | // TODO: rule syntax checking is inadequate, can happen with badly formed rules. | |
568 | // Catch cases like "n mod 10, is 1" here instead. | |
569 | if (curAndConstraint == NULL || curAndConstraint->rangeList == NULL) { | |
570 | status = U_UNEXPECTED_TOKEN; | |
571 | break; | |
572 | } | |
573 | U_ASSERT(curAndConstraint->rangeList->size() >= 2); | |
574 | rangeLowIdx = curAndConstraint->rangeList->size(); | |
575 | curAndConstraint->rangeList->addElement(-1, status); // range Low | |
576 | rangeHiIdx = curAndConstraint->rangeList->size(); | |
577 | curAndConstraint->rangeList->addElement(-1, status); // range Hi | |
578 | break; | |
579 | case tMod: | |
580 | U_ASSERT(curAndConstraint != NULL); | |
581 | curAndConstraint->op=AndConstraint::MOD; | |
582 | break; | |
583 | case tVariableN: | |
584 | case tVariableI: | |
585 | case tVariableF: | |
586 | case tVariableT: | |
587 | case tVariableV: | |
588 | U_ASSERT(curAndConstraint != NULL); | |
589 | curAndConstraint->digitsType = type; | |
590 | break; | |
591 | case tKeyword: | |
592 | { | |
593 | RuleChain *newChain = new RuleChain; | |
594 | if (newChain == NULL) { | |
595 | status = U_MEMORY_ALLOCATION_ERROR; | |
596 | break; | |
597 | } | |
598 | newChain->fKeyword = token; | |
599 | if (prules->mRules == NULL) { | |
600 | prules->mRules = newChain; | |
601 | } else { | |
602 | // The new rule chain goes at the end of the linked list of rule chains, | |
603 | // unless there is an "other" keyword & chain. "other" must remain last. | |
604 | RuleChain *insertAfter = prules->mRules; | |
605 | while (insertAfter->fNext!=NULL && | |
606 | insertAfter->fNext->fKeyword.compare(PLURAL_KEYWORD_OTHER, 5) != 0 ){ | |
607 | insertAfter=insertAfter->fNext; | |
608 | } | |
609 | newChain->fNext = insertAfter->fNext; | |
610 | insertAfter->fNext = newChain; | |
611 | } | |
612 | OrConstraint *orNode = new OrConstraint(); | |
613 | newChain->ruleHeader = orNode; | |
614 | curAndConstraint = orNode->add(); | |
615 | currentChain = newChain; | |
616 | } | |
617 | break; | |
618 | ||
619 | case tInteger: | |
620 | for (;;) { | |
621 | getNextToken(status); | |
622 | if (U_FAILURE(status) || type == tSemiColon || type == tEOF || type == tAt) { | |
623 | break; | |
624 | } | |
625 | if (type == tEllipsis) { | |
626 | currentChain->fIntegerSamplesUnbounded = TRUE; | |
627 | continue; | |
628 | } | |
629 | currentChain->fIntegerSamples.append(token); | |
630 | } | |
631 | break; | |
632 | ||
633 | case tDecimal: | |
634 | for (;;) { | |
635 | getNextToken(status); | |
636 | if (U_FAILURE(status) || type == tSemiColon || type == tEOF || type == tAt) { | |
637 | break; | |
638 | } | |
639 | if (type == tEllipsis) { | |
640 | currentChain->fDecimalSamplesUnbounded = TRUE; | |
641 | continue; | |
642 | } | |
643 | currentChain->fDecimalSamples.append(token); | |
644 | } | |
645 | break; | |
646 | ||
647 | default: | |
648 | break; | |
649 | } | |
650 | prevType=type; | |
651 | if (U_FAILURE(status)) { | |
652 | break; | |
653 | } | |
654 | } | |
655 | } | |
656 | ||
657 | UnicodeString | |
658 | PluralRules::getRuleFromResource(const Locale& locale, UPluralType type, UErrorCode& errCode) { | |
659 | UnicodeString emptyStr; | |
660 | ||
661 | if (U_FAILURE(errCode)) { | |
662 | return emptyStr; | |
663 | } | |
664 | LocalUResourceBundlePointer rb(ures_openDirect(NULL, "plurals", &errCode)); | |
665 | if(U_FAILURE(errCode)) { | |
666 | return emptyStr; | |
667 | } | |
668 | const char *typeKey; | |
669 | switch (type) { | |
670 | case UPLURAL_TYPE_CARDINAL: | |
671 | typeKey = "locales"; | |
672 | break; | |
673 | case UPLURAL_TYPE_ORDINAL: | |
674 | typeKey = "locales_ordinals"; | |
675 | break; | |
676 | default: | |
677 | // Must not occur: The caller should have checked for valid types. | |
678 | errCode = U_ILLEGAL_ARGUMENT_ERROR; | |
679 | return emptyStr; | |
680 | } | |
681 | LocalUResourceBundlePointer locRes(ures_getByKey(rb.getAlias(), typeKey, NULL, &errCode)); | |
682 | if(U_FAILURE(errCode)) { | |
683 | return emptyStr; | |
684 | } | |
685 | int32_t resLen=0; | |
686 | const char *curLocaleName=locale.getName(); | |
687 | const UChar* s = ures_getStringByKey(locRes.getAlias(), curLocaleName, &resLen, &errCode); | |
688 | ||
689 | if (s == NULL) { | |
690 | // Check parent locales. | |
691 | UErrorCode status = U_ZERO_ERROR; | |
692 | char parentLocaleName[ULOC_FULLNAME_CAPACITY]; | |
693 | const char *curLocaleName=locale.getName(); | |
694 | uprv_strcpy(parentLocaleName, curLocaleName); | |
695 | ||
696 | while (uloc_getParent(parentLocaleName, parentLocaleName, | |
697 | ULOC_FULLNAME_CAPACITY, &status) > 0) { | |
698 | resLen=0; | |
699 | s = ures_getStringByKey(locRes.getAlias(), parentLocaleName, &resLen, &status); | |
700 | if (s != NULL) { | |
701 | errCode = U_ZERO_ERROR; | |
702 | break; | |
703 | } | |
704 | status = U_ZERO_ERROR; | |
705 | } | |
706 | } | |
707 | if (s==NULL) { | |
708 | return emptyStr; | |
709 | } | |
710 | ||
711 | char setKey[256]; | |
712 | u_UCharsToChars(s, setKey, resLen + 1); | |
713 | // printf("\n PluralRule: %s\n", setKey); | |
714 | ||
715 | LocalUResourceBundlePointer ruleRes(ures_getByKey(rb.getAlias(), "rules", NULL, &errCode)); | |
716 | if(U_FAILURE(errCode)) { | |
717 | return emptyStr; | |
718 | } | |
719 | LocalUResourceBundlePointer setRes(ures_getByKey(ruleRes.getAlias(), setKey, NULL, &errCode)); | |
720 | if (U_FAILURE(errCode)) { | |
721 | return emptyStr; | |
722 | } | |
723 | ||
724 | int32_t numberKeys = ures_getSize(setRes.getAlias()); | |
725 | UnicodeString result; | |
726 | const char *key=NULL; | |
727 | for(int32_t i=0; i<numberKeys; ++i) { // Keys are zero, one, few, ... | |
728 | UnicodeString rules = ures_getNextUnicodeString(setRes.getAlias(), &key, &errCode); | |
729 | UnicodeString uKey(key, -1, US_INV); | |
730 | result.append(uKey); | |
731 | result.append(COLON); | |
732 | result.append(rules); | |
733 | result.append(SEMI_COLON); | |
734 | } | |
735 | return result; | |
736 | } | |
737 | ||
738 | ||
739 | UnicodeString | |
740 | PluralRules::getRules() const { | |
741 | UnicodeString rules; | |
742 | if (mRules != NULL) { | |
743 | mRules->dumpRules(rules); | |
744 | } | |
745 | return rules; | |
746 | } | |
747 | ||
748 | ||
749 | AndConstraint::AndConstraint() { | |
750 | op = AndConstraint::NONE; | |
751 | opNum=-1; | |
752 | value = -1; | |
753 | rangeList = NULL; | |
754 | negated = FALSE; | |
755 | integerOnly = FALSE; | |
756 | digitsType = none; | |
757 | next=NULL; | |
758 | } | |
759 | ||
760 | ||
761 | AndConstraint::AndConstraint(const AndConstraint& other) { | |
762 | this->op = other.op; | |
763 | this->opNum=other.opNum; | |
764 | this->value=other.value; | |
765 | this->rangeList=NULL; | |
766 | if (other.rangeList != NULL) { | |
767 | UErrorCode status = U_ZERO_ERROR; | |
768 | this->rangeList = new UVector32(status); | |
769 | this->rangeList->assign(*other.rangeList, status); | |
770 | } | |
771 | this->integerOnly=other.integerOnly; | |
772 | this->negated=other.negated; | |
773 | this->digitsType = other.digitsType; | |
774 | if (other.next==NULL) { | |
775 | this->next=NULL; | |
776 | } | |
777 | else { | |
778 | this->next = new AndConstraint(*other.next); | |
779 | } | |
780 | } | |
781 | ||
782 | AndConstraint::~AndConstraint() { | |
783 | delete rangeList; | |
784 | if (next!=NULL) { | |
785 | delete next; | |
786 | } | |
787 | } | |
788 | ||
789 | ||
790 | UBool | |
791 | AndConstraint::isFulfilled(const FixedDecimal &number) { | |
792 | UBool result = TRUE; | |
793 | if (digitsType == none) { | |
794 | // An empty AndConstraint, created by a rule with a keyword but no following expression. | |
795 | return TRUE; | |
796 | } | |
797 | double n = number.get(digitsType); // pulls n | i | v | f value for the number. | |
798 | // Will always be positive. | |
799 | // May be non-integer (n option only) | |
800 | do { | |
801 | if (integerOnly && n != uprv_floor(n)) { | |
802 | result = FALSE; | |
803 | break; | |
804 | } | |
805 | ||
806 | if (op == MOD) { | |
807 | n = fmod(n, opNum); | |
808 | } | |
809 | if (rangeList == NULL) { | |
810 | result = value == -1 || // empty rule | |
811 | n == value; // 'is' rule | |
812 | break; | |
813 | } | |
814 | result = FALSE; // 'in' or 'within' rule | |
815 | for (int32_t r=0; r<rangeList->size(); r+=2) { | |
816 | if (rangeList->elementAti(r) <= n && n <= rangeList->elementAti(r+1)) { | |
817 | result = TRUE; | |
818 | break; | |
819 | } | |
820 | } | |
821 | } while (FALSE); | |
822 | ||
823 | if (negated) { | |
824 | result = !result; | |
825 | } | |
826 | return result; | |
827 | } | |
828 | ||
829 | ||
830 | AndConstraint* | |
831 | AndConstraint::add() | |
832 | { | |
833 | this->next = new AndConstraint(); | |
834 | return this->next; | |
835 | } | |
836 | ||
837 | OrConstraint::OrConstraint() { | |
838 | childNode=NULL; | |
839 | next=NULL; | |
840 | } | |
841 | ||
842 | OrConstraint::OrConstraint(const OrConstraint& other) { | |
843 | if ( other.childNode == NULL ) { | |
844 | this->childNode = NULL; | |
845 | } | |
846 | else { | |
847 | this->childNode = new AndConstraint(*(other.childNode)); | |
848 | } | |
849 | if (other.next == NULL ) { | |
850 | this->next = NULL; | |
851 | } | |
852 | else { | |
853 | this->next = new OrConstraint(*(other.next)); | |
854 | } | |
855 | } | |
856 | ||
857 | OrConstraint::~OrConstraint() { | |
858 | if (childNode!=NULL) { | |
859 | delete childNode; | |
860 | } | |
861 | if (next!=NULL) { | |
862 | delete next; | |
863 | } | |
864 | } | |
865 | ||
866 | AndConstraint* | |
867 | OrConstraint::add() | |
868 | { | |
869 | OrConstraint *curOrConstraint=this; | |
870 | { | |
871 | while (curOrConstraint->next!=NULL) { | |
872 | curOrConstraint = curOrConstraint->next; | |
873 | } | |
874 | U_ASSERT(curOrConstraint->childNode == NULL); | |
875 | curOrConstraint->childNode = new AndConstraint(); | |
876 | } | |
877 | return curOrConstraint->childNode; | |
878 | } | |
879 | ||
880 | UBool | |
881 | OrConstraint::isFulfilled(const FixedDecimal &number) { | |
882 | OrConstraint* orRule=this; | |
883 | UBool result=FALSE; | |
884 | ||
885 | while (orRule!=NULL && !result) { | |
886 | result=TRUE; | |
887 | AndConstraint* andRule = orRule->childNode; | |
888 | while (andRule!=NULL && result) { | |
889 | result = andRule->isFulfilled(number); | |
890 | andRule=andRule->next; | |
891 | } | |
892 | orRule = orRule->next; | |
893 | } | |
894 | ||
895 | return result; | |
896 | } | |
897 | ||
898 | ||
899 | RuleChain::RuleChain(): fKeyword(), fNext(NULL), ruleHeader(NULL), fDecimalSamples(), fIntegerSamples(), | |
900 | fDecimalSamplesUnbounded(FALSE), fIntegerSamplesUnbounded(FALSE) { | |
901 | } | |
902 | ||
903 | RuleChain::RuleChain(const RuleChain& other) : | |
904 | fKeyword(other.fKeyword), fNext(NULL), ruleHeader(NULL), fDecimalSamples(other.fDecimalSamples), | |
905 | fIntegerSamples(other.fIntegerSamples), fDecimalSamplesUnbounded(other.fDecimalSamplesUnbounded), | |
906 | fIntegerSamplesUnbounded(other.fIntegerSamplesUnbounded) { | |
907 | if (other.ruleHeader != NULL) { | |
908 | this->ruleHeader = new OrConstraint(*(other.ruleHeader)); | |
909 | } | |
910 | if (other.fNext != NULL ) { | |
911 | this->fNext = new RuleChain(*other.fNext); | |
912 | } | |
913 | } | |
914 | ||
915 | RuleChain::~RuleChain() { | |
916 | delete fNext; | |
917 | delete ruleHeader; | |
918 | } | |
919 | ||
920 | ||
921 | UnicodeString | |
922 | RuleChain::select(const FixedDecimal &number) const { | |
923 | if (!number.isNanOrInfinity) { | |
924 | for (const RuleChain *rules = this; rules != NULL; rules = rules->fNext) { | |
925 | if (rules->ruleHeader->isFulfilled(number)) { | |
926 | return rules->fKeyword; | |
927 | } | |
928 | } | |
929 | } | |
930 | return UnicodeString(TRUE, PLURAL_KEYWORD_OTHER, 5); | |
931 | } | |
932 | ||
933 | static UnicodeString tokenString(tokenType tok) { | |
934 | UnicodeString s; | |
935 | switch (tok) { | |
936 | case tVariableN: | |
937 | s.append(LOW_N); break; | |
938 | case tVariableI: | |
939 | s.append(LOW_I); break; | |
940 | case tVariableF: | |
941 | s.append(LOW_F); break; | |
942 | case tVariableV: | |
943 | s.append(LOW_V); break; | |
944 | case tVariableT: | |
945 | s.append(LOW_T); break; | |
946 | default: | |
947 | s.append(TILDE); | |
948 | } | |
949 | return s; | |
950 | } | |
951 | ||
952 | void | |
953 | RuleChain::dumpRules(UnicodeString& result) { | |
954 | UChar digitString[16]; | |
955 | ||
956 | if ( ruleHeader != NULL ) { | |
957 | result += fKeyword; | |
958 | result += COLON; | |
959 | result += SPACE; | |
960 | OrConstraint* orRule=ruleHeader; | |
961 | while ( orRule != NULL ) { | |
962 | AndConstraint* andRule=orRule->childNode; | |
963 | while ( andRule != NULL ) { | |
964 | if ((andRule->op==AndConstraint::NONE) && (andRule->rangeList==NULL) && (andRule->value == -1)) { | |
965 | // Empty Rules. | |
966 | } else if ( (andRule->op==AndConstraint::NONE) && (andRule->rangeList==NULL) ) { | |
967 | result += tokenString(andRule->digitsType); | |
968 | result += UNICODE_STRING_SIMPLE(" is "); | |
969 | if (andRule->negated) { | |
970 | result += UNICODE_STRING_SIMPLE("not "); | |
971 | } | |
972 | uprv_itou(digitString,16, andRule->value,10,0); | |
973 | result += UnicodeString(digitString); | |
974 | } | |
975 | else { | |
976 | result += tokenString(andRule->digitsType); | |
977 | result += SPACE; | |
978 | if (andRule->op==AndConstraint::MOD) { | |
979 | result += UNICODE_STRING_SIMPLE("mod "); | |
980 | uprv_itou(digitString,16, andRule->opNum,10,0); | |
981 | result += UnicodeString(digitString); | |
982 | } | |
983 | if (andRule->rangeList==NULL) { | |
984 | if (andRule->negated) { | |
985 | result += UNICODE_STRING_SIMPLE(" is not "); | |
986 | uprv_itou(digitString,16, andRule->value,10,0); | |
987 | result += UnicodeString(digitString); | |
988 | } | |
989 | else { | |
990 | result += UNICODE_STRING_SIMPLE(" is "); | |
991 | uprv_itou(digitString,16, andRule->value,10,0); | |
992 | result += UnicodeString(digitString); | |
993 | } | |
994 | } | |
995 | else { | |
996 | if (andRule->negated) { | |
997 | if ( andRule->integerOnly ) { | |
998 | result += UNICODE_STRING_SIMPLE(" not in "); | |
999 | } | |
1000 | else { | |
1001 | result += UNICODE_STRING_SIMPLE(" not within "); | |
1002 | } | |
1003 | } | |
1004 | else { | |
1005 | if ( andRule->integerOnly ) { | |
1006 | result += UNICODE_STRING_SIMPLE(" in "); | |
1007 | } | |
1008 | else { | |
1009 | result += UNICODE_STRING_SIMPLE(" within "); | |
1010 | } | |
1011 | } | |
1012 | for (int32_t r=0; r<andRule->rangeList->size(); r+=2) { | |
1013 | int32_t rangeLo = andRule->rangeList->elementAti(r); | |
1014 | int32_t rangeHi = andRule->rangeList->elementAti(r+1); | |
1015 | uprv_itou(digitString,16, rangeLo, 10, 0); | |
1016 | result += UnicodeString(digitString); | |
1017 | result += UNICODE_STRING_SIMPLE(".."); | |
1018 | uprv_itou(digitString,16, rangeHi, 10,0); | |
1019 | result += UnicodeString(digitString); | |
1020 | if (r+2 < andRule->rangeList->size()) { | |
1021 | result += UNICODE_STRING_SIMPLE(", "); | |
1022 | } | |
1023 | } | |
1024 | } | |
1025 | } | |
1026 | if ( (andRule=andRule->next) != NULL) { | |
1027 | result += UNICODE_STRING_SIMPLE(" and "); | |
1028 | } | |
1029 | } | |
1030 | if ( (orRule = orRule->next) != NULL ) { | |
1031 | result += UNICODE_STRING_SIMPLE(" or "); | |
1032 | } | |
1033 | } | |
1034 | } | |
1035 | if ( fNext != NULL ) { | |
1036 | result += UNICODE_STRING_SIMPLE("; "); | |
1037 | fNext->dumpRules(result); | |
1038 | } | |
1039 | } | |
1040 | ||
1041 | ||
1042 | UErrorCode | |
1043 | RuleChain::getKeywords(int32_t capacityOfKeywords, UnicodeString* keywords, int32_t& arraySize) const { | |
1044 | if ( arraySize < capacityOfKeywords-1 ) { | |
1045 | keywords[arraySize++]=fKeyword; | |
1046 | } | |
1047 | else { | |
1048 | return U_BUFFER_OVERFLOW_ERROR; | |
1049 | } | |
1050 | ||
1051 | if ( fNext != NULL ) { | |
1052 | return fNext->getKeywords(capacityOfKeywords, keywords, arraySize); | |
1053 | } | |
1054 | else { | |
1055 | return U_ZERO_ERROR; | |
1056 | } | |
1057 | } | |
1058 | ||
1059 | UBool | |
1060 | RuleChain::isKeyword(const UnicodeString& keywordParam) const { | |
1061 | if ( fKeyword == keywordParam ) { | |
1062 | return TRUE; | |
1063 | } | |
1064 | ||
1065 | if ( fNext != NULL ) { | |
1066 | return fNext->isKeyword(keywordParam); | |
1067 | } | |
1068 | else { | |
1069 | return FALSE; | |
1070 | } | |
1071 | } | |
1072 | ||
1073 | ||
1074 | PluralRuleParser::PluralRuleParser() : | |
1075 | ruleIndex(0), token(), type(none), prevType(none), | |
1076 | curAndConstraint(NULL), currentChain(NULL), rangeLowIdx(-1), rangeHiIdx(-1) | |
1077 | { | |
1078 | } | |
1079 | ||
1080 | PluralRuleParser::~PluralRuleParser() { | |
1081 | } | |
1082 | ||
1083 | ||
1084 | int32_t | |
1085 | PluralRuleParser::getNumberValue(const UnicodeString& token) { | |
1086 | int32_t i; | |
1087 | char digits[128]; | |
1088 | ||
1089 | i = token.extract(0, token.length(), digits, ARRAY_SIZE(digits), US_INV); | |
1090 | digits[i]='\0'; | |
1091 | ||
1092 | return((int32_t)atoi(digits)); | |
1093 | } | |
1094 | ||
1095 | ||
1096 | void | |
1097 | PluralRuleParser::checkSyntax(UErrorCode &status) | |
1098 | { | |
1099 | if (U_FAILURE(status)) { | |
1100 | return; | |
1101 | } | |
1102 | if (!(prevType==none || prevType==tSemiColon)) { | |
1103 | type = getKeyType(token, type); // Switch token type from tKeyword if we scanned a reserved word, | |
1104 | // and we are not at the start of a rule, where a | |
1105 | // keyword is expected. | |
1106 | } | |
1107 | ||
1108 | switch(prevType) { | |
1109 | case none: | |
1110 | case tSemiColon: | |
1111 | if (type!=tKeyword && type != tEOF) { | |
1112 | status = U_UNEXPECTED_TOKEN; | |
1113 | } | |
1114 | break; | |
1115 | case tVariableN: | |
1116 | case tVariableI: | |
1117 | case tVariableF: | |
1118 | case tVariableT: | |
1119 | case tVariableV: | |
1120 | if (type != tIs && type != tMod && type != tIn && | |
1121 | type != tNot && type != tWithin && type != tEqual && type != tNotEqual) { | |
1122 | status = U_UNEXPECTED_TOKEN; | |
1123 | } | |
1124 | break; | |
1125 | case tKeyword: | |
1126 | if (type != tColon) { | |
1127 | status = U_UNEXPECTED_TOKEN; | |
1128 | } | |
1129 | break; | |
1130 | case tColon: | |
1131 | if (!(type == tVariableN || | |
1132 | type == tVariableI || | |
1133 | type == tVariableF || | |
1134 | type == tVariableT || | |
1135 | type == tVariableV || | |
1136 | type == tAt)) { | |
1137 | status = U_UNEXPECTED_TOKEN; | |
1138 | } | |
1139 | break; | |
1140 | case tIs: | |
1141 | if ( type != tNumber && type != tNot) { | |
1142 | status = U_UNEXPECTED_TOKEN; | |
1143 | } | |
1144 | break; | |
1145 | case tNot: | |
1146 | if (type != tNumber && type != tIn && type != tWithin) { | |
1147 | status = U_UNEXPECTED_TOKEN; | |
1148 | } | |
1149 | break; | |
1150 | case tMod: | |
1151 | case tDot2: | |
1152 | case tIn: | |
1153 | case tWithin: | |
1154 | case tEqual: | |
1155 | case tNotEqual: | |
1156 | if (type != tNumber) { | |
1157 | status = U_UNEXPECTED_TOKEN; | |
1158 | } | |
1159 | break; | |
1160 | case tAnd: | |
1161 | case tOr: | |
1162 | if ( type != tVariableN && | |
1163 | type != tVariableI && | |
1164 | type != tVariableF && | |
1165 | type != tVariableT && | |
1166 | type != tVariableV) { | |
1167 | status = U_UNEXPECTED_TOKEN; | |
1168 | } | |
1169 | break; | |
1170 | case tComma: | |
1171 | if (type != tNumber) { | |
1172 | status = U_UNEXPECTED_TOKEN; | |
1173 | } | |
1174 | break; | |
1175 | case tNumber: | |
1176 | if (type != tDot2 && type != tSemiColon && type != tIs && type != tNot && | |
1177 | type != tIn && type != tEqual && type != tNotEqual && type != tWithin && | |
1178 | type != tAnd && type != tOr && type != tComma && type != tAt && | |
1179 | type != tEOF) | |
1180 | { | |
1181 | status = U_UNEXPECTED_TOKEN; | |
1182 | } | |
1183 | // TODO: a comma following a number that is not part of a range will be allowed. | |
1184 | // It's not the only case of this sort of thing. Parser needs a re-write. | |
1185 | break; | |
1186 | case tAt: | |
1187 | if (type != tDecimal && type != tInteger) { | |
1188 | status = U_UNEXPECTED_TOKEN; | |
1189 | } | |
1190 | break; | |
1191 | default: | |
1192 | status = U_UNEXPECTED_TOKEN; | |
1193 | break; | |
1194 | } | |
1195 | } | |
1196 | ||
1197 | ||
1198 | /* | |
1199 | * Scan the next token from the input rules. | |
1200 | * rules and returned token type are in the parser state variables. | |
1201 | */ | |
1202 | void | |
1203 | PluralRuleParser::getNextToken(UErrorCode &status) | |
1204 | { | |
1205 | if (U_FAILURE(status)) { | |
1206 | return; | |
1207 | } | |
1208 | ||
1209 | UChar ch; | |
1210 | while (ruleIndex < ruleSrc->length()) { | |
1211 | ch = ruleSrc->charAt(ruleIndex); | |
1212 | type = charType(ch); | |
1213 | if (type != tSpace) { | |
1214 | break; | |
1215 | } | |
1216 | ++(ruleIndex); | |
1217 | } | |
1218 | if (ruleIndex >= ruleSrc->length()) { | |
1219 | type = tEOF; | |
1220 | return; | |
1221 | } | |
1222 | int32_t curIndex= ruleIndex; | |
1223 | ||
1224 | switch (type) { | |
1225 | case tColon: | |
1226 | case tSemiColon: | |
1227 | case tComma: | |
1228 | case tEllipsis: | |
1229 | case tTilde: // scanned '~' | |
1230 | case tAt: // scanned '@' | |
1231 | case tEqual: // scanned '=' | |
1232 | case tMod: // scanned '%' | |
1233 | // Single character tokens. | |
1234 | ++curIndex; | |
1235 | break; | |
1236 | ||
1237 | case tNotEqual: // scanned '!' | |
1238 | if (ruleSrc->charAt(curIndex+1) == EQUALS) { | |
1239 | curIndex += 2; | |
1240 | } else { | |
1241 | type = none; | |
1242 | curIndex += 1; | |
1243 | } | |
1244 | break; | |
1245 | ||
1246 | case tKeyword: | |
1247 | while (type == tKeyword && ++curIndex < ruleSrc->length()) { | |
1248 | ch = ruleSrc->charAt(curIndex); | |
1249 | type = charType(ch); | |
1250 | } | |
1251 | type = tKeyword; | |
1252 | break; | |
1253 | ||
1254 | case tNumber: | |
1255 | while (type == tNumber && ++curIndex < ruleSrc->length()) { | |
1256 | ch = ruleSrc->charAt(curIndex); | |
1257 | type = charType(ch); | |
1258 | } | |
1259 | type = tNumber; | |
1260 | break; | |
1261 | ||
1262 | case tDot: | |
1263 | // We could be looking at either ".." in a range, or "..." at the end of a sample. | |
1264 | if (curIndex+1 >= ruleSrc->length() || ruleSrc->charAt(curIndex+1) != DOT) { | |
1265 | ++curIndex; | |
1266 | break; // Single dot | |
1267 | } | |
1268 | if (curIndex+2 >= ruleSrc->length() || ruleSrc->charAt(curIndex+2) != DOT) { | |
1269 | curIndex += 2; | |
1270 | type = tDot2; | |
1271 | break; // double dot | |
1272 | } | |
1273 | type = tEllipsis; | |
1274 | curIndex += 3; | |
1275 | break; // triple dot | |
1276 | ||
1277 | default: | |
1278 | status = U_UNEXPECTED_TOKEN; | |
1279 | ++curIndex; | |
1280 | break; | |
1281 | } | |
1282 | ||
1283 | U_ASSERT(ruleIndex <= ruleSrc->length()); | |
1284 | U_ASSERT(curIndex <= ruleSrc->length()); | |
1285 | token=UnicodeString(*ruleSrc, ruleIndex, curIndex-ruleIndex); | |
1286 | ruleIndex = curIndex; | |
1287 | } | |
1288 | ||
1289 | tokenType | |
1290 | PluralRuleParser::charType(UChar ch) { | |
1291 | if ((ch>=U_ZERO) && (ch<=U_NINE)) { | |
1292 | return tNumber; | |
1293 | } | |
1294 | if (ch>=LOW_A && ch<=LOW_Z) { | |
1295 | return tKeyword; | |
1296 | } | |
1297 | switch (ch) { | |
1298 | case COLON: | |
1299 | return tColon; | |
1300 | case SPACE: | |
1301 | return tSpace; | |
1302 | case SEMI_COLON: | |
1303 | return tSemiColon; | |
1304 | case DOT: | |
1305 | return tDot; | |
1306 | case COMMA: | |
1307 | return tComma; | |
1308 | case EXCLAMATION: | |
1309 | return tNotEqual; | |
1310 | case EQUALS: | |
1311 | return tEqual; | |
1312 | case PERCENT_SIGN: | |
1313 | return tMod; | |
1314 | case AT: | |
1315 | return tAt; | |
1316 | case ELLIPSIS: | |
1317 | return tEllipsis; | |
1318 | case TILDE: | |
1319 | return tTilde; | |
1320 | default : | |
1321 | return none; | |
1322 | } | |
1323 | } | |
1324 | ||
1325 | ||
1326 | // Set token type for reserved words in the Plural Rule syntax. | |
1327 | ||
1328 | tokenType | |
1329 | PluralRuleParser::getKeyType(const UnicodeString &token, tokenType keyType) | |
1330 | { | |
1331 | if (keyType != tKeyword) { | |
1332 | return keyType; | |
1333 | } | |
1334 | ||
1335 | if (0 == token.compare(PK_VAR_N, 1)) { | |
1336 | keyType = tVariableN; | |
1337 | } else if (0 == token.compare(PK_VAR_I, 1)) { | |
1338 | keyType = tVariableI; | |
1339 | } else if (0 == token.compare(PK_VAR_F, 1)) { | |
1340 | keyType = tVariableF; | |
1341 | } else if (0 == token.compare(PK_VAR_T, 1)) { | |
1342 | keyType = tVariableT; | |
1343 | } else if (0 == token.compare(PK_VAR_V, 1)) { | |
1344 | keyType = tVariableV; | |
1345 | } else if (0 == token.compare(PK_IS, 2)) { | |
1346 | keyType = tIs; | |
1347 | } else if (0 == token.compare(PK_AND, 3)) { | |
1348 | keyType = tAnd; | |
1349 | } else if (0 == token.compare(PK_IN, 2)) { | |
1350 | keyType = tIn; | |
1351 | } else if (0 == token.compare(PK_WITHIN, 6)) { | |
1352 | keyType = tWithin; | |
1353 | } else if (0 == token.compare(PK_NOT, 3)) { | |
1354 | keyType = tNot; | |
1355 | } else if (0 == token.compare(PK_MOD, 3)) { | |
1356 | keyType = tMod; | |
1357 | } else if (0 == token.compare(PK_OR, 2)) { | |
1358 | keyType = tOr; | |
1359 | } else if (0 == token.compare(PK_DECIMAL, 7)) { | |
1360 | keyType = tDecimal; | |
1361 | } else if (0 == token.compare(PK_INTEGER, 7)) { | |
1362 | keyType = tInteger; | |
1363 | } | |
1364 | return keyType; | |
1365 | } | |
1366 | ||
1367 | ||
1368 | PluralKeywordEnumeration::PluralKeywordEnumeration(RuleChain *header, UErrorCode& status) | |
1369 | : pos(0), fKeywordNames(status) { | |
1370 | if (U_FAILURE(status)) { | |
1371 | return; | |
1372 | } | |
1373 | fKeywordNames.setDeleter(uprv_deleteUObject); | |
1374 | UBool addKeywordOther=TRUE; | |
1375 | RuleChain *node=header; | |
1376 | while(node!=NULL) { | |
1377 | fKeywordNames.addElement(new UnicodeString(node->fKeyword), status); | |
1378 | if (U_FAILURE(status)) { | |
1379 | return; | |
1380 | } | |
1381 | if (0 == node->fKeyword.compare(PLURAL_KEYWORD_OTHER, 5)) { | |
1382 | addKeywordOther= FALSE; | |
1383 | } | |
1384 | node=node->fNext; | |
1385 | } | |
1386 | ||
1387 | if (addKeywordOther) { | |
1388 | fKeywordNames.addElement(new UnicodeString(PLURAL_KEYWORD_OTHER), status); | |
1389 | } | |
1390 | } | |
1391 | ||
1392 | const UnicodeString* | |
1393 | PluralKeywordEnumeration::snext(UErrorCode& status) { | |
1394 | if (U_SUCCESS(status) && pos < fKeywordNames.size()) { | |
1395 | return (const UnicodeString*)fKeywordNames.elementAt(pos++); | |
1396 | } | |
1397 | return NULL; | |
1398 | } | |
1399 | ||
1400 | void | |
1401 | PluralKeywordEnumeration::reset(UErrorCode& /*status*/) { | |
1402 | pos=0; | |
1403 | } | |
1404 | ||
1405 | int32_t | |
1406 | PluralKeywordEnumeration::count(UErrorCode& /*status*/) const { | |
1407 | return fKeywordNames.size(); | |
1408 | } | |
1409 | ||
1410 | PluralKeywordEnumeration::~PluralKeywordEnumeration() { | |
1411 | } | |
1412 | ||
1413 | ||
1414 | ||
1415 | FixedDecimal::FixedDecimal(double n, int32_t v, int64_t f) { | |
1416 | init(n, v, f); | |
1417 | // check values. TODO make into unit test. | |
1418 | // | |
1419 | // long visiblePower = (int) Math.pow(10, v); | |
1420 | // if (decimalDigits > visiblePower) { | |
1421 | // throw new IllegalArgumentException(); | |
1422 | // } | |
1423 | // double fraction = intValue + (decimalDigits / (double) visiblePower); | |
1424 | // if (fraction != source) { | |
1425 | // double diff = Math.abs(fraction - source)/(Math.abs(fraction) + Math.abs(source)); | |
1426 | // if (diff > 0.00000001d) { | |
1427 | // throw new IllegalArgumentException(); | |
1428 | // } | |
1429 | // } | |
1430 | } | |
1431 | ||
1432 | FixedDecimal::FixedDecimal(double n, int32_t v) { | |
1433 | // Ugly, but for samples we don't care. | |
1434 | init(n, v, getFractionalDigits(n, v)); | |
1435 | } | |
1436 | ||
1437 | FixedDecimal::FixedDecimal(double n) { | |
1438 | init(n); | |
1439 | } | |
1440 | ||
1441 | FixedDecimal::FixedDecimal() { | |
1442 | init(0, 0, 0); | |
1443 | } | |
1444 | ||
1445 | ||
1446 | // Create a FixedDecimal from a UnicodeString containing a number. | |
1447 | // Inefficient, but only used for samples, so simplicity trumps efficiency. | |
1448 | ||
1449 | FixedDecimal::FixedDecimal(const UnicodeString &num, UErrorCode &status) { | |
1450 | CharString cs; | |
1451 | cs.appendInvariantChars(num, status); | |
1452 | DigitList dl; | |
1453 | dl.set(cs.toStringPiece(), status); | |
1454 | if (U_FAILURE(status)) { | |
1455 | init(0, 0, 0); | |
1456 | return; | |
1457 | } | |
1458 | int32_t decimalPoint = num.indexOf(DOT); | |
1459 | double n = dl.getDouble(); | |
1460 | if (decimalPoint == -1) { | |
1461 | init(n, 0, 0); | |
1462 | } else { | |
1463 | int32_t v = num.length() - decimalPoint - 1; | |
1464 | init(n, v, getFractionalDigits(n, v)); | |
1465 | } | |
1466 | } | |
1467 | ||
1468 | ||
1469 | FixedDecimal::FixedDecimal(const FixedDecimal &other) { | |
1470 | source = other.source; | |
1471 | visibleDecimalDigitCount = other.visibleDecimalDigitCount; | |
1472 | decimalDigits = other.decimalDigits; | |
1473 | decimalDigitsWithoutTrailingZeros = other.decimalDigitsWithoutTrailingZeros; | |
1474 | intValue = other.intValue; | |
1475 | hasIntegerValue = other.hasIntegerValue; | |
1476 | isNegative = other.isNegative; | |
1477 | isNanOrInfinity = other.isNanOrInfinity; | |
1478 | } | |
1479 | ||
1480 | ||
// Initialize from a double alone: the fraction digit count comes from
// decimals(n) and the fraction digits from getFractionalDigits().
void FixedDecimal::init(double n) {
    const int32_t fractionDigitCount = decimals(n);
    const int64_t fractionDigits = getFractionalDigits(n, fractionDigitCount);
    init(n, fractionDigitCount, fractionDigits);
}
1485 | ||
1486 | ||
// Set every field from a value `n`, its visible fraction digit count `v` and
// its fraction digits `f` (as an integer).
// The sign is recorded separately and `source` holds the absolute value.
// For NaN and infinity, v and f are ignored and all digit fields are zero.
void FixedDecimal::init(double n, int32_t v, int64_t f) {
    isNegative = n < 0.0;
    source = fabs(n);
    isNanOrInfinity = uprv_isNaN(source) || uprv_isPositiveInfinity(source);

    if (!isNanOrInfinity) {
        intValue = (int64_t)source;
        hasIntegerValue = (source == intValue);
    } else {
        v = 0;
        f = 0;
        intValue = 0;
        hasIntegerValue = FALSE;
    }

    visibleDecimalDigitCount = v;
    decimalDigits = f;

    // Same digits with trailing zeros removed; zero stays zero.
    int64_t trimmed = f;
    if (trimmed != 0) {
        while ((trimmed % 10) == 0) {
            trimmed /= 10;
        }
    }
    decimalDigitsWithoutTrailingZeros = trimmed;
}
1513 | ||
1514 | ||
1515 | // Fast path only exact initialization. Return true if successful. | |
1516 | // Note: Do not multiply by 10 each time through loop, rounding cruft can build | |
1517 | // up that makes the check for an integer result fail. | |
1518 | // A single multiply of the original number works more reliably. | |
1519 | static int32_t p10[] = {1, 10, 100, 1000, 10000}; | |
1520 | UBool FixedDecimal::quickInit(double n) { | |
1521 | UBool success = FALSE; | |
1522 | n = fabs(n); | |
1523 | int32_t numFractionDigits; | |
1524 | for (numFractionDigits = 0; numFractionDigits <= 3; numFractionDigits++) { | |
1525 | double scaledN = n * p10[numFractionDigits]; | |
1526 | if (scaledN == floor(scaledN)) { | |
1527 | success = TRUE; | |
1528 | break; | |
1529 | } | |
1530 | } | |
1531 | if (success) { | |
1532 | init(n, numFractionDigits, getFractionalDigits(n, numFractionDigits)); | |
1533 | } | |
1534 | return success; | |
1535 | } | |
1536 | ||
1537 | ||
1538 | ||
// Count the number of decimal digits in the fraction part of the number,
// excluding trailing zeros.
int32_t FixedDecimal::decimals(double n) {
    n = fabs(n);

    // Fast path for the common cases: integers or fractions with 3 or fewer digits.
    for (int scaleIndex = 0; scaleIndex <= 3; scaleIndex++) {
        const double scaled = n * p10[scaleIndex];
        if (scaled == floor(scaled)) {
            return scaleIndex;
        }
    }

    // Slow path, convert with sprintf, parse converted output.
    // The formatted number looks like this: 1.234567890123457e-01
    //   buf[0] leading digit, buf[1] decimal point, buf[2..16] the 15 mantissa
    //   fraction digits, buf[17] 'e', buf+18 the signed exponent.
    char buf[30] = {0};
    sprintf(buf, "%1.15e", n);
    const int exponent = atoi(buf + 18);

    // Drop trailing zeros of the mantissa, scanning backwards from its last digit.
    int significantFractionDigits = 15;
    for (const char *p = buf + 16; *p == '0'; --p) {
        --significantFractionDigits;
    }

    // Fraction part of fixed point representation.
    return significantFractionDigits - exponent;
}
1565 | ||
1566 | ||
// Get the fraction digits of a double, represented as an integer.
//    v is the number of visible fraction digits in the displayed form of the number.
//       Example: n = 1001.234, v = 6, result = 234000
//    Returns 0 when v is 0, or when n is a whole number, NaN or infinite.
//    The result is rounded to nearest and clamped to U_INT64_MAX.
//    TODO: need to think through how this is used in the plural rule context.
//          This function can easily encounter integer overflow,
//          and can easily return noise digits when the precision of a double is exceeded.

int64_t FixedDecimal::getFractionalDigits(double n, int32_t v) {
    if (v == 0 || n == floor(n) || uprv_isNaN(n) || uprv_isPositiveInfinity(n)) {
        return 0;
    }
    n = fabs(n);
    double fract = n - floor(n);
    switch (v) {
      case 1: return (int64_t)(fract*10.0 + 0.5);
      case 2: return (int64_t)(fract*100.0 + 0.5);
      case 3: return (int64_t)(fract*1000.0 + 0.5);
      default: {
          double scaled = floor(fract * pow(10.0, (double)v) + 0.5);
          // U_INT64_MAX is not exactly representable as a double; the
          // conversion rounds it up to 2^63, which is itself out of range for
          // int64_t. Clamp with >= so that value is never cast.
          if (scaled >= (double)U_INT64_MAX) {
              return U_INT64_MAX;
          }
          return (int64_t)scaled;
      }
    }
}
1593 | ||
1594 | ||
1595 | void FixedDecimal::adjustForMinFractionDigits(int32_t minFractionDigits) { | |
1596 | int32_t numTrailingFractionZeros = minFractionDigits - visibleDecimalDigitCount; | |
1597 | if (numTrailingFractionZeros > 0) { | |
1598 | for (int32_t i=0; i<numTrailingFractionZeros; i++) { | |
1599 | // Do not let the decimalDigits value overflow if there are many trailing zeros. | |
1600 | // Limit the value to 18 digits, the most that a 64 bit int can fully represent. | |
1601 | if (decimalDigits >= 100000000000000000LL) { | |
1602 | break; | |
1603 | } | |
1604 | decimalDigits *= 10; | |
1605 | } | |
1606 | visibleDecimalDigitCount += numTrailingFractionZeros; | |
1607 | } | |
1608 | } | |
1609 | ||
1610 | ||
1611 | double FixedDecimal::get(tokenType operand) const { | |
1612 | switch(operand) { | |
1613 | case tVariableN: return source; | |
1614 | case tVariableI: return (double)intValue; | |
1615 | case tVariableF: return (double)decimalDigits; | |
1616 | case tVariableT: return (double)decimalDigitsWithoutTrailingZeros; | |
1617 | case tVariableV: return visibleDecimalDigitCount; | |
1618 | default: | |
1619 | U_ASSERT(FALSE); // unexpected. | |
1620 | return source; | |
1621 | } | |
1622 | } | |
1623 | ||
1624 | int32_t FixedDecimal::getVisibleFractionDigitCount() const { | |
1625 | return visibleDecimalDigitCount; | |
1626 | } | |
1627 | ||
1628 | ||
1629 | ||
1630 | PluralAvailableLocalesEnumeration::PluralAvailableLocalesEnumeration(UErrorCode &status) { | |
1631 | fLocales = NULL; | |
1632 | fRes = NULL; | |
1633 | fOpenStatus = status; | |
1634 | if (U_FAILURE(status)) { | |
1635 | return; | |
1636 | } | |
1637 | fOpenStatus = U_ZERO_ERROR; | |
1638 | LocalUResourceBundlePointer rb(ures_openDirect(NULL, "plurals", &fOpenStatus)); | |
1639 | fLocales = ures_getByKey(rb.getAlias(), "locales", NULL, &fOpenStatus); | |
1640 | } | |
1641 | ||
1642 | PluralAvailableLocalesEnumeration::~PluralAvailableLocalesEnumeration() { | |
1643 | ures_close(fLocales); | |
1644 | ures_close(fRes); | |
1645 | fLocales = NULL; | |
1646 | fRes = NULL; | |
1647 | } | |
1648 | ||
1649 | const char *PluralAvailableLocalesEnumeration::next(int32_t *resultLength, UErrorCode &status) { | |
1650 | if (U_FAILURE(status)) { | |
1651 | return NULL; | |
1652 | } | |
1653 | if (U_FAILURE(fOpenStatus)) { | |
1654 | status = fOpenStatus; | |
1655 | return NULL; | |
1656 | } | |
1657 | fRes = ures_getNextResource(fLocales, fRes, &status); | |
1658 | if (fRes == NULL || U_FAILURE(status)) { | |
1659 | if (status == U_INDEX_OUTOFBOUNDS_ERROR) { | |
1660 | status = U_ZERO_ERROR; | |
1661 | } | |
1662 | return NULL; | |
1663 | } | |
1664 | const char *result = ures_getKey(fRes); | |
1665 | if (resultLength != NULL) { | |
1666 | *resultLength = uprv_strlen(result); | |
1667 | } | |
1668 | return result; | |
1669 | } | |
1670 | ||
1671 | ||
1672 | void PluralAvailableLocalesEnumeration::reset(UErrorCode &status) { | |
1673 | if (U_FAILURE(status)) { | |
1674 | return; | |
1675 | } | |
1676 | if (U_FAILURE(fOpenStatus)) { | |
1677 | status = fOpenStatus; | |
1678 | return; | |
1679 | } | |
1680 | ures_resetIterator(fLocales); | |
1681 | } | |
1682 | ||
1683 | int32_t PluralAvailableLocalesEnumeration::count(UErrorCode &status) const { | |
1684 | if (U_FAILURE(status)) { | |
1685 | return 0; | |
1686 | } | |
1687 | if (U_FAILURE(fOpenStatus)) { | |
1688 | status = fOpenStatus; | |
1689 | return 0; | |
1690 | } | |
1691 | return ures_getSize(fLocales); | |
1692 | } | |
1693 | ||
1694 | U_NAMESPACE_END | |
1695 | ||
1696 | ||
1697 | #endif /* #if !UCONFIG_NO_FORMATTING */ | |
1698 | ||
1699 | //eof |