]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/nfrs.cpp
ICU-66108.tar.gz
[apple/icu.git] / icuSources / i18n / nfrs.cpp
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
b75a7d8f
A
3/*
4******************************************************************************
2ca993e8 5* Copyright (C) 1997-2015, International Business Machines
b75a7d8f
A
6* Corporation and others. All Rights Reserved.
7******************************************************************************
8* file name: nfrs.cpp
f3c0d7a5 9* encoding: UTF-8
b75a7d8f
A
10* tab size: 8 (not used)
11* indentation:4
12*
13* Modification history
14* Date Name Comments
15* 10/11/2001 Doug Ported from ICU4J
16*/
17
18#include "nfrs.h"
19
20#if U_HAVE_RBNF
21
22#include "unicode/uchar.h"
23#include "nfrule.h"
24#include "nfrlist.h"
4388f060 25#include "patternprops.h"
f3c0d7a5 26#include "putilimp.h"
b75a7d8f
A
27
28#ifdef RBNF_DEBUG
29#include "cmemory.h"
30#endif
31
2ca993e8
A
32enum {
33 /** -x */
34 NEGATIVE_RULE_INDEX = 0,
35 /** x.x */
36 IMPROPER_FRACTION_RULE_INDEX = 1,
37 /** 0.x */
38 PROPER_FRACTION_RULE_INDEX = 2,
39 /** x.0 */
40 MASTER_RULE_INDEX = 3,
41 /** Inf */
42 INFINITY_RULE_INDEX = 4,
43 /** NaN */
44 NAN_RULE_INDEX = 5,
45 NON_NUMERICAL_RULE_LENGTH = 6
46};
47
b75a7d8f
A
48U_NAMESPACE_BEGIN
49
50#if 0
51// euclid's algorithm works with doubles
52// note, doubles only get us up to one quadrillion or so, which
53// isn't as much range as we get with longs. We probably still
54// want either 64-bit math, or BigInteger.
55
56static int64_t
57util_lcm(int64_t x, int64_t y)
58{
59 x.abs();
60 y.abs();
61
62 if (x == 0 || y == 0) {
63 return 0;
64 } else {
65 do {
66 if (x < y) {
67 int64_t t = x; x = y; y = t;
68 }
69 x -= y * (x/y);
70 } while (x != 0);
71
72 return y;
73 }
74}
75
76#else
77/**
78 * Calculates the least common multiple of x and y.
79 */
80static int64_t
81util_lcm(int64_t x, int64_t y)
82{
83 // binary gcd algorithm from Knuth, "The Art of Computer Programming,"
84 // vol. 2, 1st ed., pp. 298-299
85 int64_t x1 = x;
86 int64_t y1 = y;
87
88 int p2 = 0;
89 while ((x1 & 1) == 0 && (y1 & 1) == 0) {
90 ++p2;
91 x1 >>= 1;
92 y1 >>= 1;
93 }
94
95 int64_t t;
96 if ((x1 & 1) == 1) {
97 t = -y1;
98 } else {
99 t = x1;
100 }
101
102 while (t != 0) {
103 while ((t & 1) == 0) {
104 t = t >> 1;
105 }
106 if (t > 0) {
107 x1 = t;
108 } else {
109 y1 = -t;
110 }
111 t = x1 - y1;
112 }
113
114 int64_t gcd = x1 << p2;
115
116 // x * y == gcd(x, y) * lcm(x, y)
117 return x / gcd * y;
118}
119#endif
120
121static const UChar gPercent = 0x0025;
122static const UChar gColon = 0x003a;
123static const UChar gSemicolon = 0x003b;
124static const UChar gLineFeed = 0x000a;
125
b75a7d8f
A
126static const UChar gPercentPercent[] =
127{
128 0x25, 0x25, 0
129}; /* "%%" */
130
4388f060
A
131static const UChar gNoparse[] =
132{
133 0x40, 0x6E, 0x6F, 0x70, 0x61, 0x72, 0x73, 0x65, 0
134}; /* "@noparse" */
135
2ca993e8 136NFRuleSet::NFRuleSet(RuleBasedNumberFormat *_owner, UnicodeString* descriptions, int32_t index, UErrorCode& status)
b75a7d8f
A
137 : name()
138 , rules(0)
2ca993e8
A
139 , owner(_owner)
140 , fractionRules()
b75a7d8f
A
141 , fIsFractionRuleSet(FALSE)
142 , fIsPublic(FALSE)
4388f060 143 , fIsParseable(TRUE)
b75a7d8f 144{
2ca993e8
A
145 for (int32_t i = 0; i < NON_NUMERICAL_RULE_LENGTH; ++i) {
146 nonNumericalRules[i] = NULL;
b75a7d8f
A
147 }
148
149 if (U_FAILURE(status)) {
150 return;
151 }
152
153 UnicodeString& description = descriptions[index]; // !!! make sure index is valid
154
374ca955
A
155 if (description.length() == 0) {
156 // throw new IllegalArgumentException("Empty rule set description");
157 status = U_PARSE_ERROR;
73c04bcf 158 return;
374ca955
A
159 }
160
b75a7d8f
A
161 // if the description begins with a rule set name (the rule set
162 // name can be omitted in formatter descriptions that consist
163 // of only one rule set), copy it out into our "name" member
164 // and delete it from the description
165 if (description.charAt(0) == gPercent) {
166 int32_t pos = description.indexOf(gColon);
167 if (pos == -1) {
168 // throw new IllegalArgumentException("Rule set name doesn't end in colon");
169 status = U_PARSE_ERROR;
170 } else {
171 name.setTo(description, 0, pos);
4388f060 172 while (pos < description.length() && PatternProps::isWhiteSpace(description.charAt(++pos))) {
b75a7d8f
A
173 }
174 description.remove(0, pos);
175 }
176 } else {
374ca955 177 name.setTo(UNICODE_STRING_SIMPLE("%default"));
b75a7d8f
A
178 }
179
180 if (description.length() == 0) {
181 // throw new IllegalArgumentException("Empty rule set description");
182 status = U_PARSE_ERROR;
183 }
184
4388f060
A
185 fIsPublic = name.indexOf(gPercentPercent, 2, 0) != 0;
186
187 if ( name.endsWith(gNoparse,8) ) {
188 fIsParseable = FALSE;
189 name.truncate(name.length()-8); // remove the @noparse from the name
190 }
b75a7d8f
A
191
192 // all of the other members of NFRuleSet are initialized
193 // by parseRules()
194}
195
196void
2ca993e8 197NFRuleSet::parseRules(UnicodeString& description, UErrorCode& status)
b75a7d8f
A
198{
199 // start by creating a Vector whose elements are Strings containing
200 // the descriptions of the rules (one rule per element). The rules
201 // are separated by semicolons (there's no escape facility: ALL
202 // semicolons are rule delimiters)
203
204 if (U_FAILURE(status)) {
205 return;
206 }
207
4388f060
A
208 // ensure we are starting with an empty rule list
209 rules.deleteAll();
210
b75a7d8f
A
211 // dlf - the original code kept a separate description array for no reason,
212 // so I got rid of it. The loop was too complex so I simplified it.
213
214 UnicodeString currentDescription;
215 int32_t oldP = 0;
216 while (oldP < description.length()) {
217 int32_t p = description.indexOf(gSemicolon, oldP);
218 if (p == -1) {
219 p = description.length();
220 }
221 currentDescription.setTo(description, oldP, p - oldP);
222 NFRule::makeRules(currentDescription, this, rules.last(), owner, rules, status);
223 oldP = p + 1;
224 }
225
226 // for rules that didn't specify a base value, their base values
227 // were initialized to 0. Make another pass through the list and
228 // set all those rules' base values. We also remove any special
229 // rules from the list and put them into their own member variables
230 int64_t defaultBaseValue = 0;
231
232 // (this isn't a for loop because we might be deleting items from
233 // the vector-- we want to make sure we only increment i when
234 // we _didn't_ delete aything from the vector)
2ca993e8
A
235 int32_t rulesSize = rules.size();
236 for (int32_t i = 0; i < rulesSize; i++) {
b75a7d8f 237 NFRule* rule = rules[i];
2ca993e8 238 int64_t baseValue = rule->getBaseValue();
b75a7d8f 239
2ca993e8 240 if (baseValue == 0) {
b75a7d8f
A
241 // if the rule's base value is 0, fill in a default
242 // base value (this will be 1 plus the preceding
243 // rule's base value for regular rule sets, and the
244 // same as the preceding rule's base value in fraction
245 // rule sets)
374ca955 246 rule->setBaseValue(defaultBaseValue, status);
2ca993e8
A
247 }
248 else {
b75a7d8f
A
249 // if it's a regular rule that already knows its base value,
250 // check to make sure the rules are in order, and update
251 // the default base value for the next rule
2ca993e8 252 if (baseValue < defaultBaseValue) {
b75a7d8f
A
253 // throw new IllegalArgumentException("Rules are not in order");
254 status = U_PARSE_ERROR;
255 return;
256 }
2ca993e8
A
257 defaultBaseValue = baseValue;
258 }
259 if (!fIsFractionRuleSet) {
260 ++defaultBaseValue;
261 }
262 }
263}
264
265/**
266 * Set one of the non-numerical rules.
267 * @param rule The rule to set.
268 */
269void NFRuleSet::setNonNumericalRule(NFRule *rule) {
270 int64_t baseValue = rule->getBaseValue();
271 if (baseValue == NFRule::kNegativeNumberRule) {
272 delete nonNumericalRules[NEGATIVE_RULE_INDEX];
273 nonNumericalRules[NEGATIVE_RULE_INDEX] = rule;
274 }
275 else if (baseValue == NFRule::kImproperFractionRule) {
276 setBestFractionRule(IMPROPER_FRACTION_RULE_INDEX, rule, TRUE);
277 }
278 else if (baseValue == NFRule::kProperFractionRule) {
279 setBestFractionRule(PROPER_FRACTION_RULE_INDEX, rule, TRUE);
280 }
281 else if (baseValue == NFRule::kMasterRule) {
282 setBestFractionRule(MASTER_RULE_INDEX, rule, TRUE);
283 }
284 else if (baseValue == NFRule::kInfinityRule) {
285 delete nonNumericalRules[INFINITY_RULE_INDEX];
286 nonNumericalRules[INFINITY_RULE_INDEX] = rule;
287 }
288 else if (baseValue == NFRule::kNaNRule) {
289 delete nonNumericalRules[NAN_RULE_INDEX];
290 nonNumericalRules[NAN_RULE_INDEX] = rule;
291 }
292}
293
294/**
295 * Determine the best fraction rule to use. Rules matching the decimal point from
296 * DecimalFormatSymbols become the main set of rules to use.
297 * @param originalIndex The index into nonNumericalRules
298 * @param newRule The new rule to consider
299 * @param rememberRule Should the new rule be added to fractionRules.
300 */
301void NFRuleSet::setBestFractionRule(int32_t originalIndex, NFRule *newRule, UBool rememberRule) {
302 if (rememberRule) {
303 fractionRules.add(newRule);
304 }
305 NFRule *bestResult = nonNumericalRules[originalIndex];
306 if (bestResult == NULL) {
307 nonNumericalRules[originalIndex] = newRule;
308 }
309 else {
310 // We have more than one. Which one is better?
311 const DecimalFormatSymbols *decimalFormatSymbols = owner->getDecimalFormatSymbols();
312 if (decimalFormatSymbols->getSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol).charAt(0)
313 == newRule->getDecimalPoint())
314 {
315 nonNumericalRules[originalIndex] = newRule;
b75a7d8f 316 }
2ca993e8 317 // else leave it alone
b75a7d8f
A
318 }
319}
320
321NFRuleSet::~NFRuleSet()
322{
2ca993e8
A
323 for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
324 if (i != IMPROPER_FRACTION_RULE_INDEX
325 && i != PROPER_FRACTION_RULE_INDEX
326 && i != MASTER_RULE_INDEX)
327 {
328 delete nonNumericalRules[i];
329 }
330 // else it will be deleted via NFRuleList fractionRules
331 }
b75a7d8f
A
332}
333
374ca955 334static UBool
b75a7d8f
A
335util_equalRules(const NFRule* rule1, const NFRule* rule2)
336{
337 if (rule1) {
338 if (rule2) {
339 return *rule1 == *rule2;
340 }
341 } else if (!rule2) {
342 return TRUE;
343 }
344 return FALSE;
345}
346
347UBool
348NFRuleSet::operator==(const NFRuleSet& rhs) const
349{
350 if (rules.size() == rhs.rules.size() &&
351 fIsFractionRuleSet == rhs.fIsFractionRuleSet &&
2ca993e8
A
352 name == rhs.name) {
353
354 // ...then compare the non-numerical rule lists...
355 for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
356 if (!util_equalRules(nonNumericalRules[i], rhs.nonNumericalRules[i])) {
357 return FALSE;
358 }
359 }
b75a7d8f 360
2ca993e8 361 // ...then compare the rule lists...
b75a7d8f
A
362 for (uint32_t i = 0; i < rules.size(); ++i) {
363 if (*rules[i] != *rhs.rules[i]) {
364 return FALSE;
365 }
366 }
367 return TRUE;
368 }
369 return FALSE;
370}
371
2ca993e8
A
372void
373NFRuleSet::setDecimalFormatSymbols(const DecimalFormatSymbols &newSymbols, UErrorCode& status) {
374 for (uint32_t i = 0; i < rules.size(); ++i) {
375 rules[i]->setDecimalFormatSymbols(newSymbols, status);
376 }
377 // Switch the fraction rules to mirror the DecimalFormatSymbols.
378 for (int32_t nonNumericalIdx = IMPROPER_FRACTION_RULE_INDEX; nonNumericalIdx <= MASTER_RULE_INDEX; nonNumericalIdx++) {
379 if (nonNumericalRules[nonNumericalIdx]) {
380 for (uint32_t fIdx = 0; fIdx < fractionRules.size(); fIdx++) {
381 NFRule *fractionRule = fractionRules[fIdx];
382 if (nonNumericalRules[nonNumericalIdx]->getBaseValue() == fractionRule->getBaseValue()) {
383 setBestFractionRule(nonNumericalIdx, fractionRule, FALSE);
384 }
385 }
386 }
387 }
388
389 for (uint32_t nnrIdx = 0; nnrIdx < NON_NUMERICAL_RULE_LENGTH; nnrIdx++) {
390 NFRule *rule = nonNumericalRules[nnrIdx];
391 if (rule) {
392 rule->setDecimalFormatSymbols(newSymbols, status);
393 }
394 }
395}
396
397#define RECURSION_LIMIT 64
374ca955 398
b75a7d8f 399void
2ca993e8 400NFRuleSet::format(int64_t number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const
b75a7d8f 401{
2ca993e8
A
402 if (recursionCount >= RECURSION_LIMIT) {
403 // stop recursion
404 status = U_INVALID_STATE_ERROR;
405 return;
406 }
407 const NFRule *rule = findNormalRule(number);
374ca955 408 if (rule) { // else error, but can't report it
2ca993e8 409 rule->doFormat(number, toAppendTo, pos, ++recursionCount, status);
374ca955 410 }
b75a7d8f
A
411}
412
413void
2ca993e8 414NFRuleSet::format(double number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const
b75a7d8f 415{
2ca993e8
A
416 if (recursionCount >= RECURSION_LIMIT) {
417 // stop recursion
418 status = U_INVALID_STATE_ERROR;
419 return;
420 }
421 const NFRule *rule = findDoubleRule(number);
374ca955 422 if (rule) { // else error, but can't report it
2ca993e8 423 rule->doFormat(number, toAppendTo, pos, ++recursionCount, status);
374ca955 424 }
b75a7d8f
A
425}
426
2ca993e8 427const NFRule*
b75a7d8f
A
428NFRuleSet::findDoubleRule(double number) const
429{
430 // if this is a fraction rule set, use findFractionRuleSetRule()
431 if (isFractionRuleSet()) {
432 return findFractionRuleSetRule(number);
433 }
434
2ca993e8
A
435 if (uprv_isNaN(number)) {
436 const NFRule *rule = nonNumericalRules[NAN_RULE_INDEX];
437 if (!rule) {
438 rule = owner->getDefaultNaNRule();
439 }
440 return rule;
441 }
442
b75a7d8f
A
443 // if the number is negative, return the negative number rule
444 // (if there isn't a negative-number rule, we pretend it's a
445 // positive number)
446 if (number < 0) {
2ca993e8
A
447 if (nonNumericalRules[NEGATIVE_RULE_INDEX]) {
448 return nonNumericalRules[NEGATIVE_RULE_INDEX];
b75a7d8f
A
449 } else {
450 number = -number;
451 }
452 }
453
2ca993e8
A
454 if (uprv_isInfinite(number)) {
455 const NFRule *rule = nonNumericalRules[INFINITY_RULE_INDEX];
456 if (!rule) {
457 rule = owner->getDefaultInfinityRule();
458 }
459 return rule;
460 }
461
b75a7d8f
A
462 // if the number isn't an integer, we use one of the fraction rules...
463 if (number != uprv_floor(number)) {
464 // if the number is between 0 and 1, return the proper
465 // fraction rule
2ca993e8
A
466 if (number < 1 && nonNumericalRules[PROPER_FRACTION_RULE_INDEX]) {
467 return nonNumericalRules[PROPER_FRACTION_RULE_INDEX];
b75a7d8f
A
468 }
469 // otherwise, return the improper fraction rule
2ca993e8
A
470 else if (nonNumericalRules[IMPROPER_FRACTION_RULE_INDEX]) {
471 return nonNumericalRules[IMPROPER_FRACTION_RULE_INDEX];
b75a7d8f
A
472 }
473 }
474
475 // if there's a master rule, use it to format the number
2ca993e8
A
476 if (nonNumericalRules[MASTER_RULE_INDEX]) {
477 return nonNumericalRules[MASTER_RULE_INDEX];
729e4ab9
A
478 }
479
b75a7d8f
A
480 // and if we haven't yet returned a rule, use findNormalRule()
481 // to find the applicable rule
482 int64_t r = util64_fromDouble(number + 0.5);
483 return findNormalRule(r);
484}
485
2ca993e8 486const NFRule *
b75a7d8f
A
487NFRuleSet::findNormalRule(int64_t number) const
488{
489 // if this is a fraction rule set, use findFractionRuleSetRule()
490 // to find the rule (we should only go into this clause if the
491 // value is 0)
492 if (fIsFractionRuleSet) {
493 return findFractionRuleSetRule((double)number);
494 }
495
496 // if the number is negative, return the negative-number rule
497 // (if there isn't one, pretend the number is positive)
498 if (number < 0) {
2ca993e8
A
499 if (nonNumericalRules[NEGATIVE_RULE_INDEX]) {
500 return nonNumericalRules[NEGATIVE_RULE_INDEX];
b75a7d8f
A
501 } else {
502 number = -number;
503 }
504 }
505
506 // we have to repeat the preceding two checks, even though we
507 // do them in findRule(), because the version of format() that
508 // takes a long bypasses findRule() and goes straight to this
509 // function. This function does skip the fraction rules since
510 // we know the value is an integer (it also skips the master
511 // rule, since it's considered a fraction rule. Skipping the
512 // master rule in this function is also how we avoid infinite
513 // recursion)
514
515 // {dlf} unfortunately this fails if there are no rules except
516 // special rules. If there are no rules, use the master rule.
517
518 // binary-search the rule list for the applicable rule
519 // (a rule is used for all values from its base value to
520 // the next rule's base value)
521 int32_t hi = rules.size();
522 if (hi > 0) {
523 int32_t lo = 0;
524
525 while (lo < hi) {
526 int32_t mid = (lo + hi) / 2;
527 if (rules[mid]->getBaseValue() == number) {
528 return rules[mid];
529 }
530 else if (rules[mid]->getBaseValue() > number) {
531 hi = mid;
532 }
533 else {
534 lo = mid + 1;
535 }
536 }
374ca955
A
537 if (hi == 0) { // bad rule set, minimum base > 0
538 return NULL; // want to throw exception here
539 }
540
b75a7d8f
A
541 NFRule *result = rules[hi - 1];
542
543 // use shouldRollBack() to see whether we need to invoke the
544 // rollback rule (see shouldRollBack()'s documentation for
545 // an explanation of the rollback rule). If we do, roll back
546 // one rule and return that one instead of the one we'd normally
547 // return
f3c0d7a5 548 if (result->shouldRollBack(number)) {
374ca955
A
549 if (hi == 1) { // bad rule set, no prior rule to rollback to from this base
550 return NULL;
551 }
b75a7d8f
A
552 result = rules[hi - 2];
553 }
554 return result;
555 }
556 // else use the master rule
2ca993e8 557 return nonNumericalRules[MASTER_RULE_INDEX];
b75a7d8f
A
558}
559
560/**
561 * If this rule is a fraction rule set, this function is used by
562 * findRule() to select the most appropriate rule for formatting
563 * the number. Basically, the base value of each rule in the rule
564 * set is treated as the denominator of a fraction. Whichever
565 * denominator can produce the fraction closest in value to the
566 * number passed in is the result. If there's a tie, the earlier
567 * one in the list wins. (If there are two rules in a row with the
568 * same base value, the first one is used when the numerator of the
569 * fraction would be 1, and the second rule is used the rest of the
570 * time.
571 * @param number The number being formatted (which will always be
572 * a number between 0 and 1)
573 * @return The rule to use to format this number
574 */
2ca993e8 575const NFRule*
b75a7d8f
A
576NFRuleSet::findFractionRuleSetRule(double number) const
577{
578 // the obvious way to do this (multiply the value being formatted
579 // by each rule's base value until you get an integral result)
580 // doesn't work because of rounding error. This method is more
581 // accurate
582
583 // find the least common multiple of the rules' base values
584 // and multiply this by the number being formatted. This is
585 // all the precision we need, and we can do all of the rest
586 // of the math using integer arithmetic
587 int64_t leastCommonMultiple = rules[0]->getBaseValue();
588 int64_t numerator;
589 {
590 for (uint32_t i = 1; i < rules.size(); ++i) {
591 leastCommonMultiple = util_lcm(leastCommonMultiple, rules[i]->getBaseValue());
592 }
593 numerator = util64_fromDouble(number * (double)leastCommonMultiple + 0.5);
594 }
595 // for each rule, do the following...
596 int64_t tempDifference;
597 int64_t difference = util64_fromDouble(uprv_maxMantissa());
598 int32_t winner = 0;
599 for (uint32_t i = 0; i < rules.size(); ++i) {
600 // "numerator" is the numerator of the fraction if the
601 // denominator is the LCD. The numerator if the rule's
602 // base value is the denominator is "numerator" times the
603 // base value divided bythe LCD. Here we check to see if
604 // that's an integer, and if not, how close it is to being
605 // an integer.
606 tempDifference = numerator * rules[i]->getBaseValue() % leastCommonMultiple;
607
608
609 // normalize the result of the above calculation: we want
610 // the numerator's distance from the CLOSEST multiple
611 // of the LCD
612 if (leastCommonMultiple - tempDifference < tempDifference) {
613 tempDifference = leastCommonMultiple - tempDifference;
614 }
615
616 // if this is as close as we've come, keep track of how close
617 // that is, and the line number of the rule that did it. If
618 // we've scored a direct hit, we don't have to look at any more
619 // rules
620 if (tempDifference < difference) {
621 difference = tempDifference;
622 winner = i;
623 if (difference == 0) {
624 break;
625 }
626 }
627 }
628
629 // if we have two successive rules that both have the winning base
630 // value, then the first one (the one we found above) is used if
631 // the numerator of the fraction is 1 and the second one is used if
632 // the numerator of the fraction is anything else (this lets us
633 // do things like "one third"/"two thirds" without haveing to define
634 // a whole bunch of extra rule sets)
635 if ((unsigned)(winner + 1) < rules.size() &&
636 rules[winner + 1]->getBaseValue() == rules[winner]->getBaseValue()) {
637 double n = ((double)rules[winner]->getBaseValue()) * number;
638 if (n < 0.5 || n >= 2) {
639 ++winner;
640 }
641 }
642
643 // finally, return the winning rule
644 return rules[winner];
645}
646
647/**
648 * Parses a string. Matches the string to be parsed against each
649 * of its rules (with a base value less than upperBound) and returns
650 * the value produced by the rule that matched the most charcters
651 * in the source string.
652 * @param text The string to parse
653 * @param parsePosition The initial position is ignored and assumed
654 * to be 0. On exit, this object has been updated to point to the
655 * first character position this rule set didn't consume.
656 * @param upperBound Limits the rules that can be allowed to match.
657 * Only rules whose base values are strictly less than upperBound
658 * are considered.
659 * @return The numerical result of parsing this string. This will
660 * be the matching rule's base value, composed appropriately with
661 * the results of matching any of its substitutions. The object
662 * will be an instance of Long if it's an integral value; otherwise,
663 * it will be an instance of Double. This function always returns
664 * a valid object: If nothing matched the input string at all,
665 * this function returns new Long(0), and the parse position is
666 * left unchanged.
667 */
668#ifdef RBNF_DEBUG
669#include <stdio.h>
670
671static void dumpUS(FILE* f, const UnicodeString& us) {
672 int len = us.length();
673 char* buf = (char *)uprv_malloc((len+1)*sizeof(char)); //new char[len+1];
46f4442e
A
674 if (buf != NULL) {
675 us.extract(0, len, buf);
676 buf[len] = 0;
677 fprintf(f, "%s", buf);
678 uprv_free(buf); //delete[] buf;
679 }
b75a7d8f
A
680}
681#endif
682
683UBool
0f5d89e8 684NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, uint32_t nonNumericalExecutedRuleMask, Formattable& result, UBool lenient) const
b75a7d8f
A
685{
686 // try matching each rule in the rule set against the text being
687 // parsed. Whichever one matches the most characters is the one
688 // that determines the value we return.
689
690 result.setLong(0);
691
692 // dump out if there's no text to parse
693 if (text.length() == 0) {
694 return 0;
695 }
696
697 ParsePosition highWaterMark;
698 ParsePosition workingPos = pos;
699
700#ifdef RBNF_DEBUG
701 fprintf(stderr, "<nfrs> %x '", this);
702 dumpUS(stderr, name);
703 fprintf(stderr, "' text '");
704 dumpUS(stderr, text);
705 fprintf(stderr, "'\n");
706 fprintf(stderr, " parse negative: %d\n", this, negativeNumberRule != 0);
707#endif
2ca993e8
A
708 // Try each of the negative rules, fraction rules, infinity rules and NaN rules
709 for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
0f5d89e8
A
710 if (nonNumericalRules[i] && ((nonNumericalExecutedRuleMask >> i) & 1) == 0) {
711 // Mark this rule as being executed so that we don't try to execute it again.
712 nonNumericalExecutedRuleMask |= 1 << i;
713
2ca993e8 714 Formattable tempResult;
0f5d89e8 715 UBool success = nonNumericalRules[i]->doParse(text, workingPos, 0, upperBound, nonNumericalExecutedRuleMask, tempResult, lenient || isDecimalFormatRuleParseable() );
2ca993e8
A
716 if (success && (workingPos.getIndex() > highWaterMark.getIndex())) {
717 result = tempResult;
718 highWaterMark = workingPos;
b75a7d8f 719 }
2ca993e8 720 workingPos = pos;
b75a7d8f
A
721 }
722 }
723#ifdef RBNF_DEBUG
724 fprintf(stderr, "<nfrs> continue other with text '");
725 dumpUS(stderr, text);
726 fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex());
727#endif
728
729 // finally, go through the regular rules one at a time. We start
730 // at the end of the list because we want to try matching the most
731 // sigificant rule first (this helps ensure that we parse
732 // "five thousand three hundred six" as
733 // "(five thousand) (three hundred) (six)" rather than
734 // "((five thousand three) hundred) (six)"). Skip rules whose
735 // base values are higher than the upper bound (again, this helps
736 // limit ambiguity by making sure the rules that match a rule's
737 // are less significant than the rule containing the substitutions)/
738 {
739 int64_t ub = util64_fromDouble(upperBound);
740#ifdef RBNF_DEBUG
741 {
742 char ubstr[64];
743 util64_toa(ub, ubstr, 64);
744 char ubstrhex[64];
745 util64_toa(ub, ubstrhex, 64, 16);
746 fprintf(stderr, "ub: %g, i64: %s (%s)\n", upperBound, ubstr, ubstrhex);
747 }
748#endif
749 for (int32_t i = rules.size(); --i >= 0 && highWaterMark.getIndex() < text.length();) {
750 if ((!fIsFractionRuleSet) && (rules[i]->getBaseValue() >= ub)) {
751 continue;
752 }
753 Formattable tempResult;
0f5d89e8 754 UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, nonNumericalExecutedRuleMask, tempResult);
b75a7d8f
A
755 if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
756 result = tempResult;
757 highWaterMark = workingPos;
758 }
759 workingPos = pos;
760 }
761 }
762#ifdef RBNF_DEBUG
763 fprintf(stderr, "<nfrs> exit\n");
764#endif
765 // finally, update the parse postion we were passed to point to the
766 // first character we didn't use, and return the result that
767 // corresponds to that string of characters
768 pos = highWaterMark;
769
770 return 1;
771}
772
773void
774NFRuleSet::appendRules(UnicodeString& result) const
775{
2ca993e8
A
776 uint32_t i;
777
b75a7d8f
A
778 // the rule set name goes first...
779 result.append(name);
780 result.append(gColon);
781 result.append(gLineFeed);
782
783 // followed by the regular rules...
2ca993e8 784 for (i = 0; i < rules.size(); i++) {
73c04bcf 785 rules[i]->_appendRuleText(result);
b75a7d8f
A
786 result.append(gLineFeed);
787 }
788
789 // followed by the special rules (if they exist)
2ca993e8
A
790 for (i = 0; i < NON_NUMERICAL_RULE_LENGTH; ++i) {
791 NFRule *rule = nonNumericalRules[i];
792 if (nonNumericalRules[i]) {
793 if (rule->getBaseValue() == NFRule::kImproperFractionRule
794 || rule->getBaseValue() == NFRule::kProperFractionRule
795 || rule->getBaseValue() == NFRule::kMasterRule)
796 {
797 for (uint32_t fIdx = 0; fIdx < fractionRules.size(); fIdx++) {
798 NFRule *fractionRule = fractionRules[fIdx];
799 if (fractionRule->getBaseValue() == rule->getBaseValue()) {
800 fractionRule->_appendRuleText(result);
801 result.append(gLineFeed);
802 }
803 }
804 }
805 else {
806 rule->_appendRuleText(result);
b75a7d8f
A
807 result.append(gLineFeed);
808 }
809 }
810 }
811}
812
813// utility functions
814
815int64_t util64_fromDouble(double d) {
816 int64_t result = 0;
817 if (!uprv_isNaN(d)) {
818 double mant = uprv_maxMantissa();
819 if (d < -mant) {
820 d = -mant;
821 } else if (d > mant) {
822 d = mant;
823 }
824 UBool neg = d < 0;
825 if (neg) {
826 d = -d;
827 }
828 result = (int64_t)uprv_floor(d);
829 if (neg) {
830 result = -result;
831 }
832 }
833 return result;
834}
835
0f5d89e8 836uint64_t util64_pow(uint32_t base, uint16_t exponent) {
f3c0d7a5 837 if (base == 0) {
b75a7d8f 838 return 0;
b75a7d8f 839 }
0f5d89e8
A
840 uint64_t result = 1;
841 uint64_t pow = base;
842 while (true) {
843 if ((exponent & 1) == 1) {
844 result *= pow;
845 }
846 exponent >>= 1;
847 if (exponent == 0) {
848 break;
849 }
850 pow *= pow;
851 }
f3c0d7a5 852 return result;
b75a7d8f
A
853}
854
855static const uint8_t asciiDigits[] = {
856 0x30u, 0x31u, 0x32u, 0x33u, 0x34u, 0x35u, 0x36u, 0x37u,
857 0x38u, 0x39u, 0x61u, 0x62u, 0x63u, 0x64u, 0x65u, 0x66u,
858 0x67u, 0x68u, 0x69u, 0x6au, 0x6bu, 0x6cu, 0x6du, 0x6eu,
859 0x6fu, 0x70u, 0x71u, 0x72u, 0x73u, 0x74u, 0x75u, 0x76u,
860 0x77u, 0x78u, 0x79u, 0x7au,
861};
862
863static const UChar kUMinus = (UChar)0x002d;
864
73c04bcf 865#ifdef RBNF_DEBUG
b75a7d8f
A
866static const char kMinus = '-';
867
868static const uint8_t digitInfo[] = {
869 0, 0, 0, 0, 0, 0, 0, 0,
870 0, 0, 0, 0, 0, 0, 0, 0,
871 0, 0, 0, 0, 0, 0, 0, 0,
872 0, 0, 0, 0, 0, 0, 0, 0,
873 0, 0, 0, 0, 0, 0, 0, 0,
874 0, 0, 0, 0, 0, 0, 0, 0,
875 0x80u, 0x81u, 0x82u, 0x83u, 0x84u, 0x85u, 0x86u, 0x87u,
876 0x88u, 0x89u, 0, 0, 0, 0, 0, 0,
877 0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
878 0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
879 0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
880 0xa1u, 0xa2u, 0xa3u, 0, 0, 0, 0, 0,
881 0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
882 0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
883 0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
884 0xa1u, 0xa2u, 0xa3u, 0, 0, 0, 0, 0,
885};
886
b75a7d8f
A
887int64_t util64_atoi(const char* str, uint32_t radix)
888{
889 if (radix > 36) {
890 radix = 36;
891 } else if (radix < 2) {
892 radix = 2;
893 }
894 int64_t lradix = radix;
895
896 int neg = 0;
897 if (*str == kMinus) {
898 ++str;
899 neg = 1;
900 }
901 int64_t result = 0;
902 uint8_t b;
903 while ((b = digitInfo[*str++]) && ((b &= 0x7f) < radix)) {
904 result *= lradix;
905 result += (int32_t)b;
906 }
907 if (neg) {
908 result = -result;
909 }
910 return result;
911}
b75a7d8f
A
912
913int64_t util64_utoi(const UChar* str, uint32_t radix)
914{
915 if (radix > 36) {
916 radix = 36;
917 } else if (radix < 2) {
918 radix = 2;
919 }
920 int64_t lradix = radix;
921
922 int neg = 0;
923 if (*str == kUMinus) {
924 ++str;
925 neg = 1;
926 }
927 int64_t result = 0;
928 UChar c;
929 uint8_t b;
930 while (((c = *str++) < 0x0080) && (b = digitInfo[c]) && ((b &= 0x7f) < radix)) {
931 result *= lradix;
932 result += (int32_t)b;
933 }
934 if (neg) {
935 result = -result;
936 }
937 return result;
938}
939
b75a7d8f
A
940uint32_t util64_toa(int64_t w, char* buf, uint32_t len, uint32_t radix, UBool raw)
941{
942 if (radix > 36) {
943 radix = 36;
944 } else if (radix < 2) {
945 radix = 2;
946 }
947 int64_t base = radix;
948
949 char* p = buf;
950 if (len && (w < 0) && (radix == 10) && !raw) {
951 w = -w;
952 *p++ = kMinus;
953 --len;
954 } else if (len && (w == 0)) {
955 *p++ = (char)raw ? 0 : asciiDigits[0];
956 --len;
957 }
958
959 while (len && w != 0) {
960 int64_t n = w / base;
961 int64_t m = n * base;
962 int32_t d = (int32_t)(w-m);
963 *p++ = raw ? (char)d : asciiDigits[d];
964 w = n;
965 --len;
966 }
967 if (len) {
968 *p = 0; // null terminate if room for caller convenience
969 }
970
971 len = p - buf;
972 if (*buf == kMinus) {
973 ++buf;
974 }
975 while (--p > buf) {
976 char c = *p;
977 *p = *buf;
978 *buf = c;
979 ++buf;
980 }
981
982 return len;
983}
984#endif
985
986uint32_t util64_tou(int64_t w, UChar* buf, uint32_t len, uint32_t radix, UBool raw)
987{
988 if (radix > 36) {
989 radix = 36;
990 } else if (radix < 2) {
991 radix = 2;
992 }
993 int64_t base = radix;
994
995 UChar* p = buf;
996 if (len && (w < 0) && (radix == 10) && !raw) {
997 w = -w;
998 *p++ = kUMinus;
999 --len;
1000 } else if (len && (w == 0)) {
1001 *p++ = (UChar)raw ? 0 : asciiDigits[0];
1002 --len;
1003 }
1004
1005 while (len && (w != 0)) {
1006 int64_t n = w / base;
1007 int64_t m = n * base;
1008 int32_t d = (int32_t)(w-m);
1009 *p++ = (UChar)(raw ? d : asciiDigits[d]);
1010 w = n;
1011 --len;
1012 }
1013 if (len) {
1014 *p = 0; // null terminate if room for caller convenience
1015 }
1016
1017 len = (uint32_t)(p - buf);
1018 if (*buf == kUMinus) {
1019 ++buf;
1020 }
1021 while (--p > buf) {
1022 UChar c = *p;
1023 *p = *buf;
1024 *buf = c;
1025 ++buf;
1026 }
1027
1028 return len;
1029}
1030
1031
1032U_NAMESPACE_END
1033
1034/* U_HAVE_RBNF */
1035#endif