icuSources/i18n/nfrs.cpp

   1 /*
   2 ******************************************************************************
   3 *   Copyright (C) 1997-2015, International Business Machines
   4 *   Corporation and others.  All Rights Reserved.
   5 ******************************************************************************
   6 *   file name:  nfrs.cpp
   7 *   encoding:   US-ASCII
   8 *   tab size:   8 (not used)
   9 *   indentation:4
  10 *
  11 * Modification history
  12 * Date        Name      Comments
  13 * 10/11/2001  Doug      Ported from ICU4J
  14 */
  15
  16 #include "nfrs.h"
  17
  18 #if U_HAVE_RBNF
  19
  20 #include "unicode/uchar.h"
  21 #include "nfrule.h"
  22 #include "nfrlist.h"
  23 #include "patternprops.h"
  24
  25 #ifdef RBNF_DEBUG
  26 #include "cmemory.h"
  27 #endif
  28
  29 enum {
  30     /** -x */
  31     NEGATIVE_RULE_INDEX = 0,
  32     /** x.x */
  33     IMPROPER_FRACTION_RULE_INDEX = 1,
  34     /** 0.x */
  35     PROPER_FRACTION_RULE_INDEX = 2,
  36     /** x.0 */
  37     MASTER_RULE_INDEX = 3,
  38     /** Inf */
  39     INFINITY_RULE_INDEX = 4,
  40     /** NaN */
  41     NAN_RULE_INDEX = 5,
  42     NON_NUMERICAL_RULE_LENGTH = 6
  43 };
  44
  45 U_NAMESPACE_BEGIN
  46
  47 #if 0
  48 // euclid's algorithm works with doubles
  49 // note, doubles only get us up to one quadrillion or so, which
  50 // isn't as much range as we get with longs.  We probably still
  51 // want either 64-bit math, or BigInteger.
  52
  53 static int64_t
  54 util_lcm(int64_t x, int64_t y)
  55 {
  56     x.abs();
  57     y.abs();
  58
  59     if (x == 0 || y == 0) {
  60         return 0;
  61     } else {
  62         do {
  63             if (x < y) {
  64                 int64_t t = x; x = y; y = t;
  65             }
  66             x -= y * (x/y);
  67         } while (x != 0);
  68
  69         return y;
  70     }
  71 }
  72
  73 #else
  74 /**
  75  * Calculates the least common multiple of x and y.
  76  */
  77 static int64_t
  78 util_lcm(int64_t x, int64_t y)
  79 {
  80     // binary gcd algorithm from Knuth, "The Art of Computer Programming,"
  81     // vol. 2, 1st ed., pp. 298-299
  82     int64_t x1 = x;
  83     int64_t y1 = y;
  84
  85     int p2 = 0;
  86     while ((x1 & 1) == 0 && (y1 & 1) == 0) {
  87         ++p2;
  88         x1 >>= 1;
  89         y1 >>= 1;
  90     }
  91
  92     int64_t t;
  93     if ((x1 & 1) == 1) {
  94         t = -y1;
  95     } else {
  96         t = x1;
  97     }
  98
  99     while (t != 0) {
 100         while ((t & 1) == 0) {
 101             t = t >> 1;
 102         }
 103         if (t > 0) {
 104             x1 = t;
 105         } else {
 106             y1 = -t;
 107         }
 108         t = x1 - y1;
 109     }
 110
 111     int64_t gcd = x1 << p2;
 112
 113     // x * y == gcd(x, y) * lcm(x, y)
 114     return x / gcd * y;
 115 }
 116 #endif
 117
 118 static const UChar gPercent = 0x0025;
 119 static const UChar gColon = 0x003a;
 120 static const UChar gSemicolon = 0x003b;
 121 static const UChar gLineFeed = 0x000a;
 122
 123 static const UChar gPercentPercent[] =
 124 {
 125     0x25, 0x25, 0
 126 }; /* "%%" */
 127
 128 static const UChar gNoparse[] =
 129 {
 130     0x40, 0x6E, 0x6F, 0x70, 0x61, 0x72, 0x73, 0x65, 0
 131 }; /* "@noparse" */
 132
 133 NFRuleSet::NFRuleSet(RuleBasedNumberFormat *_owner, UnicodeString* descriptions, int32_t index, UErrorCode& status)
 134   : name()
 135   , rules(0)
 136   , owner(_owner)
 137   , fractionRules()
 138   , fIsFractionRuleSet(FALSE)
 139   , fIsPublic(FALSE)
 140   , fIsParseable(TRUE)
 141 {
 142     for (int32_t i = 0; i < NON_NUMERICAL_RULE_LENGTH; ++i) {
 143         nonNumericalRules[i] = NULL;
 144     }
 145
 146     if (U_FAILURE(status)) {
 147         return;
 148     }
 149
 150     UnicodeString& description = descriptions[index]; // !!! make sure index is valid
 151
 152     if (description.length() == 0) {
 153         // throw new IllegalArgumentException("Empty rule set description");
 154         status = U_PARSE_ERROR;
 155         return;
 156     }
 157
 158     // if the description begins with a rule set name (the rule set
 159     // name can be omitted in formatter descriptions that consist
 160     // of only one rule set), copy it out into our "name" member
 161     // and delete it from the description
 162     if (description.charAt(0) == gPercent) {
 163         int32_t pos = description.indexOf(gColon);
 164         if (pos == -1) {
 165             // throw new IllegalArgumentException("Rule set name doesn't end in colon");
 166             status = U_PARSE_ERROR;
 167         } else {
 168             name.setTo(description, 0, pos);
 169             while (pos < description.length() && PatternProps::isWhiteSpace(description.charAt(++pos))) {
 170             }
 171             description.remove(0, pos);
 172         }
 173     } else {
 174         name.setTo(UNICODE_STRING_SIMPLE("%default"));
 175     }
 176
 177     if (description.length() == 0) {
 178         // throw new IllegalArgumentException("Empty rule set description");
 179         status = U_PARSE_ERROR;
 180     }
 181
 182     fIsPublic = name.indexOf(gPercentPercent, 2, 0) != 0;
 183
 184     if ( name.endsWith(gNoparse,8) ) {
 185         fIsParseable = FALSE;
 186         name.truncate(name.length()-8); // remove the @noparse from the name
 187     }
 188
 189     // all of the other members of NFRuleSet are initialized
 190     // by parseRules()
 191 }
 192
 193 void
 194 NFRuleSet::parseRules(UnicodeString& description, UErrorCode& status)
 195 {
 196     // start by creating a Vector whose elements are Strings containing
 197     // the descriptions of the rules (one rule per element).  The rules
 198     // are separated by semicolons (there's no escape facility: ALL
 199     // semicolons are rule delimiters)
 200
 201     if (U_FAILURE(status)) {
 202         return;
 203     }
 204
 205     // ensure we are starting with an empty rule list
 206     rules.deleteAll();
 207
 208     // dlf - the original code kept a separate description array for no reason,
 209     // so I got rid of it.  The loop was too complex so I simplified it.
 210
 211     UnicodeString currentDescription;
 212     int32_t oldP = 0;
 213     while (oldP < description.length()) {
 214         int32_t p = description.indexOf(gSemicolon, oldP);
 215         if (p == -1) {
 216             p = description.length();
 217         }
 218         currentDescription.setTo(description, oldP, p - oldP);
 219         NFRule::makeRules(currentDescription, this, rules.last(), owner, rules, status);
 220         oldP = p + 1;
 221     }
 222
 223     // for rules that didn't specify a base value, their base values
 224     // were initialized to 0.  Make another pass through the list and
 225     // set all those rules' base values.  We also remove any special
 226     // rules from the list and put them into their own member variables
 227     int64_t defaultBaseValue = 0;
 228
 229     // (this isn't a for loop because we might be deleting items from
 230     // the vector-- we want to make sure we only increment i when
 231     // we _didn't_ delete aything from the vector)
 232     int32_t rulesSize = rules.size();
 233     for (int32_t i = 0; i < rulesSize; i++) {
 234         NFRule* rule = rules[i];
 235         int64_t baseValue = rule->getBaseValue();
 236
 237         if (baseValue == 0) {
 238             // if the rule's base value is 0, fill in a default
 239             // base value (this will be 1 plus the preceding
 240             // rule's base value for regular rule sets, and the
 241             // same as the preceding rule's base value in fraction
 242             // rule sets)
 243             rule->setBaseValue(defaultBaseValue, status);
 244         }
 245         else {
 246             // if it's a regular rule that already knows its base value,
 247             // check to make sure the rules are in order, and update
 248             // the default base value for the next rule
 249             if (baseValue < defaultBaseValue) {
 250                 // throw new IllegalArgumentException("Rules are not in order");
 251                 status = U_PARSE_ERROR;
 252                 return;
 253             }
 254             defaultBaseValue = baseValue;
 255         }
 256         if (!fIsFractionRuleSet) {
 257             ++defaultBaseValue;
 258         }
 259     }
 260 }
 261
 262 /**
 263  * Set one of the non-numerical rules.
 264  * @param rule The rule to set.
 265  */
 266 void NFRuleSet::setNonNumericalRule(NFRule *rule) {
 267     int64_t baseValue = rule->getBaseValue();
 268     if (baseValue == NFRule::kNegativeNumberRule) {
 269         delete nonNumericalRules[NEGATIVE_RULE_INDEX];
 270         nonNumericalRules[NEGATIVE_RULE_INDEX] = rule;
 271     }
 272     else if (baseValue == NFRule::kImproperFractionRule) {
 273         setBestFractionRule(IMPROPER_FRACTION_RULE_INDEX, rule, TRUE);
 274     }
 275     else if (baseValue == NFRule::kProperFractionRule) {
 276         setBestFractionRule(PROPER_FRACTION_RULE_INDEX, rule, TRUE);
 277     }
 278     else if (baseValue == NFRule::kMasterRule) {
 279         setBestFractionRule(MASTER_RULE_INDEX, rule, TRUE);
 280     }
 281     else if (baseValue == NFRule::kInfinityRule) {
 282         delete nonNumericalRules[INFINITY_RULE_INDEX];
 283         nonNumericalRules[INFINITY_RULE_INDEX] = rule;
 284     }
 285     else if (baseValue == NFRule::kNaNRule) {
 286         delete nonNumericalRules[NAN_RULE_INDEX];
 287         nonNumericalRules[NAN_RULE_INDEX] = rule;
 288     }
 289 }
 290
 291 /**
 292  * Determine the best fraction rule to use. Rules matching the decimal point from
 293  * DecimalFormatSymbols become the main set of rules to use.
 294  * @param originalIndex The index into nonNumericalRules
 295  * @param newRule The new rule to consider
 296  * @param rememberRule Should the new rule be added to fractionRules.
 297  */
 298 void NFRuleSet::setBestFractionRule(int32_t originalIndex, NFRule *newRule, UBool rememberRule) {
 299     if (rememberRule) {
 300         fractionRules.add(newRule);
 301     }
 302     NFRule *bestResult = nonNumericalRules[originalIndex];
 303     if (bestResult == NULL) {
 304         nonNumericalRules[originalIndex] = newRule;
 305     }
 306     else {
 307         // We have more than one. Which one is better?
 308         const DecimalFormatSymbols *decimalFormatSymbols = owner->getDecimalFormatSymbols();
 309         if (decimalFormatSymbols->getSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol).charAt(0)
 310             == newRule->getDecimalPoint())
 311         {
 312             nonNumericalRules[originalIndex] = newRule;
 313         }
 314         // else leave it alone
 315     }
 316 }
 317
 318 NFRuleSet::~NFRuleSet()
 319 {
 320     for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
 321         if (i != IMPROPER_FRACTION_RULE_INDEX
 322             && i != PROPER_FRACTION_RULE_INDEX
 323             && i != MASTER_RULE_INDEX)
 324         {
 325             delete nonNumericalRules[i];
 326         }
 327         // else it will be deleted via NFRuleList fractionRules
 328     }
 329 }
 330
 331 static UBool
 332 util_equalRules(const NFRule* rule1, const NFRule* rule2)
 333 {
 334     if (rule1) {
 335         if (rule2) {
 336             return *rule1 == *rule2;
 337         }
 338     } else if (!rule2) {
 339         return TRUE;
 340     }
 341     return FALSE;
 342 }
 343
 344 UBool
 345 NFRuleSet::operator==(const NFRuleSet& rhs) const
 346 {
 347     if (rules.size() == rhs.rules.size() &&
 348         fIsFractionRuleSet == rhs.fIsFractionRuleSet &&
 349         name == rhs.name) {
 350
 351         // ...then compare the non-numerical rule lists...
 352         for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
 353             if (!util_equalRules(nonNumericalRules[i], rhs.nonNumericalRules[i])) {
 354                 return FALSE;
 355             }
 356         }
 357
 358         // ...then compare the rule lists...
 359         for (uint32_t i = 0; i < rules.size(); ++i) {
 360             if (*rules[i] != *rhs.rules[i]) {
 361                 return FALSE;
 362             }
 363         }
 364         return TRUE;
 365     }
 366     return FALSE;
 367 }
 368
 369 void
 370 NFRuleSet::setDecimalFormatSymbols(const DecimalFormatSymbols &newSymbols, UErrorCode& status) {
 371     for (uint32_t i = 0; i < rules.size(); ++i) {
 372         rules[i]->setDecimalFormatSymbols(newSymbols, status);
 373     }
 374     // Switch the fraction rules to mirror the DecimalFormatSymbols.
 375     for (int32_t nonNumericalIdx = IMPROPER_FRACTION_RULE_INDEX; nonNumericalIdx <= MASTER_RULE_INDEX; nonNumericalIdx++) {
 376         if (nonNumericalRules[nonNumericalIdx]) {
 377             for (uint32_t fIdx = 0; fIdx < fractionRules.size(); fIdx++) {
 378                 NFRule *fractionRule = fractionRules[fIdx];
 379                 if (nonNumericalRules[nonNumericalIdx]->getBaseValue() == fractionRule->getBaseValue()) {
 380                     setBestFractionRule(nonNumericalIdx, fractionRule, FALSE);
 381                 }
 382             }
 383         }
 384     }
 385
 386     for (uint32_t nnrIdx = 0; nnrIdx < NON_NUMERICAL_RULE_LENGTH; nnrIdx++) {
 387         NFRule *rule = nonNumericalRules[nnrIdx];
 388         if (rule) {
 389             rule->setDecimalFormatSymbols(newSymbols, status);
 390         }
 391     }
 392 }
 393
 394 #define RECURSION_LIMIT 64
 395
 396 void
 397 NFRuleSet::format(int64_t number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const
 398 {
 399     if (recursionCount >= RECURSION_LIMIT) {
 400         // stop recursion
 401         status = U_INVALID_STATE_ERROR;
 402         return;
 403     }
 404     const NFRule *rule = findNormalRule(number);
 405     if (rule) { // else error, but can't report it
 406         rule->doFormat(number, toAppendTo, pos, ++recursionCount, status);
 407     }
 408 }
 409
 410 void
 411 NFRuleSet::format(double number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const
 412 {
 413     if (recursionCount >= RECURSION_LIMIT) {
 414         // stop recursion
 415         status = U_INVALID_STATE_ERROR;
 416         return;
 417     }
 418     const NFRule *rule = findDoubleRule(number);
 419     if (rule) { // else error, but can't report it
 420         rule->doFormat(number, toAppendTo, pos, ++recursionCount, status);
 421     }
 422 }
 423
 424 const NFRule*
 425 NFRuleSet::findDoubleRule(double number) const
 426 {
 427     // if this is a fraction rule set, use findFractionRuleSetRule()
 428     if (isFractionRuleSet()) {
 429         return findFractionRuleSetRule(number);
 430     }
 431
 432     if (uprv_isNaN(number)) {
 433         const NFRule *rule = nonNumericalRules[NAN_RULE_INDEX];
 434         if (!rule) {
 435             rule = owner->getDefaultNaNRule();
 436         }
 437         return rule;
 438     }
 439
 440     // if the number is negative, return the negative number rule
 441     // (if there isn't a negative-number rule, we pretend it's a
 442     // positive number)
 443     if (number < 0) {
 444         if (nonNumericalRules[NEGATIVE_RULE_INDEX]) {
 445             return  nonNumericalRules[NEGATIVE_RULE_INDEX];
 446         } else {
 447             number = -number;
 448         }
 449     }
 450
 451     if (uprv_isInfinite(number)) {
 452         const NFRule *rule = nonNumericalRules[INFINITY_RULE_INDEX];
 453         if (!rule) {
 454             rule = owner->getDefaultInfinityRule();
 455         }
 456         return rule;
 457     }
 458
 459     // if the number isn't an integer, we use one of the fraction rules...
 460     if (number != uprv_floor(number)) {
 461         // if the number is between 0 and 1, return the proper
 462         // fraction rule
 463         if (number < 1 && nonNumericalRules[PROPER_FRACTION_RULE_INDEX]) {
 464             return nonNumericalRules[PROPER_FRACTION_RULE_INDEX];
 465         }
 466         // otherwise, return the improper fraction rule
 467         else if (nonNumericalRules[IMPROPER_FRACTION_RULE_INDEX]) {
 468             return nonNumericalRules[IMPROPER_FRACTION_RULE_INDEX];
 469         }
 470     }
 471
 472     // if there's a master rule, use it to format the number
 473     if (nonNumericalRules[MASTER_RULE_INDEX]) {
 474         return nonNumericalRules[MASTER_RULE_INDEX];
 475     }
 476
 477     // and if we haven't yet returned a rule, use findNormalRule()
 478     // to find the applicable rule
 479     int64_t r = util64_fromDouble(number + 0.5);
 480     return findNormalRule(r);
 481 }
 482
 483 const NFRule *
 484 NFRuleSet::findNormalRule(int64_t number) const
 485 {
 486     // if this is a fraction rule set, use findFractionRuleSetRule()
 487     // to find the rule (we should only go into this clause if the
 488     // value is 0)
 489     if (fIsFractionRuleSet) {
 490         return findFractionRuleSetRule((double)number);
 491     }
 492
 493     // if the number is negative, return the negative-number rule
 494     // (if there isn't one, pretend the number is positive)
 495     if (number < 0) {
 496         if (nonNumericalRules[NEGATIVE_RULE_INDEX]) {
 497             return nonNumericalRules[NEGATIVE_RULE_INDEX];
 498         } else {
 499             number = -number;
 500         }
 501     }
 502
 503     // we have to repeat the preceding two checks, even though we
 504     // do them in findRule(), because the version of format() that
 505     // takes a long bypasses findRule() and goes straight to this
 506     // function.  This function does skip the fraction rules since
 507     // we know the value is an integer (it also skips the master
 508     // rule, since it's considered a fraction rule.  Skipping the
 509     // master rule in this function is also how we avoid infinite
 510     // recursion)
 511
 512     // {dlf} unfortunately this fails if there are no rules except
 513     // special rules.  If there are no rules, use the master rule.
 514
 515     // binary-search the rule list for the applicable rule
 516     // (a rule is used for all values from its base value to
 517     // the next rule's base value)
 518     int32_t hi = rules.size();
 519     if (hi > 0) {
 520         int32_t lo = 0;
 521
 522         while (lo < hi) {
 523             int32_t mid = (lo + hi) / 2;
 524             if (rules[mid]->getBaseValue() == number) {
 525                 return rules[mid];
 526             }
 527             else if (rules[mid]->getBaseValue() > number) {
 528                 hi = mid;
 529             }
 530             else {
 531                 lo = mid + 1;
 532             }
 533         }
 534         if (hi == 0) { // bad rule set, minimum base > 0
 535             return NULL; // want to throw exception here
 536         }
 537
 538         NFRule *result = rules[hi - 1];
 539
 540         // use shouldRollBack() to see whether we need to invoke the
 541         // rollback rule (see shouldRollBack()'s documentation for
 542         // an explanation of the rollback rule).  If we do, roll back
 543         // one rule and return that one instead of the one we'd normally
 544         // return
 545         if (result->shouldRollBack((double)number)) {
 546             if (hi == 1) { // bad rule set, no prior rule to rollback to from this base
 547                 return NULL;
 548             }
 549             result = rules[hi - 2];
 550         }
 551         return result;
 552     }
 553     // else use the master rule
 554     return nonNumericalRules[MASTER_RULE_INDEX];
 555 }
 556
 557 /**
 558  * If this rule is a fraction rule set, this function is used by
 559  * findRule() to select the most appropriate rule for formatting
 560  * the number.  Basically, the base value of each rule in the rule
 561  * set is treated as the denominator of a fraction.  Whichever
 562  * denominator can produce the fraction closest in value to the
 563  * number passed in is the result.  If there's a tie, the earlier
 564  * one in the list wins.  (If there are two rules in a row with the
 565  * same base value, the first one is used when the numerator of the
 566  * fraction would be 1, and the second rule is used the rest of the
 567  * time.
 568  * @param number The number being formatted (which will always be
 569  * a number between 0 and 1)
 570  * @return The rule to use to format this number
 571  */
 572 const NFRule*
 573 NFRuleSet::findFractionRuleSetRule(double number) const
 574 {
 575     // the obvious way to do this (multiply the value being formatted
 576     // by each rule's base value until you get an integral result)
 577     // doesn't work because of rounding error.  This method is more
 578     // accurate
 579
 580     // find the least common multiple of the rules' base values
 581     // and multiply this by the number being formatted.  This is
 582     // all the precision we need, and we can do all of the rest
 583     // of the math using integer arithmetic
 584     int64_t leastCommonMultiple = rules[0]->getBaseValue();
 585     int64_t numerator;
 586     {
 587         for (uint32_t i = 1; i < rules.size(); ++i) {
 588             leastCommonMultiple = util_lcm(leastCommonMultiple, rules[i]->getBaseValue());
 589         }
 590         numerator = util64_fromDouble(number * (double)leastCommonMultiple + 0.5);
 591     }
 592     // for each rule, do the following...
 593     int64_t tempDifference;
 594     int64_t difference = util64_fromDouble(uprv_maxMantissa());
 595     int32_t winner = 0;
 596     for (uint32_t i = 0; i < rules.size(); ++i) {
 597         // "numerator" is the numerator of the fraction if the
 598         // denominator is the LCD.  The numerator if the rule's
 599         // base value is the denominator is "numerator" times the
 600         // base value divided bythe LCD.  Here we check to see if
 601         // that's an integer, and if not, how close it is to being
 602         // an integer.
 603         tempDifference = numerator * rules[i]->getBaseValue() % leastCommonMultiple;
 604
 605
 606         // normalize the result of the above calculation: we want
 607         // the numerator's distance from the CLOSEST multiple
 608         // of the LCD
 609         if (leastCommonMultiple - tempDifference < tempDifference) {
 610             tempDifference = leastCommonMultiple - tempDifference;
 611         }
 612
 613         // if this is as close as we've come, keep track of how close
 614         // that is, and the line number of the rule that did it.  If
 615         // we've scored a direct hit, we don't have to look at any more
 616         // rules
 617         if (tempDifference < difference) {
 618             difference = tempDifference;
 619             winner = i;
 620             if (difference == 0) {
 621                 break;
 622             }
 623         }
 624     }
 625
 626     // if we have two successive rules that both have the winning base
 627     // value, then the first one (the one we found above) is used if
 628     // the numerator of the fraction is 1 and the second one is used if
 629     // the numerator of the fraction is anything else (this lets us
 630     // do things like "one third"/"two thirds" without haveing to define
 631     // a whole bunch of extra rule sets)
 632     if ((unsigned)(winner + 1) < rules.size() &&
 633         rules[winner + 1]->getBaseValue() == rules[winner]->getBaseValue()) {
 634         double n = ((double)rules[winner]->getBaseValue()) * number;
 635         if (n < 0.5 || n >= 2) {
 636             ++winner;
 637         }
 638     }
 639
 640     // finally, return the winning rule
 641     return rules[winner];
 642 }
 643
 644 /**
 645  * Parses a string.  Matches the string to be parsed against each
 646  * of its rules (with a base value less than upperBound) and returns
 647  * the value produced by the rule that matched the most charcters
 648  * in the source string.
 649  * @param text The string to parse
 650  * @param parsePosition The initial position is ignored and assumed
 651  * to be 0.  On exit, this object has been updated to point to the
 652  * first character position this rule set didn't consume.
 653  * @param upperBound Limits the rules that can be allowed to match.
 654  * Only rules whose base values are strictly less than upperBound
 655  * are considered.
 656  * @return The numerical result of parsing this string.  This will
 657  * be the matching rule's base value, composed appropriately with
 658  * the results of matching any of its substitutions.  The object
 659  * will be an instance of Long if it's an integral value; otherwise,
 660  * it will be an instance of Double.  This function always returns
 661  * a valid object: If nothing matched the input string at all,
 662  * this function returns new Long(0), and the parse position is
 663  * left unchanged.
 664  */
 665 #ifdef RBNF_DEBUG
 666 #include <stdio.h>
 667
 668 static void dumpUS(FILE* f, const UnicodeString& us) {
 669   int len = us.length();
 670   char* buf = (char *)uprv_malloc((len+1)*sizeof(char)); //new char[len+1];
 671   if (buf != NULL) {
 672           us.extract(0, len, buf);
 673           buf[len] = 0;
 674           fprintf(f, "%s", buf);
 675           uprv_free(buf); //delete[] buf;
 676   }
 677 }
 678 #endif
 679
 680 UBool
 681 NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, Formattable& result, UBool lenient) const
 682 {
 683     // try matching each rule in the rule set against the text being
 684     // parsed.  Whichever one matches the most characters is the one
 685     // that determines the value we return.
 686
 687     result.setLong(0);
 688
 689     // dump out if there's no text to parse
 690     if (text.length() == 0) {
 691         return 0;
 692     }
 693
 694     ParsePosition highWaterMark;
 695     ParsePosition workingPos = pos;
 696
 697 #ifdef RBNF_DEBUG
 698     fprintf(stderr, "<nfrs> %x '", this);
 699     dumpUS(stderr, name);
 700     fprintf(stderr, "' text '");
 701     dumpUS(stderr, text);
 702     fprintf(stderr, "'\n");
 703     fprintf(stderr, "  parse negative: %d\n", this, negativeNumberRule != 0);
 704 #endif
 705     // Try each of the negative rules, fraction rules, infinity rules and NaN rules
 706     for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
 707         if (nonNumericalRules[i]) {
 708             Formattable tempResult;
 709             UBool success = nonNumericalRules[i]->doParse(text, workingPos, 0, upperBound, tempResult, lenient || isDecimalFormatRuleParseable() );
 710             if (success && (workingPos.getIndex() > highWaterMark.getIndex())) {
 711                 result = tempResult;
 712                 highWaterMark = workingPos;
 713             }
 714             workingPos = pos;
 715         }
 716     }
 717 #ifdef RBNF_DEBUG
 718     fprintf(stderr, "<nfrs> continue other with text '");
 719     dumpUS(stderr, text);
 720     fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex());
 721 #endif
 722
 723     // finally, go through the regular rules one at a time.  We start
 724     // at the end of the list because we want to try matching the most
 725     // sigificant rule first (this helps ensure that we parse
 726     // "five thousand three hundred six" as
 727     // "(five thousand) (three hundred) (six)" rather than
 728     // "((five thousand three) hundred) (six)").  Skip rules whose
 729     // base values are higher than the upper bound (again, this helps
 730     // limit ambiguity by making sure the rules that match a rule's
 731     // are less significant than the rule containing the substitutions)/
 732     {
 733         int64_t ub = util64_fromDouble(upperBound);
 734 #ifdef RBNF_DEBUG
 735         {
 736             char ubstr[64];
 737             util64_toa(ub, ubstr, 64);
 738             char ubstrhex[64];
 739             util64_toa(ub, ubstrhex, 64, 16);
 740             fprintf(stderr, "ub: %g, i64: %s (%s)\n", upperBound, ubstr, ubstrhex);
 741         }
 742 #endif
 743         for (int32_t i = rules.size(); --i >= 0 && highWaterMark.getIndex() < text.length();) {
 744             if ((!fIsFractionRuleSet) && (rules[i]->getBaseValue() >= ub)) {
 745                 continue;
 746             }
 747             Formattable tempResult;
 748             UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, tempResult);
 749             if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
 750                 result = tempResult;
 751                 highWaterMark = workingPos;
 752             }
 753             workingPos = pos;
 754         }
 755     }
 756 #ifdef RBNF_DEBUG
 757     fprintf(stderr, "<nfrs> exit\n");
 758 #endif
 759     // finally, update the parse postion we were passed to point to the
 760     // first character we didn't use, and return the result that
 761     // corresponds to that string of characters
 762     pos = highWaterMark;
 763
 764     return 1;
 765 }
 766
 767 void
 768 NFRuleSet::appendRules(UnicodeString& result) const
 769 {
 770     uint32_t i;
 771
 772     // the rule set name goes first...
 773     result.append(name);
 774     result.append(gColon);
 775     result.append(gLineFeed);
 776
 777     // followed by the regular rules...
 778     for (i = 0; i < rules.size(); i++) {
 779         rules[i]->_appendRuleText(result);
 780         result.append(gLineFeed);
 781     }
 782
 783     // followed by the special rules (if they exist)
 784     for (i = 0; i < NON_NUMERICAL_RULE_LENGTH; ++i) {
 785         NFRule *rule = nonNumericalRules[i];
 786         if (nonNumericalRules[i]) {
 787             if (rule->getBaseValue() == NFRule::kImproperFractionRule
 788                 || rule->getBaseValue() == NFRule::kProperFractionRule
 789                 || rule->getBaseValue() == NFRule::kMasterRule)
 790             {
 791                 for (uint32_t fIdx = 0; fIdx < fractionRules.size(); fIdx++) {
 792                     NFRule *fractionRule = fractionRules[fIdx];
 793                     if (fractionRule->getBaseValue() == rule->getBaseValue()) {
 794                         fractionRule->_appendRuleText(result);
 795                         result.append(gLineFeed);
 796                     }
 797                 }
 798             }
 799             else {
 800                 rule->_appendRuleText(result);
 801                 result.append(gLineFeed);
 802             }
 803         }
 804     }
 805 }
 806
 807 // utility functions
 808
 809 int64_t util64_fromDouble(double d) {
 810     int64_t result = 0;
 811     if (!uprv_isNaN(d)) {
 812         double mant = uprv_maxMantissa();
 813         if (d < -mant) {
 814             d = -mant;
 815         } else if (d > mant) {
 816             d = mant;
 817         }
 818         UBool neg = d < 0;
 819         if (neg) {
 820             d = -d;
 821         }
 822         result = (int64_t)uprv_floor(d);
 823         if (neg) {
 824             result = -result;
 825         }
 826     }
 827     return result;
 828 }
 829
 830 int64_t util64_pow(int32_t r, uint32_t e)  {
 831     if (r == 0) {
 832         return 0;
 833     } else if (e == 0) {
 834         return 1;
 835     } else {
 836         int64_t n = r;
 837         while (--e > 0) {
 838             n *= r;
 839         }
 840         return n;
 841     }
 842 }
 843
/**
 * Maps a digit value (0..35) to its ASCII character code:
 * entries 0..9 are '0'..'9' (0x30..0x39) and entries 10..35 are the
 * lowercase letters 'a'..'z' (0x61..0x7a).  The table has exactly 36
 * entries, matching the maximum radix of 36 accepted by the util64_*
 * conversion functions.
 */
static const uint8_t asciiDigits[] = {
    0x30u, 0x31u, 0x32u, 0x33u, 0x34u, 0x35u, 0x36u, 0x37u,
    0x38u, 0x39u, 0x61u, 0x62u, 0x63u, 0x64u, 0x65u, 0x66u,
    0x67u, 0x68u, 0x69u, 0x6au, 0x6bu, 0x6cu, 0x6du, 0x6eu,
    0x6fu, 0x70u, 0x71u, 0x72u, 0x73u, 0x74u, 0x75u, 0x76u,
    0x77u, 0x78u, 0x79u, 0x7au,
};
 851
/** HYPHEN-MINUS (U+002D) as a UChar; the sign character used by the UChar conversions. */
static const UChar kUMinus = (UChar)0x002d;
 853
 854 #ifdef RBNF_DEBUG
/** Minus sign as a char; the sign character used by the char-based (debug-only) conversions. */
static const char kMinus = '-';
 856
/**
 * Digit lookup table indexed by a 7-bit ASCII code (128 entries, so the
 * index must be below 0x80).
 *
 * An entry of 0 means "not a digit".  For '0'..'9', 'A'..'Z' and 'a'..'z'
 * the entry is 0x80 | value, where value is 0..9 for the decimal digits and
 * 10..35 for the letters (upper and lower case map to the same value).
 * The 0x80 flag lets the digit '0' (value 0) be told apart from a
 * non-digit; callers mask with 0x7f to recover the value.
 */
static const uint8_t digitInfo[] = {
        0,     0,     0,     0,     0,     0,     0,     0,
        0,     0,     0,     0,     0,     0,     0,     0,
        0,     0,     0,     0,     0,     0,     0,     0,
        0,     0,     0,     0,     0,     0,     0,     0,
        0,     0,     0,     0,     0,     0,     0,     0,
        0,     0,     0,     0,     0,     0,     0,     0,
    // 0x30: '0'..'9'
    0x80u, 0x81u, 0x82u, 0x83u, 0x84u, 0x85u, 0x86u, 0x87u,
    0x88u, 0x89u,     0,     0,     0,     0,     0,     0,
    // 0x40: '@' (not a digit), then 'A'..'Z'
        0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
    0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
    0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
    0xa1u, 0xa2u, 0xa3u,     0,     0,     0,     0,     0,
    // 0x60: '`' (not a digit), then 'a'..'z'
        0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
    0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
    0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
    0xa1u, 0xa2u, 0xa3u,     0,     0,     0,     0,     0,
};
 875
 876 int64_t util64_atoi(const char* str, uint32_t radix)
 877 {
 878     if (radix > 36) {
 879         radix = 36;
 880     } else if (radix < 2) {
 881         radix = 2;
 882     }
 883     int64_t lradix = radix;
 884
 885     int neg = 0;
 886     if (*str == kMinus) {
 887         ++str;
 888         neg = 1;
 889     }
 890     int64_t result = 0;
 891     uint8_t b;
 892     while ((b = digitInfo[*str++]) && ((b &= 0x7f) < radix)) {
 893         result *= lradix;
 894         result += (int32_t)b;
 895     }
 896     if (neg) {
 897         result = -result;
 898     }
 899     return result;
 900 }
 901
 902 int64_t util64_utoi(const UChar* str, uint32_t radix)
 903 {
 904     if (radix > 36) {
 905         radix = 36;
 906     } else if (radix < 2) {
 907         radix = 2;
 908     }
 909     int64_t lradix = radix;
 910
 911     int neg = 0;
 912     if (*str == kUMinus) {
 913         ++str;
 914         neg = 1;
 915     }
 916     int64_t result = 0;
 917     UChar c;
 918     uint8_t b;
 919     while (((c = *str++) < 0x0080) && (b = digitInfo[c]) && ((b &= 0x7f) < radix)) {
 920         result *= lradix;
 921         result += (int32_t)b;
 922     }
 923     if (neg) {
 924         result = -result;
 925     }
 926     return result;
 927 }
 928
 929 uint32_t util64_toa(int64_t w, char* buf, uint32_t len, uint32_t radix, UBool raw)
 930 {
 931     if (radix > 36) {
 932         radix = 36;
 933     } else if (radix < 2) {
 934         radix = 2;
 935     }
 936     int64_t base = radix;
 937
 938     char* p = buf;
 939     if (len && (w < 0) && (radix == 10) && !raw) {
 940         w = -w;
 941         *p++ = kMinus;
 942         --len;
 943     } else if (len && (w == 0)) {
 944         *p++ = (char)raw ? 0 : asciiDigits[0];
 945         --len;
 946     }
 947
 948     while (len && w != 0) {
 949         int64_t n = w / base;
 950         int64_t m = n * base;
 951         int32_t d = (int32_t)(w-m);
 952         *p++ = raw ? (char)d : asciiDigits[d];
 953         w = n;
 954         --len;
 955     }
 956     if (len) {
 957         *p = 0; // null terminate if room for caller convenience
 958     }
 959
 960     len = p - buf;
 961     if (*buf == kMinus) {
 962         ++buf;
 963     }
 964     while (--p > buf) {
 965         char c = *p;
 966         *p = *buf;
 967         *buf = c;
 968         ++buf;
 969     }
 970
 971     return len;
 972 }
 973 #endif
 974
 975 uint32_t util64_tou(int64_t w, UChar* buf, uint32_t len, uint32_t radix, UBool raw)
 976 {
 977     if (radix > 36) {
 978         radix = 36;
 979     } else if (radix < 2) {
 980         radix = 2;
 981     }
 982     int64_t base = radix;
 983
 984     UChar* p = buf;
 985     if (len && (w < 0) && (radix == 10) && !raw) {
 986         w = -w;
 987         *p++ = kUMinus;
 988         --len;
 989     } else if (len && (w == 0)) {
 990         *p++ = (UChar)raw ? 0 : asciiDigits[0];
 991         --len;
 992     }
 993
 994     while (len && (w != 0)) {
 995         int64_t n = w / base;
 996         int64_t m = n * base;
 997         int32_t d = (int32_t)(w-m);
 998         *p++ = (UChar)(raw ? d : asciiDigits[d]);
 999         w = n;
1000         --len;
1001     }
1002     if (len) {
1003         *p = 0; // null terminate if room for caller convenience
1004     }
1005
1006     len = (uint32_t)(p - buf);
1007     if (*buf == kUMinus) {
1008         ++buf;
1009     }
1010     while (--p > buf) {
1011         UChar c = *p;
1012         *p = *buf;
1013         *buf = c;
1014         ++buf;
1015     }
1016
1017     return len;
1018 }
1019
1020
1021 U_NAMESPACE_END
1022
1023 /* U_HAVE_RBNF */
1024 #endif
1025