2 ******************************************************************************
3 * Copyright (C) 1997-2001, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 ******************************************************************************
8 * tab size: 8 (not used)
11 * Modification history
13 * 10/11/2001 Doug Ported from ICU4J
20 #include "unicode/uchar.h"
33 // euclid's algorithm works with doubles
34 // note, doubles only get us up to one quadrillion or so, which
35 // isn't as much range as we get with longs. We probably still
36 // want either 64-bit math, or BigInteger.
39 util_lcm(int64_t x
, int64_t y
)
44 if (x
== 0 || y
== 0) {
49 int64_t t
= x
; x
= y
; y
= t
;
60 * Calculates the least common multiple of x and y.
63 util_lcm(int64_t x
, int64_t y
)
65 // binary gcd algorithm from Knuth, "The Art of Computer Programming,"
66 // vol. 2, 1st ed., pp. 298-299
71 while ((x1
& 1) == 0 && (y1
& 1) == 0) {
85 while ((t
& 1) == 0) {
96 int64_t gcd
= x1
<< p2
;
98 // x * y == gcd(x, y) * lcm(x, y)
103 static const UChar gPercent
= 0x0025;
104 static const UChar gColon
= 0x003a;
105 static const UChar gSemicolon
= 0x003b;
106 static const UChar gLineFeed
= 0x000a;
108 static const UChar gFourSpaces
[] =
110 0x20, 0x20, 0x20, 0x20, 0
112 static const UChar gPercentPercent
[] =
117 NFRuleSet::NFRuleSet(UnicodeString
* descriptions
, int32_t index
, UErrorCode
& status
)
120 , negativeNumberRule(NULL
)
121 , fIsFractionRuleSet(FALSE
)
124 for (int i
= 0; i
< 3; ++i
) {
125 fractionRules
[i
] = NULL
;
128 if (U_FAILURE(status
)) {
132 UnicodeString
& description
= descriptions
[index
]; // !!! make sure index is valid
134 // if the description begins with a rule set name (the rule set
135 // name can be omitted in formatter descriptions that consist
136 // of only one rule set), copy it out into our "name" member
137 // and delete it from the description
138 if (description
.charAt(0) == gPercent
) {
139 int32_t pos
= description
.indexOf(gColon
);
141 // throw new IllegalArgumentException("Rule set name doesn't end in colon");
142 status
= U_PARSE_ERROR
;
144 name
.setTo(description
, 0, pos
);
145 while (pos
< description
.length() && uprv_isRuleWhiteSpace(description
.charAt(++pos
))) {
147 description
.remove(0, pos
);
150 name
.setTo("%default");
153 if (description
.length() == 0) {
154 // throw new IllegalArgumentException("Empty rule set description");
155 status
= U_PARSE_ERROR
;
158 fIsPublic
= name
.indexOf(gPercentPercent
) != 0;
160 // all of the other members of NFRuleSet are initialized
165 NFRuleSet::parseRules(UnicodeString
& description
, const RuleBasedNumberFormat
* owner
, UErrorCode
& status
)
167 // start by creating a Vector whose elements are Strings containing
168 // the descriptions of the rules (one rule per element). The rules
169 // are separated by semicolons (there's no escape facility: ALL
170 // semicolons are rule delimiters)
172 if (U_FAILURE(status
)) {
176 // dlf - the original code kept a separate description array for no reason,
177 // so I got rid of it. The loop was too complex so I simplified it.
179 UnicodeString currentDescription
;
181 while (oldP
< description
.length()) {
182 int32_t p
= description
.indexOf(gSemicolon
, oldP
);
184 p
= description
.length();
186 currentDescription
.setTo(description
, oldP
, p
- oldP
);
187 NFRule::makeRules(currentDescription
, this, rules
.last(), owner
, rules
, status
);
191 // for rules that didn't specify a base value, their base values
192 // were initialized to 0. Make another pass through the list and
193 // set all those rules' base values. We also remove any special
194 // rules from the list and put them into their own member variables
195 int64_t defaultBaseValue
= 0;
197 // (this isn't a for loop because we might be deleting items from
198 // the vector-- we want to make sure we only increment i when
199 // we _didn't_ delete aything from the vector)
201 while (i
< rules
.size()) {
202 NFRule
* rule
= rules
[i
];
204 switch (rule
->getType()) {
205 // if the rule's base value is 0, fill in a default
206 // base value (this will be 1 plus the preceding
207 // rule's base value for regular rule sets, and the
208 // same as the preceding rule's base value in fraction
210 case NFRule::kNoBase
:
211 rule
->setBaseValue(defaultBaseValue
);
212 if (!isFractionRuleSet()) {
218 // if it's the negative-number rule, copy it into its own
219 // data member and delete it from the list
220 case NFRule::kNegativeNumberRule
:
221 negativeNumberRule
= rules
.remove(i
);
224 // if it's the improper fraction rule, copy it into the
225 // correct element of fractionRules
226 case NFRule::kImproperFractionRule
:
227 fractionRules
[0] = rules
.remove(i
);
230 // if it's the proper fraction rule, copy it into the
231 // correct element of fractionRules
232 case NFRule::kProperFractionRule
:
233 fractionRules
[1] = rules
.remove(i
);
236 // if it's the master rule, copy it into the
237 // correct element of fractionRules
238 case NFRule::kMasterRule
:
239 fractionRules
[2] = rules
.remove(i
);
242 // if it's a regular rule that already knows its base value,
243 // check to make sure the rules are in order, and update
244 // the default base value for the next rule
246 if (rule
->getBaseValue() < defaultBaseValue
) {
247 // throw new IllegalArgumentException("Rules are not in order");
248 status
= U_PARSE_ERROR
;
251 defaultBaseValue
= rule
->getBaseValue();
252 if (!isFractionRuleSet()) {
261 NFRuleSet::~NFRuleSet()
263 delete negativeNumberRule
;
264 delete fractionRules
[0];
265 delete fractionRules
[1];
266 delete fractionRules
[2];
270 util_equalRules(const NFRule
* rule1
, const NFRule
* rule2
)
274 return *rule1
== *rule2
;
283 NFRuleSet::operator==(const NFRuleSet
& rhs
) const
285 if (rules
.size() == rhs
.rules
.size() &&
286 fIsFractionRuleSet
== rhs
.fIsFractionRuleSet
&&
288 util_equalRules(negativeNumberRule
, rhs
.negativeNumberRule
) &&
289 util_equalRules(fractionRules
[0], rhs
.fractionRules
[0]) &&
290 util_equalRules(fractionRules
[1], rhs
.fractionRules
[1]) &&
291 util_equalRules(fractionRules
[2], rhs
.fractionRules
[2])) {
293 for (uint32_t i
= 0; i
< rules
.size(); ++i
) {
294 if (*rules
[i
] != *rhs
.rules
[i
]) {
304 NFRuleSet::format(int64_t number
, UnicodeString
& toAppendTo
, int32_t pos
) const
306 NFRule
*rule
= findNormalRule(number
);
307 rule
->doFormat(number
, toAppendTo
, pos
);
311 NFRuleSet::format(double number
, UnicodeString
& toAppendTo
, int32_t pos
) const
313 NFRule
*rule
= findDoubleRule(number
);
314 rule
->doFormat(number
, toAppendTo
, pos
);
318 NFRuleSet::findDoubleRule(double number
) const
320 // if this is a fraction rule set, use findFractionRuleSetRule()
321 if (isFractionRuleSet()) {
322 return findFractionRuleSetRule(number
);
325 // if the number is negative, return the negative number rule
326 // (if there isn't a negative-number rule, we pretend it's a
329 if (negativeNumberRule
) {
330 return negativeNumberRule
;
336 // if the number isn't an integer, we use one of the fraction rules...
337 if (number
!= uprv_floor(number
)) {
338 // if the number is between 0 and 1, return the proper
340 if (number
< 1 && fractionRules
[1]) {
341 return fractionRules
[1];
343 // otherwise, return the improper fraction rule
344 else if (fractionRules
[0]) {
345 return fractionRules
[0];
349 // if there's a master rule, use it to format the number
350 if (fractionRules
[2]) {
351 return fractionRules
[2];
354 // and if we haven't yet returned a rule, use findNormalRule()
355 // to find the applicable rule
356 int64_t r
= util64_fromDouble(number
+ 0.5);
357 return findNormalRule(r
);
361 NFRuleSet::findNormalRule(int64_t number
) const
363 // if this is a fraction rule set, use findFractionRuleSetRule()
364 // to find the rule (we should only go into this clause if the
366 if (fIsFractionRuleSet
) {
367 return findFractionRuleSetRule((double)number
);
370 // if the number is negative, return the negative-number rule
371 // (if there isn't one, pretend the number is positive)
373 if (negativeNumberRule
) {
374 return negativeNumberRule
;
380 // we have to repeat the preceding two checks, even though we
381 // do them in findRule(), because the version of format() that
382 // takes a long bypasses findRule() and goes straight to this
383 // function. This function does skip the fraction rules since
384 // we know the value is an integer (it also skips the master
385 // rule, since it's considered a fraction rule. Skipping the
386 // master rule in this function is also how we avoid infinite
389 // {dlf} unfortunately this fails if there are no rules except
390 // special rules. If there are no rules, use the master rule.
392 // binary-search the rule list for the applicable rule
393 // (a rule is used for all values from its base value to
394 // the next rule's base value)
395 int32_t hi
= rules
.size();
400 int32_t mid
= (lo
+ hi
) / 2;
401 if (rules
[mid
]->getBaseValue() == number
) {
404 else if (rules
[mid
]->getBaseValue() > number
) {
411 NFRule
*result
= rules
[hi
- 1];
413 // use shouldRollBack() to see whether we need to invoke the
414 // rollback rule (see shouldRollBack()'s documentation for
415 // an explanation of the rollback rule). If we do, roll back
416 // one rule and return that one instead of the one we'd normally
418 if (result
->shouldRollBack((double)number
)) {
419 result
= rules
[hi
- 2];
423 // else use the master rule
424 return fractionRules
[2];
428 * If this rule is a fraction rule set, this function is used by
429 * findRule() to select the most appropriate rule for formatting
430 * the number. Basically, the base value of each rule in the rule
431 * set is treated as the denominator of a fraction. Whichever
432 * denominator can produce the fraction closest in value to the
433 * number passed in is the result. If there's a tie, the earlier
434 * one in the list wins. (If there are two rules in a row with the
435 * same base value, the first one is used when the numerator of the
436 * fraction would be 1, and the second rule is used the rest of the
438 * @param number The number being formatted (which will always be
439 * a number between 0 and 1)
440 * @return The rule to use to format this number
443 NFRuleSet::findFractionRuleSetRule(double number
) const
445 // the obvious way to do this (multiply the value being formatted
446 // by each rule's base value until you get an integral result)
447 // doesn't work because of rounding error. This method is more
450 // find the least common multiple of the rules' base values
451 // and multiply this by the number being formatted. This is
452 // all the precision we need, and we can do all of the rest
453 // of the math using integer arithmetic
454 int64_t leastCommonMultiple
= rules
[0]->getBaseValue();
457 for (uint32_t i
= 1; i
< rules
.size(); ++i
) {
458 leastCommonMultiple
= util_lcm(leastCommonMultiple
, rules
[i
]->getBaseValue());
460 numerator
= util64_fromDouble(number
* (double)leastCommonMultiple
+ 0.5);
462 // for each rule, do the following...
463 int64_t tempDifference
;
464 int64_t difference
= util64_fromDouble(uprv_maxMantissa());
466 for (uint32_t i
= 0; i
< rules
.size(); ++i
) {
467 // "numerator" is the numerator of the fraction if the
468 // denominator is the LCD. The numerator if the rule's
469 // base value is the denominator is "numerator" times the
470 // base value divided bythe LCD. Here we check to see if
471 // that's an integer, and if not, how close it is to being
473 tempDifference
= numerator
* rules
[i
]->getBaseValue() % leastCommonMultiple
;
476 // normalize the result of the above calculation: we want
477 // the numerator's distance from the CLOSEST multiple
479 if (leastCommonMultiple
- tempDifference
< tempDifference
) {
480 tempDifference
= leastCommonMultiple
- tempDifference
;
483 // if this is as close as we've come, keep track of how close
484 // that is, and the line number of the rule that did it. If
485 // we've scored a direct hit, we don't have to look at any more
487 if (tempDifference
< difference
) {
488 difference
= tempDifference
;
490 if (difference
== 0) {
496 // if we have two successive rules that both have the winning base
497 // value, then the first one (the one we found above) is used if
498 // the numerator of the fraction is 1 and the second one is used if
499 // the numerator of the fraction is anything else (this lets us
500 // do things like "one third"/"two thirds" without haveing to define
501 // a whole bunch of extra rule sets)
502 if ((unsigned)(winner
+ 1) < rules
.size() &&
503 rules
[winner
+ 1]->getBaseValue() == rules
[winner
]->getBaseValue()) {
504 double n
= ((double)rules
[winner
]->getBaseValue()) * number
;
505 if (n
< 0.5 || n
>= 2) {
510 // finally, return the winning rule
511 return rules
[winner
];
515 * Parses a string. Matches the string to be parsed against each
516 * of its rules (with a base value less than upperBound) and returns
517 * the value produced by the rule that matched the most charcters
518 * in the source string.
519 * @param text The string to parse
520 * @param parsePosition The initial position is ignored and assumed
521 * to be 0. On exit, this object has been updated to point to the
522 * first character position this rule set didn't consume.
523 * @param upperBound Limits the rules that can be allowed to match.
524 * Only rules whose base values are strictly less than upperBound
526 * @return The numerical result of parsing this string. This will
527 * be the matching rule's base value, composed appropriately with
528 * the results of matching any of its substitutions. The object
529 * will be an instance of Long if it's an integral value; otherwise,
530 * it will be an instance of Double. This function always returns
531 * a valid object: If nothing matched the input string at all,
532 * this function returns new Long(0), and the parse position is
538 static void dumpUS(FILE* f
, const UnicodeString
& us
) {
539 int len
= us
.length();
540 char* buf
= (char *)uprv_malloc((len
+1)*sizeof(char)); //new char[len+1];
541 us
.extract(0, len
, buf
);
543 fprintf(f
, "%s", buf
);
544 uprv_free(buf
); //delete[] buf;
549 NFRuleSet::parse(const UnicodeString
& text
, ParsePosition
& pos
, double upperBound
, Formattable
& result
) const
551 // try matching each rule in the rule set against the text being
552 // parsed. Whichever one matches the most characters is the one
553 // that determines the value we return.
557 // dump out if there's no text to parse
558 if (text
.length() == 0) {
562 ParsePosition highWaterMark
;
563 ParsePosition workingPos
= pos
;
566 fprintf(stderr
, "<nfrs> %x '", this);
567 dumpUS(stderr
, name
);
568 fprintf(stderr
, "' text '");
569 dumpUS(stderr
, text
);
570 fprintf(stderr
, "'\n");
571 fprintf(stderr
, " parse negative: %d\n", this, negativeNumberRule
!= 0);
574 // start by trying the negative number rule (if there is one)
575 if (negativeNumberRule
) {
576 Formattable tempResult
;
578 fprintf(stderr
, " <nfrs before negative> %x ub: %g\n", negativeNumberRule
, upperBound
);
580 UBool success
= negativeNumberRule
->doParse(text
, workingPos
, 0, upperBound
, tempResult
);
582 fprintf(stderr
, " <nfrs after negative> success: %d wpi: %d\n", success
, workingPos
.getIndex());
584 if (success
&& workingPos
.getIndex() > highWaterMark
.getIndex()) {
586 highWaterMark
= workingPos
;
591 fprintf(stderr
, "<nfrs> continue fractional with text '");
592 dumpUS(stderr
, text
);
593 fprintf(stderr
, "' hwm: %d\n", highWaterMark
.getIndex());
595 // then try each of the fraction rules
597 for (int i
= 0; i
< 3; i
++) {
598 if (fractionRules
[i
]) {
599 Formattable tempResult
;
600 UBool success
= fractionRules
[i
]->doParse(text
, workingPos
, 0, upperBound
, tempResult
);
601 if (success
&& (workingPos
.getIndex() > highWaterMark
.getIndex())) {
603 highWaterMark
= workingPos
;
610 fprintf(stderr
, "<nfrs> continue other with text '");
611 dumpUS(stderr
, text
);
612 fprintf(stderr
, "' hwm: %d\n", highWaterMark
.getIndex());
615 // finally, go through the regular rules one at a time. We start
616 // at the end of the list because we want to try matching the most
617 // sigificant rule first (this helps ensure that we parse
618 // "five thousand three hundred six" as
619 // "(five thousand) (three hundred) (six)" rather than
620 // "((five thousand three) hundred) (six)"). Skip rules whose
621 // base values are higher than the upper bound (again, this helps
622 // limit ambiguity by making sure the rules that match a rule's
623 // are less significant than the rule containing the substitutions)/
625 int64_t ub
= util64_fromDouble(upperBound
);
629 util64_toa(ub
, ubstr
, 64);
631 util64_toa(ub
, ubstrhex
, 64, 16);
632 fprintf(stderr
, "ub: %g, i64: %s (%s)\n", upperBound
, ubstr
, ubstrhex
);
635 for (int32_t i
= rules
.size(); --i
>= 0 && highWaterMark
.getIndex() < text
.length();) {
636 if ((!fIsFractionRuleSet
) && (rules
[i
]->getBaseValue() >= ub
)) {
639 Formattable tempResult
;
640 UBool success
= rules
[i
]->doParse(text
, workingPos
, fIsFractionRuleSet
, upperBound
, tempResult
);
641 if (success
&& workingPos
.getIndex() > highWaterMark
.getIndex()) {
643 highWaterMark
= workingPos
;
649 fprintf(stderr
, "<nfrs> exit\n");
651 // finally, update the parse postion we were passed to point to the
652 // first character we didn't use, and return the result that
653 // corresponds to that string of characters
660 NFRuleSet::appendRules(UnicodeString
& result
) const
662 // the rule set name goes first...
664 result
.append(gColon
);
665 result
.append(gLineFeed
);
667 // followed by the regular rules...
668 for (uint32_t i
= 0; i
< rules
.size(); i
++) {
669 result
.append(gFourSpaces
);
670 rules
[i
]->appendRuleText(result
);
671 result
.append(gLineFeed
);
674 // followed by the special rules (if they exist)
675 if (negativeNumberRule
) {
676 result
.append(gFourSpaces
);
677 negativeNumberRule
->appendRuleText(result
);
678 result
.append(gLineFeed
);
682 for (uint32_t i
= 0; i
< 3; ++i
) {
683 if (fractionRules
[i
]) {
684 result
.append(gFourSpaces
);
685 fractionRules
[i
]->appendRuleText(result
);
686 result
.append(gLineFeed
);
694 int64_t util64_fromDouble(double d
) {
696 if (!uprv_isNaN(d
)) {
697 double mant
= uprv_maxMantissa();
700 } else if (d
> mant
) {
707 result
= (int64_t)uprv_floor(d
);
715 int64_t util64_pow(int32_t r
, uint32_t e
) {
729 static const uint8_t asciiDigits
[] = {
730 0x30u
, 0x31u
, 0x32u
, 0x33u
, 0x34u
, 0x35u
, 0x36u
, 0x37u
,
731 0x38u
, 0x39u
, 0x61u
, 0x62u
, 0x63u
, 0x64u
, 0x65u
, 0x66u
,
732 0x67u
, 0x68u
, 0x69u
, 0x6au
, 0x6bu
, 0x6cu
, 0x6du
, 0x6eu
,
733 0x6fu
, 0x70u
, 0x71u
, 0x72u
, 0x73u
, 0x74u
, 0x75u
, 0x76u
,
734 0x77u
, 0x78u
, 0x79u
, 0x7au
,
737 static const UChar kUMinus
= (UChar
)0x002d;
739 static const char kMinus
= '-';
741 static const uint8_t digitInfo
[] = {
742 0, 0, 0, 0, 0, 0, 0, 0,
743 0, 0, 0, 0, 0, 0, 0, 0,
744 0, 0, 0, 0, 0, 0, 0, 0,
745 0, 0, 0, 0, 0, 0, 0, 0,
746 0, 0, 0, 0, 0, 0, 0, 0,
747 0, 0, 0, 0, 0, 0, 0, 0,
748 0x80u
, 0x81u
, 0x82u
, 0x83u
, 0x84u
, 0x85u
, 0x86u
, 0x87u
,
749 0x88u
, 0x89u
, 0, 0, 0, 0, 0, 0,
750 0, 0x8au
, 0x8bu
, 0x8cu
, 0x8du
, 0x8eu
, 0x8fu
, 0x90u
,
751 0x91u
, 0x92u
, 0x93u
, 0x94u
, 0x95u
, 0x96u
, 0x97u
, 0x98u
,
752 0x99u
, 0x9au
, 0x9bu
, 0x9cu
, 0x9du
, 0x9eu
, 0x9fu
, 0xa0u
,
753 0xa1u
, 0xa2u
, 0xa3u
, 0, 0, 0, 0, 0,
754 0, 0x8au
, 0x8bu
, 0x8cu
, 0x8du
, 0x8eu
, 0x8fu
, 0x90u
,
755 0x91u
, 0x92u
, 0x93u
, 0x94u
, 0x95u
, 0x96u
, 0x97u
, 0x98u
,
756 0x99u
, 0x9au
, 0x9bu
, 0x9cu
, 0x9du
, 0x9eu
, 0x9fu
, 0xa0u
,
757 0xa1u
, 0xa2u
, 0xa3u
, 0, 0, 0, 0, 0,
761 int64_t util64_atoi(const char* str
, uint32_t radix
)
765 } else if (radix
< 2) {
768 int64_t lradix
= radix
;
771 if (*str
== kMinus
) {
777 while ((b
= digitInfo
[*str
++]) && ((b
&= 0x7f) < radix
)) {
779 result
+= (int32_t)b
;
788 int64_t util64_utoi(const UChar
* str
, uint32_t radix
)
792 } else if (radix
< 2) {
795 int64_t lradix
= radix
;
798 if (*str
== kUMinus
) {
805 while (((c
= *str
++) < 0x0080) && (b
= digitInfo
[c
]) && ((b
&= 0x7f) < radix
)) {
807 result
+= (int32_t)b
;
816 uint32_t util64_toa(int64_t w
, char* buf
, uint32_t len
, uint32_t radix
, UBool raw
)
820 } else if (radix
< 2) {
823 int64_t base
= radix
;
826 if (len
&& (w
< 0) && (radix
== 10) && !raw
) {
830 } else if (len
&& (w
== 0)) {
831 *p
++ = (char)raw
? 0 : asciiDigits
[0];
835 while (len
&& w
!= 0) {
836 int64_t n
= w
/ base
;
837 int64_t m
= n
* base
;
838 int32_t d
= (int32_t)(w
-m
);
839 *p
++ = raw
? (char)d
: asciiDigits
[d
];
844 *p
= 0; // null terminate if room for caller convenience
848 if (*buf
== kMinus
) {
862 uint32_t util64_tou(int64_t w
, UChar
* buf
, uint32_t len
, uint32_t radix
, UBool raw
)
866 } else if (radix
< 2) {
869 int64_t base
= radix
;
872 if (len
&& (w
< 0) && (radix
== 10) && !raw
) {
876 } else if (len
&& (w
== 0)) {
877 *p
++ = (UChar
)raw
? 0 : asciiDigits
[0];
881 while (len
&& (w
!= 0)) {
882 int64_t n
= w
/ base
;
883 int64_t m
= n
* base
;
884 int32_t d
= (int32_t)(w
-m
);
885 *p
++ = (UChar
)(raw
? d
: asciiDigits
[d
]);
890 *p
= 0; // null terminate if room for caller convenience
893 len
= (uint32_t)(p
- buf
);
894 if (*buf
== kUMinus
) {