2 ******************************************************************************
3 * Copyright (C) 1997-2008, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 ******************************************************************************
8 * tab size: 8 (not used)
11 * Modification history
13 * 10/11/2001 Doug Ported from ICU4J
20 #include "unicode/uchar.h"
33 // euclid's algorithm works with doubles
34 // note, doubles only get us up to one quadrillion or so, which
35 // isn't as much range as we get with longs. We probably still
36 // want either 64-bit math, or BigInteger.
39 util_lcm(int64_t x
, int64_t y
)
44 if (x
== 0 || y
== 0) {
49 int64_t t
= x
; x
= y
; y
= t
;
60 * Calculates the least common multiple of x and y.
63 util_lcm(int64_t x
, int64_t y
)
65 // binary gcd algorithm from Knuth, "The Art of Computer Programming,"
66 // vol. 2, 1st ed., pp. 298-299
71 while ((x1
& 1) == 0 && (y1
& 1) == 0) {
85 while ((t
& 1) == 0) {
96 int64_t gcd
= x1
<< p2
;
98 // x * y == gcd(x, y) * lcm(x, y)
103 static const UChar gPercent
= 0x0025;
104 static const UChar gColon
= 0x003a;
105 static const UChar gSemicolon
= 0x003b;
106 static const UChar gLineFeed
= 0x000a;
108 static const UChar gFourSpaces
[] =
110 0x20, 0x20, 0x20, 0x20, 0
112 static const UChar gPercentPercent
[] =
117 NFRuleSet::NFRuleSet(UnicodeString
* descriptions
, int32_t index
, UErrorCode
& status
)
120 , negativeNumberRule(NULL
)
121 , fIsFractionRuleSet(FALSE
)
125 for (int i
= 0; i
< 3; ++i
) {
126 fractionRules
[i
] = NULL
;
129 if (U_FAILURE(status
)) {
133 UnicodeString
& description
= descriptions
[index
]; // !!! make sure index is valid
135 if (description
.length() == 0) {
136 // throw new IllegalArgumentException("Empty rule set description");
137 status
= U_PARSE_ERROR
;
141 // if the description begins with a rule set name (the rule set
142 // name can be omitted in formatter descriptions that consist
143 // of only one rule set), copy it out into our "name" member
144 // and delete it from the description
145 if (description
.charAt(0) == gPercent
) {
146 int32_t pos
= description
.indexOf(gColon
);
148 // throw new IllegalArgumentException("Rule set name doesn't end in colon");
149 status
= U_PARSE_ERROR
;
151 name
.setTo(description
, 0, pos
);
152 while (pos
< description
.length() && uprv_isRuleWhiteSpace(description
.charAt(++pos
))) {
154 description
.remove(0, pos
);
157 name
.setTo(UNICODE_STRING_SIMPLE("%default"));
160 if (description
.length() == 0) {
161 // throw new IllegalArgumentException("Empty rule set description");
162 status
= U_PARSE_ERROR
;
165 fIsPublic
= name
.indexOf(gPercentPercent
) != 0;
167 // all of the other members of NFRuleSet are initialized
172 NFRuleSet::parseRules(UnicodeString
& description
, const RuleBasedNumberFormat
* owner
, UErrorCode
& status
)
174 // start by creating a Vector whose elements are Strings containing
175 // the descriptions of the rules (one rule per element). The rules
176 // are separated by semicolons (there's no escape facility: ALL
177 // semicolons are rule delimiters)
179 if (U_FAILURE(status
)) {
183 // dlf - the original code kept a separate description array for no reason,
184 // so I got rid of it. The loop was too complex so I simplified it.
186 UnicodeString currentDescription
;
188 while (oldP
< description
.length()) {
189 int32_t p
= description
.indexOf(gSemicolon
, oldP
);
191 p
= description
.length();
193 currentDescription
.setTo(description
, oldP
, p
- oldP
);
194 NFRule::makeRules(currentDescription
, this, rules
.last(), owner
, rules
, status
);
198 // for rules that didn't specify a base value, their base values
199 // were initialized to 0. Make another pass through the list and
200 // set all those rules' base values. We also remove any special
201 // rules from the list and put them into their own member variables
202 int64_t defaultBaseValue
= 0;
204 // (this isn't a for loop because we might be deleting items from
205 // the vector-- we want to make sure we only increment i when
206 // we _didn't_ delete aything from the vector)
208 while (i
< rules
.size()) {
209 NFRule
* rule
= rules
[i
];
211 switch (rule
->getType()) {
212 // if the rule's base value is 0, fill in a default
213 // base value (this will be 1 plus the preceding
214 // rule's base value for regular rule sets, and the
215 // same as the preceding rule's base value in fraction
217 case NFRule::kNoBase
:
218 rule
->setBaseValue(defaultBaseValue
, status
);
219 if (!isFractionRuleSet()) {
225 // if it's the negative-number rule, copy it into its own
226 // data member and delete it from the list
227 case NFRule::kNegativeNumberRule
:
228 negativeNumberRule
= rules
.remove(i
);
231 // if it's the improper fraction rule, copy it into the
232 // correct element of fractionRules
233 case NFRule::kImproperFractionRule
:
234 fractionRules
[0] = rules
.remove(i
);
237 // if it's the proper fraction rule, copy it into the
238 // correct element of fractionRules
239 case NFRule::kProperFractionRule
:
240 fractionRules
[1] = rules
.remove(i
);
243 // if it's the master rule, copy it into the
244 // correct element of fractionRules
245 case NFRule::kMasterRule
:
246 fractionRules
[2] = rules
.remove(i
);
249 // if it's a regular rule that already knows its base value,
250 // check to make sure the rules are in order, and update
251 // the default base value for the next rule
253 if (rule
->getBaseValue() < defaultBaseValue
) {
254 // throw new IllegalArgumentException("Rules are not in order");
255 status
= U_PARSE_ERROR
;
258 defaultBaseValue
= rule
->getBaseValue();
259 if (!isFractionRuleSet()) {
268 NFRuleSet::~NFRuleSet()
270 delete negativeNumberRule
;
271 delete fractionRules
[0];
272 delete fractionRules
[1];
273 delete fractionRules
[2];
277 util_equalRules(const NFRule
* rule1
, const NFRule
* rule2
)
281 return *rule1
== *rule2
;
290 NFRuleSet::operator==(const NFRuleSet
& rhs
) const
292 if (rules
.size() == rhs
.rules
.size() &&
293 fIsFractionRuleSet
== rhs
.fIsFractionRuleSet
&&
295 util_equalRules(negativeNumberRule
, rhs
.negativeNumberRule
) &&
296 util_equalRules(fractionRules
[0], rhs
.fractionRules
[0]) &&
297 util_equalRules(fractionRules
[1], rhs
.fractionRules
[1]) &&
298 util_equalRules(fractionRules
[2], rhs
.fractionRules
[2])) {
300 for (uint32_t i
= 0; i
< rules
.size(); ++i
) {
301 if (*rules
[i
] != *rhs
.rules
[i
]) {
310 #define RECURSION_LIMIT 50
313 NFRuleSet::format(int64_t number
, UnicodeString
& toAppendTo
, int32_t pos
) const
315 NFRule
*rule
= findNormalRule(number
);
316 if (rule
) { // else error, but can't report it
317 NFRuleSet
* ncThis
= (NFRuleSet
*)this;
318 if (ncThis
->fRecursionCount
++ >= RECURSION_LIMIT
) {
320 ncThis
->fRecursionCount
= 0;
322 rule
->doFormat(number
, toAppendTo
, pos
);
323 ncThis
->fRecursionCount
--;
329 NFRuleSet::format(double number
, UnicodeString
& toAppendTo
, int32_t pos
) const
331 NFRule
*rule
= findDoubleRule(number
);
332 if (rule
) { // else error, but can't report it
333 NFRuleSet
* ncThis
= (NFRuleSet
*)this;
334 if (ncThis
->fRecursionCount
++ >= RECURSION_LIMIT
) {
336 ncThis
->fRecursionCount
= 0;
338 rule
->doFormat(number
, toAppendTo
, pos
);
339 ncThis
->fRecursionCount
--;
345 NFRuleSet::findDoubleRule(double number
) const
347 // if this is a fraction rule set, use findFractionRuleSetRule()
348 if (isFractionRuleSet()) {
349 return findFractionRuleSetRule(number
);
352 // if the number is negative, return the negative number rule
353 // (if there isn't a negative-number rule, we pretend it's a
356 if (negativeNumberRule
) {
357 return negativeNumberRule
;
363 // if the number isn't an integer, we use one of the fraction rules...
364 if (number
!= uprv_floor(number
)) {
365 // if the number is between 0 and 1, return the proper
367 if (number
< 1 && fractionRules
[1]) {
368 return fractionRules
[1];
370 // otherwise, return the improper fraction rule
371 else if (fractionRules
[0]) {
372 return fractionRules
[0];
376 // if there's a master rule, use it to format the number
377 if (fractionRules
[2]) {
378 return fractionRules
[2];
381 // and if we haven't yet returned a rule, use findNormalRule()
382 // to find the applicable rule
383 int64_t r
= util64_fromDouble(number
+ 0.5);
384 return findNormalRule(r
);
388 NFRuleSet::findNormalRule(int64_t number
) const
390 // if this is a fraction rule set, use findFractionRuleSetRule()
391 // to find the rule (we should only go into this clause if the
393 if (fIsFractionRuleSet
) {
394 return findFractionRuleSetRule((double)number
);
397 // if the number is negative, return the negative-number rule
398 // (if there isn't one, pretend the number is positive)
400 if (negativeNumberRule
) {
401 return negativeNumberRule
;
407 // we have to repeat the preceding two checks, even though we
408 // do them in findRule(), because the version of format() that
409 // takes a long bypasses findRule() and goes straight to this
410 // function. This function does skip the fraction rules since
411 // we know the value is an integer (it also skips the master
412 // rule, since it's considered a fraction rule. Skipping the
413 // master rule in this function is also how we avoid infinite
416 // {dlf} unfortunately this fails if there are no rules except
417 // special rules. If there are no rules, use the master rule.
419 // binary-search the rule list for the applicable rule
420 // (a rule is used for all values from its base value to
421 // the next rule's base value)
422 int32_t hi
= rules
.size();
427 int32_t mid
= (lo
+ hi
) / 2;
428 if (rules
[mid
]->getBaseValue() == number
) {
431 else if (rules
[mid
]->getBaseValue() > number
) {
438 if (hi
== 0) { // bad rule set, minimum base > 0
439 return NULL
; // want to throw exception here
442 NFRule
*result
= rules
[hi
- 1];
444 // use shouldRollBack() to see whether we need to invoke the
445 // rollback rule (see shouldRollBack()'s documentation for
446 // an explanation of the rollback rule). If we do, roll back
447 // one rule and return that one instead of the one we'd normally
449 if (result
->shouldRollBack((double)number
)) {
450 if (hi
== 1) { // bad rule set, no prior rule to rollback to from this base
453 result
= rules
[hi
- 2];
457 // else use the master rule
458 return fractionRules
[2];
462 * If this rule is a fraction rule set, this function is used by
463 * findRule() to select the most appropriate rule for formatting
464 * the number. Basically, the base value of each rule in the rule
465 * set is treated as the denominator of a fraction. Whichever
466 * denominator can produce the fraction closest in value to the
467 * number passed in is the result. If there's a tie, the earlier
468 * one in the list wins. (If there are two rules in a row with the
469 * same base value, the first one is used when the numerator of the
470 * fraction would be 1, and the second rule is used the rest of the
472 * @param number The number being formatted (which will always be
473 * a number between 0 and 1)
474 * @return The rule to use to format this number
477 NFRuleSet::findFractionRuleSetRule(double number
) const
479 // the obvious way to do this (multiply the value being formatted
480 // by each rule's base value until you get an integral result)
481 // doesn't work because of rounding error. This method is more
484 // find the least common multiple of the rules' base values
485 // and multiply this by the number being formatted. This is
486 // all the precision we need, and we can do all of the rest
487 // of the math using integer arithmetic
488 int64_t leastCommonMultiple
= rules
[0]->getBaseValue();
491 for (uint32_t i
= 1; i
< rules
.size(); ++i
) {
492 leastCommonMultiple
= util_lcm(leastCommonMultiple
, rules
[i
]->getBaseValue());
494 numerator
= util64_fromDouble(number
* (double)leastCommonMultiple
+ 0.5);
496 // for each rule, do the following...
497 int64_t tempDifference
;
498 int64_t difference
= util64_fromDouble(uprv_maxMantissa());
500 for (uint32_t i
= 0; i
< rules
.size(); ++i
) {
501 // "numerator" is the numerator of the fraction if the
502 // denominator is the LCD. The numerator if the rule's
503 // base value is the denominator is "numerator" times the
504 // base value divided bythe LCD. Here we check to see if
505 // that's an integer, and if not, how close it is to being
507 tempDifference
= numerator
* rules
[i
]->getBaseValue() % leastCommonMultiple
;
510 // normalize the result of the above calculation: we want
511 // the numerator's distance from the CLOSEST multiple
513 if (leastCommonMultiple
- tempDifference
< tempDifference
) {
514 tempDifference
= leastCommonMultiple
- tempDifference
;
517 // if this is as close as we've come, keep track of how close
518 // that is, and the line number of the rule that did it. If
519 // we've scored a direct hit, we don't have to look at any more
521 if (tempDifference
< difference
) {
522 difference
= tempDifference
;
524 if (difference
== 0) {
530 // if we have two successive rules that both have the winning base
531 // value, then the first one (the one we found above) is used if
532 // the numerator of the fraction is 1 and the second one is used if
533 // the numerator of the fraction is anything else (this lets us
534 // do things like "one third"/"two thirds" without haveing to define
535 // a whole bunch of extra rule sets)
536 if ((unsigned)(winner
+ 1) < rules
.size() &&
537 rules
[winner
+ 1]->getBaseValue() == rules
[winner
]->getBaseValue()) {
538 double n
= ((double)rules
[winner
]->getBaseValue()) * number
;
539 if (n
< 0.5 || n
>= 2) {
544 // finally, return the winning rule
545 return rules
[winner
];
549 * Parses a string. Matches the string to be parsed against each
550 * of its rules (with a base value less than upperBound) and returns
551 * the value produced by the rule that matched the most charcters
552 * in the source string.
553 * @param text The string to parse
554 * @param parsePosition The initial position is ignored and assumed
555 * to be 0. On exit, this object has been updated to point to the
556 * first character position this rule set didn't consume.
557 * @param upperBound Limits the rules that can be allowed to match.
558 * Only rules whose base values are strictly less than upperBound
560 * @return The numerical result of parsing this string. This will
561 * be the matching rule's base value, composed appropriately with
562 * the results of matching any of its substitutions. The object
563 * will be an instance of Long if it's an integral value; otherwise,
564 * it will be an instance of Double. This function always returns
565 * a valid object: If nothing matched the input string at all,
566 * this function returns new Long(0), and the parse position is
572 static void dumpUS(FILE* f
, const UnicodeString
& us
) {
573 int len
= us
.length();
574 char* buf
= (char *)uprv_malloc((len
+1)*sizeof(char)); //new char[len+1];
576 us
.extract(0, len
, buf
);
578 fprintf(f
, "%s", buf
);
579 uprv_free(buf
); //delete[] buf;
585 NFRuleSet::parse(const UnicodeString
& text
, ParsePosition
& pos
, double upperBound
, Formattable
& result
) const
587 // try matching each rule in the rule set against the text being
588 // parsed. Whichever one matches the most characters is the one
589 // that determines the value we return.
593 // dump out if there's no text to parse
594 if (text
.length() == 0) {
598 ParsePosition highWaterMark
;
599 ParsePosition workingPos
= pos
;
602 fprintf(stderr
, "<nfrs> %x '", this);
603 dumpUS(stderr
, name
);
604 fprintf(stderr
, "' text '");
605 dumpUS(stderr
, text
);
606 fprintf(stderr
, "'\n");
607 fprintf(stderr
, " parse negative: %d\n", this, negativeNumberRule
!= 0);
610 // start by trying the negative number rule (if there is one)
611 if (negativeNumberRule
) {
612 Formattable tempResult
;
614 fprintf(stderr
, " <nfrs before negative> %x ub: %g\n", negativeNumberRule
, upperBound
);
616 UBool success
= negativeNumberRule
->doParse(text
, workingPos
, 0, upperBound
, tempResult
);
618 fprintf(stderr
, " <nfrs after negative> success: %d wpi: %d\n", success
, workingPos
.getIndex());
620 if (success
&& workingPos
.getIndex() > highWaterMark
.getIndex()) {
622 highWaterMark
= workingPos
;
627 fprintf(stderr
, "<nfrs> continue fractional with text '");
628 dumpUS(stderr
, text
);
629 fprintf(stderr
, "' hwm: %d\n", highWaterMark
.getIndex());
631 // then try each of the fraction rules
633 for (int i
= 0; i
< 3; i
++) {
634 if (fractionRules
[i
]) {
635 Formattable tempResult
;
636 UBool success
= fractionRules
[i
]->doParse(text
, workingPos
, 0, upperBound
, tempResult
);
637 if (success
&& (workingPos
.getIndex() > highWaterMark
.getIndex())) {
639 highWaterMark
= workingPos
;
646 fprintf(stderr
, "<nfrs> continue other with text '");
647 dumpUS(stderr
, text
);
648 fprintf(stderr
, "' hwm: %d\n", highWaterMark
.getIndex());
651 // finally, go through the regular rules one at a time. We start
652 // at the end of the list because we want to try matching the most
653 // sigificant rule first (this helps ensure that we parse
654 // "five thousand three hundred six" as
655 // "(five thousand) (three hundred) (six)" rather than
656 // "((five thousand three) hundred) (six)"). Skip rules whose
657 // base values are higher than the upper bound (again, this helps
658 // limit ambiguity by making sure the rules that match a rule's
659 // are less significant than the rule containing the substitutions)/
661 int64_t ub
= util64_fromDouble(upperBound
);
665 util64_toa(ub
, ubstr
, 64);
667 util64_toa(ub
, ubstrhex
, 64, 16);
668 fprintf(stderr
, "ub: %g, i64: %s (%s)\n", upperBound
, ubstr
, ubstrhex
);
671 for (int32_t i
= rules
.size(); --i
>= 0 && highWaterMark
.getIndex() < text
.length();) {
672 if ((!fIsFractionRuleSet
) && (rules
[i
]->getBaseValue() >= ub
)) {
675 Formattable tempResult
;
676 UBool success
= rules
[i
]->doParse(text
, workingPos
, fIsFractionRuleSet
, upperBound
, tempResult
);
677 if (success
&& workingPos
.getIndex() > highWaterMark
.getIndex()) {
679 highWaterMark
= workingPos
;
685 fprintf(stderr
, "<nfrs> exit\n");
687 // finally, update the parse postion we were passed to point to the
688 // first character we didn't use, and return the result that
689 // corresponds to that string of characters
696 NFRuleSet::appendRules(UnicodeString
& result
) const
698 // the rule set name goes first...
700 result
.append(gColon
);
701 result
.append(gLineFeed
);
703 // followed by the regular rules...
704 for (uint32_t i
= 0; i
< rules
.size(); i
++) {
705 result
.append(gFourSpaces
);
706 rules
[i
]->_appendRuleText(result
);
707 result
.append(gLineFeed
);
710 // followed by the special rules (if they exist)
711 if (negativeNumberRule
) {
712 result
.append(gFourSpaces
);
713 negativeNumberRule
->_appendRuleText(result
);
714 result
.append(gLineFeed
);
718 for (uint32_t i
= 0; i
< 3; ++i
) {
719 if (fractionRules
[i
]) {
720 result
.append(gFourSpaces
);
721 fractionRules
[i
]->_appendRuleText(result
);
722 result
.append(gLineFeed
);
730 int64_t util64_fromDouble(double d
) {
732 if (!uprv_isNaN(d
)) {
733 double mant
= uprv_maxMantissa();
736 } else if (d
> mant
) {
743 result
= (int64_t)uprv_floor(d
);
751 int64_t util64_pow(int32_t r
, uint32_t e
) {
765 static const uint8_t asciiDigits
[] = {
766 0x30u
, 0x31u
, 0x32u
, 0x33u
, 0x34u
, 0x35u
, 0x36u
, 0x37u
,
767 0x38u
, 0x39u
, 0x61u
, 0x62u
, 0x63u
, 0x64u
, 0x65u
, 0x66u
,
768 0x67u
, 0x68u
, 0x69u
, 0x6au
, 0x6bu
, 0x6cu
, 0x6du
, 0x6eu
,
769 0x6fu
, 0x70u
, 0x71u
, 0x72u
, 0x73u
, 0x74u
, 0x75u
, 0x76u
,
770 0x77u
, 0x78u
, 0x79u
, 0x7au
,
773 static const UChar kUMinus
= (UChar
)0x002d;
776 static const char kMinus
= '-';
778 static const uint8_t digitInfo
[] = {
779 0, 0, 0, 0, 0, 0, 0, 0,
780 0, 0, 0, 0, 0, 0, 0, 0,
781 0, 0, 0, 0, 0, 0, 0, 0,
782 0, 0, 0, 0, 0, 0, 0, 0,
783 0, 0, 0, 0, 0, 0, 0, 0,
784 0, 0, 0, 0, 0, 0, 0, 0,
785 0x80u
, 0x81u
, 0x82u
, 0x83u
, 0x84u
, 0x85u
, 0x86u
, 0x87u
,
786 0x88u
, 0x89u
, 0, 0, 0, 0, 0, 0,
787 0, 0x8au
, 0x8bu
, 0x8cu
, 0x8du
, 0x8eu
, 0x8fu
, 0x90u
,
788 0x91u
, 0x92u
, 0x93u
, 0x94u
, 0x95u
, 0x96u
, 0x97u
, 0x98u
,
789 0x99u
, 0x9au
, 0x9bu
, 0x9cu
, 0x9du
, 0x9eu
, 0x9fu
, 0xa0u
,
790 0xa1u
, 0xa2u
, 0xa3u
, 0, 0, 0, 0, 0,
791 0, 0x8au
, 0x8bu
, 0x8cu
, 0x8du
, 0x8eu
, 0x8fu
, 0x90u
,
792 0x91u
, 0x92u
, 0x93u
, 0x94u
, 0x95u
, 0x96u
, 0x97u
, 0x98u
,
793 0x99u
, 0x9au
, 0x9bu
, 0x9cu
, 0x9du
, 0x9eu
, 0x9fu
, 0xa0u
,
794 0xa1u
, 0xa2u
, 0xa3u
, 0, 0, 0, 0, 0,
797 int64_t util64_atoi(const char* str
, uint32_t radix
)
801 } else if (radix
< 2) {
804 int64_t lradix
= radix
;
807 if (*str
== kMinus
) {
813 while ((b
= digitInfo
[*str
++]) && ((b
&= 0x7f) < radix
)) {
815 result
+= (int32_t)b
;
823 int64_t util64_utoi(const UChar
* str
, uint32_t radix
)
827 } else if (radix
< 2) {
830 int64_t lradix
= radix
;
833 if (*str
== kUMinus
) {
840 while (((c
= *str
++) < 0x0080) && (b
= digitInfo
[c
]) && ((b
&= 0x7f) < radix
)) {
842 result
+= (int32_t)b
;
850 uint32_t util64_toa(int64_t w
, char* buf
, uint32_t len
, uint32_t radix
, UBool raw
)
854 } else if (radix
< 2) {
857 int64_t base
= radix
;
860 if (len
&& (w
< 0) && (radix
== 10) && !raw
) {
864 } else if (len
&& (w
== 0)) {
865 *p
++ = (char)raw
? 0 : asciiDigits
[0];
869 while (len
&& w
!= 0) {
870 int64_t n
= w
/ base
;
871 int64_t m
= n
* base
;
872 int32_t d
= (int32_t)(w
-m
);
873 *p
++ = raw
? (char)d
: asciiDigits
[d
];
878 *p
= 0; // null terminate if room for caller convenience
882 if (*buf
== kMinus
) {
896 uint32_t util64_tou(int64_t w
, UChar
* buf
, uint32_t len
, uint32_t radix
, UBool raw
)
900 } else if (radix
< 2) {
903 int64_t base
= radix
;
906 if (len
&& (w
< 0) && (radix
== 10) && !raw
) {
910 } else if (len
&& (w
== 0)) {
911 *p
++ = (UChar
)raw
? 0 : asciiDigits
[0];
915 while (len
&& (w
!= 0)) {
916 int64_t n
= w
/ base
;
917 int64_t m
= n
* base
;
918 int32_t d
= (int32_t)(w
-m
);
919 *p
++ = (UChar
)(raw
? d
: asciiDigits
[d
]);
924 *p
= 0; // null terminate if room for caller convenience
927 len
= (uint32_t)(p
- buf
);
928 if (*buf
== kUMinus
) {