1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 ******************************************************************************
5 * Copyright (C) 1997-2015, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 ******************************************************************************
8 * file name: nfsubs.cpp
10 * tab size: 8 (not used)
13 * Modification history
15 * 10/11/2001 Doug Ported from ICU4J
19 #include "utypeinfo.h" // for 'typeid' to work
23 #include "fmtableimp.h"
27 static const UChar gLessThan
= 0x003c;
28 static const UChar gEquals
= 0x003d;
29 static const UChar gGreaterThan
= 0x003e;
30 static const UChar gPercent
= 0x0025;
31 static const UChar gPound
= 0x0023;
32 static const UChar gZero
= 0x0030;
33 static const UChar gSpace
= 0x0020;
35 static const UChar gEqualsEquals
[] =
39 static const UChar gGreaterGreaterGreaterThan
[] =
43 static const UChar gGreaterGreaterThan
[] =
50 class SameValueSubstitution
: public NFSubstitution
{
52 SameValueSubstitution(int32_t pos
,
53 const NFRuleSet
* ruleset
,
54 const UnicodeString
& description
,
56 virtual ~SameValueSubstitution();
58 virtual int64_t transformNumber(int64_t number
) const { return number
; }
59 virtual double transformNumber(double number
) const { return number
; }
60 virtual double composeRuleValue(double newRuleValue
, double /*oldRuleValue*/) const { return newRuleValue
; }
61 virtual double calcUpperBound(double oldUpperBound
) const { return oldUpperBound
; }
62 virtual UChar
tokenChar() const { return (UChar
)0x003d; } // '='
65 static UClassID
getStaticClassID(void);
66 virtual UClassID
getDynamicClassID(void) const;
69 SameValueSubstitution::~SameValueSubstitution() {}
71 class MultiplierSubstitution
: public NFSubstitution
{
75 MultiplierSubstitution(int32_t _pos
,
77 const NFRuleSet
* _ruleSet
,
78 const UnicodeString
& description
,
80 : NFSubstitution(_pos
, _ruleSet
, description
, status
), divisor(rule
->getDivisor())
83 status
= U_PARSE_ERROR
;
86 virtual ~MultiplierSubstitution();
88 virtual void setDivisor(int32_t radix
, int16_t exponent
, UErrorCode
& status
) {
89 divisor
= util64_pow(radix
, exponent
);
92 status
= U_PARSE_ERROR
;
96 virtual UBool
operator==(const NFSubstitution
& rhs
) const;
98 virtual int64_t transformNumber(int64_t number
) const {
99 return number
/ divisor
;
102 virtual double transformNumber(double number
) const {
104 return uprv_floor(number
/ divisor
);
106 return number
/ divisor
;
110 virtual double composeRuleValue(double newRuleValue
, double /*oldRuleValue*/) const {
111 return newRuleValue
* divisor
;
114 virtual double calcUpperBound(double /*oldUpperBound*/) const { return static_cast<double>(divisor
); }
116 virtual UChar
tokenChar() const { return (UChar
)0x003c; } // '<'
119 static UClassID
getStaticClassID(void);
120 virtual UClassID
getDynamicClassID(void) const;
123 MultiplierSubstitution::~MultiplierSubstitution() {}
125 class ModulusSubstitution
: public NFSubstitution
{
127 const NFRule
* ruleToUse
;
129 ModulusSubstitution(int32_t pos
,
131 const NFRule
* rulePredecessor
,
132 const NFRuleSet
* ruleSet
,
133 const UnicodeString
& description
,
135 virtual ~ModulusSubstitution();
137 virtual void setDivisor(int32_t radix
, int16_t exponent
, UErrorCode
& status
) {
138 divisor
= util64_pow(radix
, exponent
);
141 status
= U_PARSE_ERROR
;
145 virtual UBool
operator==(const NFSubstitution
& rhs
) const;
147 virtual void doSubstitution(int64_t number
, UnicodeString
& toInsertInto
, int32_t pos
, int32_t recursionCount
, UErrorCode
& status
) const;
148 virtual void doSubstitution(double number
, UnicodeString
& toInsertInto
, int32_t pos
, int32_t recursionCount
, UErrorCode
& status
) const;
150 virtual int64_t transformNumber(int64_t number
) const { return number
% divisor
; }
151 virtual double transformNumber(double number
) const { return uprv_fmod(number
, static_cast<double>(divisor
)); }
153 virtual UBool
doParse(const UnicodeString
& text
,
154 ParsePosition
& parsePosition
,
158 uint32_t nonNumericalExecutedRuleMask
,
159 Formattable
& result
) const;
161 virtual double composeRuleValue(double newRuleValue
, double oldRuleValue
) const {
162 return oldRuleValue
- uprv_fmod(oldRuleValue
, static_cast<double>(divisor
)) + newRuleValue
;
165 virtual double calcUpperBound(double /*oldUpperBound*/) const { return static_cast<double>(divisor
); }
167 virtual UBool
isModulusSubstitution() const { return TRUE
; }
169 virtual UChar
tokenChar() const { return (UChar
)0x003e; } // '>'
171 virtual void toString(UnicodeString
& result
) const;
174 static UClassID
getStaticClassID(void);
175 virtual UClassID
getDynamicClassID(void) const;
178 ModulusSubstitution::~ModulusSubstitution() {}
180 class IntegralPartSubstitution
: public NFSubstitution
{
182 IntegralPartSubstitution(int32_t _pos
,
183 const NFRuleSet
* _ruleSet
,
184 const UnicodeString
& description
,
186 : NFSubstitution(_pos
, _ruleSet
, description
, status
) {}
187 virtual ~IntegralPartSubstitution();
189 virtual int64_t transformNumber(int64_t number
) const { return number
; }
190 virtual double transformNumber(double number
) const { return uprv_floor(number
); }
191 virtual double composeRuleValue(double newRuleValue
, double oldRuleValue
) const { return newRuleValue
+ oldRuleValue
; }
192 virtual double calcUpperBound(double /*oldUpperBound*/) const { return DBL_MAX
; }
193 virtual UChar
tokenChar() const { return (UChar
)0x003c; } // '<'
196 static UClassID
getStaticClassID(void);
197 virtual UClassID
getDynamicClassID(void) const;
200 IntegralPartSubstitution::~IntegralPartSubstitution() {}
202 class FractionalPartSubstitution
: public NFSubstitution
{
205 enum { kMaxDecimalDigits
= 8 };
207 FractionalPartSubstitution(int32_t pos
,
208 const NFRuleSet
* ruleSet
,
209 const UnicodeString
& description
,
211 virtual ~FractionalPartSubstitution();
213 virtual UBool
operator==(const NFSubstitution
& rhs
) const;
215 virtual void doSubstitution(double number
, UnicodeString
& toInsertInto
, int32_t pos
, int32_t recursionCount
, UErrorCode
& status
) const;
216 virtual void doSubstitution(int64_t /*number*/, UnicodeString
& /*toInsertInto*/, int32_t /*_pos*/, int32_t /*recursionCount*/, UErrorCode
& /*status*/) const {}
217 virtual int64_t transformNumber(int64_t /*number*/) const { return 0; }
218 virtual double transformNumber(double number
) const { return number
- uprv_floor(number
); }
220 virtual UBool
doParse(const UnicodeString
& text
,
221 ParsePosition
& parsePosition
,
225 uint32_t nonNumericalExecutedRuleMask
,
226 Formattable
& result
) const;
228 virtual double composeRuleValue(double newRuleValue
, double oldRuleValue
) const { return newRuleValue
+ oldRuleValue
; }
229 virtual double calcUpperBound(double /*oldUpperBound*/) const { return 0.0; }
230 virtual UChar
tokenChar() const { return (UChar
)0x003e; } // '>'
233 static UClassID
getStaticClassID(void);
234 virtual UClassID
getDynamicClassID(void) const;
237 FractionalPartSubstitution::~FractionalPartSubstitution() {}
239 class AbsoluteValueSubstitution
: public NFSubstitution
{
241 AbsoluteValueSubstitution(int32_t _pos
,
242 const NFRuleSet
* _ruleSet
,
243 const UnicodeString
& description
,
245 : NFSubstitution(_pos
, _ruleSet
, description
, status
) {}
246 virtual ~AbsoluteValueSubstitution();
248 virtual int64_t transformNumber(int64_t number
) const { return number
>= 0 ? number
: -number
; }
249 virtual double transformNumber(double number
) const { return uprv_fabs(number
); }
250 virtual double composeRuleValue(double newRuleValue
, double /*oldRuleValue*/) const { return -newRuleValue
; }
251 virtual double calcUpperBound(double /*oldUpperBound*/) const { return DBL_MAX
; }
252 virtual UChar
tokenChar() const { return (UChar
)0x003e; } // '>'
255 static UClassID
getStaticClassID(void);
256 virtual UClassID
getDynamicClassID(void) const;
259 AbsoluteValueSubstitution::~AbsoluteValueSubstitution() {}
261 class NumeratorSubstitution
: public NFSubstitution
{
263 int64_t ldenominator
;
266 static inline UnicodeString
fixdesc(const UnicodeString
& desc
) {
267 if (desc
.endsWith(LTLT
, 2)) {
268 UnicodeString
result(desc
, 0, desc
.length()-1);
273 NumeratorSubstitution(int32_t _pos
,
276 const UnicodeString
& description
,
278 : NFSubstitution(_pos
, _ruleSet
, fixdesc(description
), status
), denominator(_denominator
)
280 ldenominator
= util64_fromDouble(denominator
);
281 withZeros
= description
.endsWith(LTLT
, 2);
283 virtual ~NumeratorSubstitution();
285 virtual UBool
operator==(const NFSubstitution
& rhs
) const;
287 virtual int64_t transformNumber(int64_t number
) const { return number
* ldenominator
; }
288 virtual double transformNumber(double number
) const { return uprv_round(number
* denominator
); }
290 virtual void doSubstitution(int64_t /*number*/, UnicodeString
& /*toInsertInto*/, int32_t /*_pos*/, int32_t /*recursionCount*/, UErrorCode
& /*status*/) const {}
291 virtual void doSubstitution(double number
, UnicodeString
& toInsertInto
, int32_t pos
, int32_t recursionCount
, UErrorCode
& status
) const;
292 virtual UBool
doParse(const UnicodeString
& text
,
293 ParsePosition
& parsePosition
,
296 UBool
/*lenientParse*/,
297 uint32_t nonNumericalExecutedRuleMask
,
298 Formattable
& result
) const;
300 virtual double composeRuleValue(double newRuleValue
, double oldRuleValue
) const { return newRuleValue
/ oldRuleValue
; }
301 virtual double calcUpperBound(double /*oldUpperBound*/) const { return denominator
; }
302 virtual UChar
tokenChar() const { return (UChar
)0x003c; } // '<'
304 static const UChar LTLT
[2];
307 static UClassID
getStaticClassID(void);
308 virtual UClassID
getDynamicClassID(void) const;
311 NumeratorSubstitution::~NumeratorSubstitution() {}
314 NFSubstitution::makeSubstitution(int32_t pos
,
316 const NFRule
* predecessor
,
317 const NFRuleSet
* ruleSet
,
318 const RuleBasedNumberFormat
* formatter
,
319 const UnicodeString
& description
,
322 // if the description is empty, return a NullSubstitution
323 if (description
.length() == 0) {
327 switch (description
.charAt(0)) {
328 // if the description begins with '<'...
330 // throw an exception if the rule is a negative number
332 if (rule
->getBaseValue() == NFRule::kNegativeNumberRule
) {
333 // throw new IllegalArgumentException("<< not allowed in negative-number rule");
334 status
= U_PARSE_ERROR
;
338 // if the rule is a fraction rule, return an
339 // IntegralPartSubstitution
340 else if (rule
->getBaseValue() == NFRule::kImproperFractionRule
341 || rule
->getBaseValue() == NFRule::kProperFractionRule
342 || rule
->getBaseValue() == NFRule::kMasterRule
) {
343 return new IntegralPartSubstitution(pos
, ruleSet
, description
, status
);
346 // if the rule set containing the rule is a fraction
347 // rule set, return a NumeratorSubstitution
348 else if (ruleSet
->isFractionRuleSet()) {
349 return new NumeratorSubstitution(pos
, (double)rule
->getBaseValue(),
350 formatter
->getDefaultRuleSet(), description
, status
);
353 // otherwise, return a MultiplierSubstitution
355 return new MultiplierSubstitution(pos
, rule
, ruleSet
,
356 description
, status
);
359 // if the description begins with '>'...
361 // if the rule is a negative-number rule, return
362 // an AbsoluteValueSubstitution
363 if (rule
->getBaseValue() == NFRule::kNegativeNumberRule
) {
364 return new AbsoluteValueSubstitution(pos
, ruleSet
, description
, status
);
367 // if the rule is a fraction rule, return a
368 // FractionalPartSubstitution
369 else if (rule
->getBaseValue() == NFRule::kImproperFractionRule
370 || rule
->getBaseValue() == NFRule::kProperFractionRule
371 || rule
->getBaseValue() == NFRule::kMasterRule
) {
372 return new FractionalPartSubstitution(pos
, ruleSet
, description
, status
);
375 // if the rule set owning the rule is a fraction rule set,
376 // throw an exception
377 else if (ruleSet
->isFractionRuleSet()) {
378 // throw new IllegalArgumentException(">> not allowed in fraction rule set");
379 status
= U_PARSE_ERROR
;
383 // otherwise, return a ModulusSubstitution
385 return new ModulusSubstitution(pos
, rule
, predecessor
,
386 ruleSet
, description
, status
);
389 // if the description begins with '=', always return a
390 // SameValueSubstitution
392 return new SameValueSubstitution(pos
, ruleSet
, description
, status
);
394 // and if it's anything else, throw an exception
396 // throw new IllegalArgumentException("Illegal substitution character");
397 status
= U_PARSE_ERROR
;
402 NFSubstitution::NFSubstitution(int32_t _pos
,
403 const NFRuleSet
* _ruleSet
,
404 const UnicodeString
& description
,
406 : pos(_pos
), ruleSet(NULL
), numberFormat(NULL
)
408 // the description should begin and end with the same character.
409 // If it doesn't that's a syntax error. Otherwise,
410 // makeSubstitution() was the only thing that needed to know
411 // about these characters, so strip them off
412 UnicodeString
workingDescription(description
);
413 if (description
.length() >= 2
414 && description
.charAt(0) == description
.charAt(description
.length() - 1))
416 workingDescription
.remove(description
.length() - 1, 1);
417 workingDescription
.remove(0, 1);
419 else if (description
.length() != 0) {
420 // throw new IllegalArgumentException("Illegal substitution syntax");
421 status
= U_PARSE_ERROR
;
425 if (workingDescription
.length() == 0) {
426 // if the description was just two paired token characters
427 // (i.e., "<<" or ">>"), it uses the rule set it belongs to to
429 this->ruleSet
= _ruleSet
;
431 else if (workingDescription
.charAt(0) == gPercent
) {
432 // if the description contains a rule set name, that's the rule
433 // set we use to format the result: get a reference to the
435 this->ruleSet
= _ruleSet
->getOwner()->findRuleSet(workingDescription
, status
);
437 else if (workingDescription
.charAt(0) == gPound
|| workingDescription
.charAt(0) ==gZero
) {
438 // if the description begins with 0 or #, treat it as a
439 // DecimalFormat pattern, and initialize a DecimalFormat with
440 // that pattern (then set it to use the DecimalFormatSymbols
441 // belonging to our formatter)
442 const DecimalFormatSymbols
* sym
= _ruleSet
->getOwner()->getDecimalFormatSymbols();
444 status
= U_MISSING_RESOURCE_ERROR
;
447 DecimalFormat
*tempNumberFormat
= new DecimalFormat(workingDescription
, *sym
, status
);
449 if (!tempNumberFormat
) {
450 status
= U_MEMORY_ALLOCATION_ERROR
;
453 if (U_FAILURE(status
)) {
454 delete tempNumberFormat
;
457 this->numberFormat
= tempNumberFormat
;
459 else if (workingDescription
.charAt(0) == gGreaterThan
) {
460 // if the description is ">>>", this substitution bypasses the
461 // usual rule-search process and always uses the rule that precedes
462 // it in its own rule set's rule list (this is used for place-value
463 // notations: formats where you want to see a particular part of
464 // a number even when it's 0)
466 // this causes problems when >>> is used in a FractionalPartSubstitution
467 // this->ruleSet = NULL;
468 this->ruleSet
= _ruleSet
;
469 this->numberFormat
= NULL
;
472 // and if the description is none of these things, it's a syntax error
474 // throw new IllegalArgumentException("Illegal substitution syntax");
475 status
= U_PARSE_ERROR
;
479 NFSubstitution::~NFSubstitution()
486 * Sets the substitution's divisor. Used by NFRule.setBaseValue().
487 * A no-op for all substitutions except multiplier and modulus
489 * @param radix The radix of the divisor
490 * @param exponent The exponent of the divisor
493 NFSubstitution::setDivisor(int32_t /*radix*/, int16_t /*exponent*/, UErrorCode
& /*status*/) {
494 // a no-op for all substitutions except multiplier and modulus substitutions
498 NFSubstitution::setDecimalFormatSymbols(const DecimalFormatSymbols
&newSymbols
, UErrorCode
& /*status*/) {
499 if (numberFormat
!= NULL
) {
500 numberFormat
->setDecimalFormatSymbols(newSymbols
);
504 //-----------------------------------------------------------------------
506 //-----------------------------------------------------------------------
508 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(NFSubstitution
)
511 * Compares two substitutions for equality
512 * @param rhs The substitution to compare this one to
513 * @return true if the two substitutions are functionally equivalent
516 NFSubstitution::operator==(const NFSubstitution
& rhs
) const
518 // compare class and all of the fields all substitutions have
520 // this should be called by subclasses before their own equality tests
521 return typeid(*this) == typeid(rhs
)
523 && (ruleSet
== NULL
) == (rhs
.ruleSet
== NULL
)
524 // && ruleSet == rhs.ruleSet causes circularity, other checks to make instead?
525 && (numberFormat
== NULL
526 ? (rhs
.numberFormat
== NULL
)
527 : (*numberFormat
== *rhs
.numberFormat
));
531 * Returns a textual description of the substitution
532 * @return A textual description of the substitution. This might
533 * not be identical to the description it was created from, but
534 * it'll produce the same result.
537 NFSubstitution::toString(UnicodeString
& text
) const
539 // use tokenChar() to get the character at the beginning and
540 // end of the substitution token. In between them will go
541 // either the name of the rule set it uses, or the pattern of
542 // the DecimalFormat it uses
544 text
.append(tokenChar());
547 if (ruleSet
!= NULL
) {
548 ruleSet
->getName(temp
);
549 } else if (numberFormat
!= NULL
) {
550 numberFormat
->toPattern(temp
);
553 text
.append(tokenChar());
556 //-----------------------------------------------------------------------
558 //-----------------------------------------------------------------------
561 * Performs a mathematical operation on the number, formats it using
562 * either ruleSet or decimalFormat, and inserts the result into
564 * @param number The number being formatted.
565 * @param toInsertInto The string we insert the result into
566 * @param pos The position in toInsertInto where the owning rule's
567 * rule text begins (this value is added to this substitution's
568 * position to determine exactly where to insert the new text)
571 NFSubstitution::doSubstitution(int64_t number
, UnicodeString
& toInsertInto
, int32_t _pos
, int32_t recursionCount
, UErrorCode
& status
) const
573 if (ruleSet
!= NULL
) {
574 // Perform a transformation on the number that is dependent
575 // on the type of substitution this is, then just call its
576 // rule set's format() method to format the result
577 ruleSet
->format(transformNumber(number
), toInsertInto
, _pos
+ this->pos
, recursionCount
, status
);
578 } else if (numberFormat
!= NULL
) {
579 if (number
<= MAX_INT64_IN_DOUBLE
) {
580 // or perform the transformation on the number (preserving
581 // the result's fractional part if the formatter is set
582 // to show it), then use that formatter's format() method
583 // to format the result
584 double numberToFormat
= transformNumber((double)number
);
585 if (numberFormat
->getMaximumFractionDigits() == 0) {
586 numberToFormat
= uprv_floor(numberToFormat
);
590 numberFormat
->format(numberToFormat
, temp
, status
);
591 toInsertInto
.insert(_pos
+ this->pos
, temp
);
594 // We have gone beyond double precision. Something has to give.
595 // We're favoring accuracy of the large number over potential rules
596 // that round like a CompactDecimalFormat, which is not a common use case.
598 // Perform a transformation on the number that is dependent
599 // on the type of substitution this is, then just call its
600 // rule set's format() method to format the result
601 int64_t numberToFormat
= transformNumber(number
);
603 numberFormat
->format(numberToFormat
, temp
, status
);
604 toInsertInto
.insert(_pos
+ this->pos
, temp
);
610 * Performs a mathematical operation on the number, formats it using
611 * either ruleSet or decimalFormat, and inserts the result into
613 * @param number The number being formatted.
614 * @param toInsertInto The string we insert the result into
615 * @param pos The position in toInsertInto where the owning rule's
616 * rule text begins (this value is added to this substitution's
617 * position to determine exactly where to insert the new text)
620 NFSubstitution::doSubstitution(double number
, UnicodeString
& toInsertInto
, int32_t _pos
, int32_t recursionCount
, UErrorCode
& status
) const {
621 // perform a transformation on the number being formatted that
622 // is dependent on the type of substitution this is
623 double numberToFormat
= transformNumber(number
);
625 if (uprv_isInfinite(numberToFormat
)) {
626 // This is probably a minus rule. Combine it with an infinite rule.
627 const NFRule
*infiniteRule
= ruleSet
->findDoubleRule(uprv_getInfinity());
628 infiniteRule
->doFormat(numberToFormat
, toInsertInto
, _pos
+ this->pos
, recursionCount
, status
);
632 // if the result is an integer, from here on out we work in integer
633 // space (saving time and memory and preserving accuracy)
634 if (numberToFormat
== uprv_floor(numberToFormat
) && ruleSet
!= NULL
) {
635 ruleSet
->format(util64_fromDouble(numberToFormat
), toInsertInto
, _pos
+ this->pos
, recursionCount
, status
);
637 // if the result isn't an integer, then call either our rule set's
638 // format() method or our DecimalFormat's format() method to
641 if (ruleSet
!= NULL
) {
642 ruleSet
->format(numberToFormat
, toInsertInto
, _pos
+ this->pos
, recursionCount
, status
);
643 } else if (numberFormat
!= NULL
) {
645 numberFormat
->format(numberToFormat
, temp
);
646 toInsertInto
.insert(_pos
+ this->pos
, temp
);
652 //-----------------------------------------------------------------------
654 //-----------------------------------------------------------------------
661 * Parses a string using the rule set or DecimalFormat belonging
662 * to this substitution. If there's a match, a mathematical
663 * operation (the inverse of the one used in formatting) is
664 * performed on the result of the parse and the value passed in
665 * and returned as the result. The parse position is updated to
666 * point to the first unmatched character in the string.
667 * @param text The string to parse
668 * @param parsePosition On entry, ignored, but assumed to be 0.
669 * On exit, this is updated to point to the first unmatched
670 * character (or 0 if the substitution didn't match)
671 * @param baseValue A partial parse result that should be
672 * combined with the result of this parse
673 * @param upperBound When searching the rule set for a rule
674 * matching the string passed in, only rules with base values
675 * lower than this are considered
676 * @param lenientParse If true and matching against rules fails,
677 * the substitution will also try matching the text against
678 * numerals using a default-constructed NumberFormat. If false,
679 * no extra work is done. (This value is false whenever the
680 * formatter isn't in lenient-parse mode, but is also false
681 * under some conditions even when the formatter _is_ in
682 * lenient-parse mode.)
683 * @return If there's a match, this is the result of composing
684 * baseValue with whatever was returned from matching the
685 * characters. This will be either a Long or a Double. If there's
686 * no match this is new Long(0) (not null), and parsePosition
690 NFSubstitution::doParse(const UnicodeString
& text
,
691 ParsePosition
& parsePosition
,
695 uint32_t nonNumericalExecutedRuleMask
,
696 Formattable
& result
) const
699 fprintf(stderr
, "<nfsubs> %x bv: %g ub: %g\n", this, baseValue
, upperBound
);
701 // figure out the highest base value a rule can have and match
702 // the text being parsed (this varies according to the type of
703 // substitutions: multiplier, modulus, and numerator substitutions
704 // restrict the search to rules with base values lower than their
705 // own; same-value substitutions leave the upper bound wherever
706 // it was, and the others allow any rule to match
707 upperBound
= calcUpperBound(upperBound
);
709 // use our rule set to parse the text. If that fails and
710 // lenient parsing is enabled (this is always false if the
711 // formatter's lenient-parsing mode is off, but it may also
712 // be false even when the formatter's lenient-parse mode is
713 // on), then also try parsing the text using a default-
714 // constructed NumberFormat
715 if (ruleSet
!= NULL
) {
716 ruleSet
->parse(text
, parsePosition
, upperBound
, nonNumericalExecutedRuleMask
, result
);
717 if (lenientParse
&& !ruleSet
->isFractionRuleSet() && parsePosition
.getIndex() == 0) {
718 UErrorCode status
= U_ZERO_ERROR
;
719 NumberFormat
* fmt
= NumberFormat::createInstance(status
);
720 if (U_SUCCESS(status
)) {
721 fmt
->parse(text
, result
, parsePosition
);
726 // ...or use our DecimalFormat to parse the text
727 } else if (numberFormat
!= NULL
) {
728 numberFormat
->parse(text
, result
, parsePosition
);
731 // if the parse was successful, we've already advanced the caller's
732 // parse position (this is the one function that doesn't have one
733 // of its own). Derive a parse result and return it as a Long,
734 // if possible, or a Double
735 if (parsePosition
.getIndex() != 0) {
736 UErrorCode status
= U_ZERO_ERROR
;
737 double tempResult
= result
.getDouble(status
);
739 // composeRuleValue() produces a full parse result from
740 // the partial parse result passed to this function from
741 // the caller (this is either the owning rule's base value
742 // or the partial result obtained from composing the
743 // owning rule's base value with its other substitution's
744 // parse result) and the partial parse result obtained by
745 // matching the substitution (which will be the same value
746 // the caller would get by parsing just this part of the
747 // text with RuleBasedNumberFormat.parse() ). How the two
748 // values are used to derive the full parse result depends
749 // on the types of substitutions: For a regular rule, the
750 // ultimate result is its multiplier substitution's result
751 // times the rule's divisor (or the rule's base value) plus
752 // the modulus substitution's result (which will actually
753 // supersede part of the rule's base value). For a negative-
754 // number rule, the result is the negative of its substitution's
755 // result. For a fraction rule, it's the sum of its two
756 // substitution results. For a rule in a fraction rule set,
757 // it's the numerator substitution's result divided by
758 // the rule's base value. Results from same-value substitutions
759 // propagate back upward, and null substitutions don't affect
761 tempResult
= composeRuleValue(tempResult
, baseValue
);
762 result
.setDouble(tempResult
);
764 // if the parse was UNsuccessful, return 0
772 * Returns true if this is a modulus substitution. (We didn't do this
773 * with instanceof partially because it causes source files to
774 * proliferate and partially because we have to port this to C++.)
775 * @return true if this object is an instance of ModulusSubstitution
778 NFSubstitution::isModulusSubstitution() const {
784 * @return true if this is a decimal format-only substitution
787 NFSubstitution::isDecimalFormatSubstitutionOnly() const {
788 return (ruleSet
== NULL
&& getNumberFormat() != NULL
);
792 * Apple addition, not currently used
793 * @return true if this substitution uses another ruleSet
796 //NFSubstitution::isRuleSetSubstitutionOnly() const {
797 // return (getNumberFormat() == NULL && ruleSet != NULL);
800 //===================================================================
801 // SameValueSubstitution
802 //===================================================================
805 * A substitution that passes the value passed to it through unchanged.
806 * Represented by == in rule descriptions.
808 SameValueSubstitution::SameValueSubstitution(int32_t _pos
,
809 const NFRuleSet
* _ruleSet
,
810 const UnicodeString
& description
,
812 : NFSubstitution(_pos
, _ruleSet
, description
, status
)
814 if (0 == description
.compare(gEqualsEquals
, 2)) {
815 // throw new IllegalArgumentException("== is not a legal token");
816 status
= U_PARSE_ERROR
;
820 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(SameValueSubstitution
)
822 //===================================================================
823 // MultiplierSubstitution
824 //===================================================================
826 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(MultiplierSubstitution
)
828 UBool
MultiplierSubstitution::operator==(const NFSubstitution
& rhs
) const
830 return NFSubstitution::operator==(rhs
) &&
831 divisor
== ((const MultiplierSubstitution
*)&rhs
)->divisor
;
835 //===================================================================
836 // ModulusSubstitution
837 //===================================================================
840 * A substitution that divides the number being formatted by its rule's
841 * divisor and formats the remainder. Represented by ">>" in a
844 ModulusSubstitution::ModulusSubstitution(int32_t _pos
,
846 const NFRule
* predecessor
,
847 const NFRuleSet
* _ruleSet
,
848 const UnicodeString
& description
,
850 : NFSubstitution(_pos
, _ruleSet
, description
, status
)
851 , divisor(rule
->getDivisor())
854 // the owning rule's divisor controls the behavior of this
855 // substitution: rather than keeping a backpointer to the rule,
856 // we keep a copy of the divisor
859 status
= U_PARSE_ERROR
;
862 if (0 == description
.compare(gGreaterGreaterGreaterThan
, 3)) {
863 // the >>> token doesn't alter how this substitution calculates the
864 // values it uses for formatting and parsing, but it changes
865 // what's done with that value after it's obtained: >>> short-
866 // circuits the rule-search process and goes straight to the
867 // specified rule to format the substitution value
868 ruleToUse
= predecessor
;
872 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ModulusSubstitution
)
874 UBool
ModulusSubstitution::operator==(const NFSubstitution
& rhs
) const
876 return NFSubstitution::operator==(rhs
) &&
877 divisor
== ((const ModulusSubstitution
*)&rhs
)->divisor
&&
878 ruleToUse
== ((const ModulusSubstitution
*)&rhs
)->ruleToUse
;
881 //-----------------------------------------------------------------------
883 //-----------------------------------------------------------------------
887 * If this is a >>> substitution, use ruleToUse to fill in
888 * the substitution. Otherwise, just use the superclass function.
889 * @param number The number being formatted
890 * @param toInsertInto The string to insert the result of this substitution
892 * @param pos The position of the rule text in toInsertInto
895 ModulusSubstitution::doSubstitution(int64_t number
, UnicodeString
& toInsertInto
, int32_t _pos
, int32_t recursionCount
, UErrorCode
& status
) const
897 // if this isn't a >>> substitution, just use the inherited version
898 // of this function (which uses either a rule set or a DecimalFormat
899 // to format its substitution value)
900 if (ruleToUse
== NULL
) {
901 NFSubstitution::doSubstitution(number
, toInsertInto
, _pos
, recursionCount
, status
);
903 // a >>> substitution goes straight to a particular rule to
904 // format the substitution value
906 int64_t numberToFormat
= transformNumber(number
);
907 ruleToUse
->doFormat(numberToFormat
, toInsertInto
, _pos
+ getPos(), recursionCount
, status
);
912 * If this is a >>> substitution, use ruleToUse to fill in
913 * the substitution. Otherwise, just use the superclass function.
914 * @param number The number being formatted
915 * @param toInsertInto The string to insert the result of this substitution
917 * @param pos The position of the rule text in toInsertInto
// ModulusSubstitution::doSubstitution, double overload.
// Mirrors the int64_t overload: with ruleToUse == NULL the arguments are
// forwarded to NFSubstitution::doSubstitution(); otherwise the number is
// passed through transformNumber() (double version) and formatted by
// ruleToUse->doFormat() at offset _pos + getPos().
// NOTE(review): the brace/else lines separating the two paths are not
// present in this listing; the split is taken from the comments below.
920 ModulusSubstitution::doSubstitution(double number
, UnicodeString
& toInsertInto
, int32_t _pos
, int32_t recursionCount
, UErrorCode
& status
) const
922 // if this isn't a >>> substitution, just use the inherited version
923 // of this function (which uses either a rule set or a DecimalFormat
924 // to format its substitution value)
925 if (ruleToUse
== NULL
) {
926 NFSubstitution::doSubstitution(number
, toInsertInto
, _pos
, recursionCount
, status
);
928 // a >>> substitution goes straight to a particular rule to
929 // format the substitution value
// Floating-point path: the transformed value is kept as a double and
// handed to doFormat() as such.
931 double numberToFormat
= transformNumber(number
);
// Same insertion offset as the integer overload: _pos + getPos().
933 ruleToUse
->doFormat(numberToFormat
, toInsertInto
, _pos
+ getPos(), recursionCount
, status
);
937 //-----------------------------------------------------------------------
939 //-----------------------------------------------------------------------
942 * If this is a >>> substitution, match only against ruleToUse.
943 * Otherwise, use the superclass function.
944 * @param text The string to parse
945 * @param parsePosition Ignored on entry, updated on exit to point to
946 * the first unmatched character.
947 * @param baseValue The partial parse result prior to calling this
// ModulusSubstitution::doParse.
// With ruleToUse == NULL the whole job is delegated to
// NFSubstitution::doParse() and its return value is returned directly.
// With ruleToUse set (a ">>>" substitution) the specific rule's doParse()
// is called with FALSE as its third argument; if parsePosition then reports
// a non-zero index, the parsed value is read as a double, combined with
// baseValue through composeRuleValue() and stored back into result.
// NOTE(review): the declarations of baseValue, upperBound and lenientParse
// (used below) and the final return statement are not visible in this
// listing.
951 ModulusSubstitution::doParse(const UnicodeString
& text
,
952 ParsePosition
& parsePosition
,
956 uint32_t nonNumericalExecutedRuleMask
,
957 Formattable
& result
) const
959 // if this isn't a >>> substitution, we can just use the
960 // inherited parse() routine to do the parsing
961 if (ruleToUse
== NULL
) {
962 return NFSubstitution::doParse(text
, parsePosition
, baseValue
, upperBound
, lenientParse
, nonNumericalExecutedRuleMask
, result
);
964 // but if it IS a >>> substitution, we have to do it here: we
965 // use the specific rule's doParse() method, and then we have to
966 // do some of the other work of NFRuleSet.parse()
// lenientParse is not forwarded on this path; none of the arguments passed
// to the rule's doParse() below is lenientParse.
968 ruleToUse
->doParse(text
, parsePosition
, FALSE
, upperBound
, nonNumericalExecutedRuleMask
, result
);
// A parse index of 0 is treated as "nothing matched": result is only
// post-processed when the index moved.
970 if (parsePosition
.getIndex() != 0) {
// NOTE(review): status is a fresh local and is not inspected after
// getDouble() in the lines visible here.
971 UErrorCode status
= U_ZERO_ERROR
;
972 double tempResult
= result
.getDouble(status
);
973 tempResult
= composeRuleValue(tempResult
, baseValue
);
974 result
.setDouble(tempResult
);
981 * Returns a textual description of the substitution
982 * @return A textual description of the substitution. This might
983 * not be identical to the description it was created from, but
984 * it'll produce the same result.
// ModulusSubstitution::toString.
// For a ">>>" substitution (ruleToUse != NULL) the description is rebuilt as
// three consecutive tokenChar() characters appended to text; in every other
// case NFSubstitution::toString() produces the description.
// NOTE(review): text is appended to in the visible lines; whether it is
// cleared first (a line between 994 and 996 is missing from this listing)
// cannot be confirmed from here.
987 ModulusSubstitution::toString(UnicodeString
& text
) const
989 // use tokenChar() to get the character at the beginning and
990 // end of the substitution token. In between them will go
991 // either the name of the rule set it uses, or the pattern of
992 // the DecimalFormat it uses
994 if ( ruleToUse
!= NULL
) { // Must have been a >>> substitution.
// Three appends of the same token character spell out the ">>>"-style token.
996 text
.append(tokenChar());
997 text
.append(tokenChar());
998 text
.append(tokenChar());
999 } else { // Otherwise just use the super-class function.
1000 NFSubstitution::toString(text
);
1003 //===================================================================
1004 // IntegralPartSubstitution
1005 //===================================================================
// Instantiates the UOBJECT_DEFINE_RTTI_IMPLEMENTATION macro for
// IntegralPartSubstitution; no other member of this class is defined in
// this part of the file.
// NOTE(review): presumably this supplies the getStaticClassID() /
// getDynamicClassID() definitions that the substitution classes declare --
// confirm against the macro definition.
1007 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(IntegralPartSubstitution
)
1010 //===================================================================
1011 // FractionalPartSubstitution
1012 //===================================================================
1016 * Constructs a FractionalPartSubstitution. This object keeps a flag
1017 * telling whether it should format by digits or not. In addition,
1018 * it marks the rule set it calls (if any) as a fraction rule set.
// FractionalPartSubstitution constructor.
// Forwards _pos, _ruleSet, description and status to the NFSubstitution
// constructor, then inspects the description: it is compared against
// gGreaterGreaterThan (first 2 code units) and gGreaterGreaterGreaterThan
// (first 3 code units), and the rule set passed in is compared with the one
// the base class ended up with (getRuleSet()).
// NOTE(review): the status parameter declaration, the member initializers
// and the bodies of both branches (presumably where byDigits / useSpaces are
// set) are not visible in this listing -- confirm against the full file.
1020 FractionalPartSubstitution::FractionalPartSubstitution(int32_t _pos
,
1021 const NFRuleSet
* _ruleSet
,
1022 const UnicodeString
& description
,
1024 : NFSubstitution(_pos
, _ruleSet
, description
, status
)
1029 // akk, ruleSet can change in superclass constructor
// Hence the identity check against getRuleSet() rather than trusting
// _ruleSet: the condition holds for a ">>" / ">>>" description or when the
// base class kept the very rule set that was passed in.
1030 if (0 == description
.compare(gGreaterGreaterThan
, 2) ||
1031 0 == description
.compare(gGreaterGreaterGreaterThan
, 3) ||
1032 _ruleSet
== getRuleSet()) {
// Nested test singles out the three-character ">>>" form.
1034 if (0 == description
.compare(gGreaterGreaterGreaterThan
, 3)) {
// Casts away const on the rule set returned by getRuleSet() in order to call
// the mutating makeIntoFractionRuleSet() on it.
// NOTE(review): which branch this statement belongs to is not visible here.
1039 ((NFRuleSet
*)getRuleSet())->makeIntoFractionRuleSet();
1043 //-----------------------------------------------------------------------
1045 //-----------------------------------------------------------------------
1048 * If in "by digits" mode, fills in the substitution one decimal digit
1049 * at a time using the rule set containing this substitution.
1050 * Otherwise, uses the superclass function.
1051 * @param number The number being formatted
1052 * @param toInsertInto The string to insert the result of formatting
1053 * the substitution into
1054 * @param pos The position of the owning rule's rule text in
// FractionalPartSubstitution::doSubstitution (double).
// Outside "byDigits" mode the call is forwarded to
// NFSubstitution::doSubstitution(). In "byDigits" mode the fraction digits
// held in dl are walked from the last digit down to the decimal point and
// each one is formatted through getRuleSet()->format() at _pos + getPos(),
// with a gSpace inserted between digits when both pad and useSpaces are set.
// NOTE(review): the byDigits test, the declaration/initialisation of dl and
// pad, and the condition guarding the final "zero" fallback are on lines
// missing from this listing.
1058 FractionalPartSubstitution::doSubstitution(double number
, UnicodeString
& toInsertInto
,
1059 int32_t _pos
, int32_t recursionCount
, UErrorCode
& status
) const
1061 // if we're not in "byDigits" mode, just use the inherited
1062 // doSubstitution() routine
1064 NFSubstitution::doSubstitution(number
, toInsertInto
, _pos
, recursionCount
, status
);
1066 // if we're in "byDigits" mode, transform the value into an integer
1067 // by moving the decimal point eight places to the right and
1068 // pulling digits off the right one at a time, formatting each digit
1069 // as an integer using this substitution's owning rule set
1070 // (this is slower, but more accurate, than doing it from the
// NOTE(review): the "eight places" wording above describes the
// commented-out implementation that follows; the live code below rounds dl
// to 20 fraction digits instead.
1073 // int32_t numberToFormat = (int32_t)uprv_round(transformNumber(number) * uprv_pow(10, kMaxDecimalDigits));
1074 // // this flag keeps us from formatting trailing zeros. It starts
1075 // // out false because we're pulling from the right, and switches
1076 // // to true the first time we encounter a non-zero digit
1077 // UBool doZeros = FALSE;
1078 // for (int32_t i = 0; i < kMaxDecimalDigits; i++) {
1079 // int64_t digit = numberToFormat % 10;
1080 // if (digit != 0 || doZeros) {
1081 // if (doZeros && useSpaces) {
1082 // toInsertInto.insert(_pos + getPos(), gSpace);
1085 // getRuleSet()->format(digit, toInsertInto, _pos + getPos());
1087 // numberToFormat /= 10;
1092 dl
.roundFixedPoint(20); // round to 20 fraction digits.
1093 dl
.reduce(); // Removes any trailing zeros.
// didx runs from the last stored digit down to getDecimalAt(); every
// insertion happens at the same offset (_pos + getPos()), so digits
// formatted later land in front of those formatted earlier.
1096 for (int32_t didx
= dl
.getCount()-1; didx
>=dl
.getDecimalAt(); didx
--) {
1097 // Loop iterates over fraction digits, starting with the LSD.
1098 // include both real digits from the number, and zeros
1099 // to the left of the MSD but to the right of the decimal point.
1100 if (pad
&& useSpaces
) {
1101 toInsertInto
.insert(_pos
+ getPos(), gSpace
);
// A negative didx has no stored digit behind it and is formatted as 0;
// otherwise the stored character is turned into its numeric value by
// subtracting '0'.
1105 int64_t digit
= didx
>=0 ? dl
.getDigit(didx
) - '0' : 0;
1106 getRuleSet()->format(digit
, toInsertInto
, _pos
+ getPos(), recursionCount
, status
);
1110 // hack around lack of precision in digitlist. if we would end up with
1111 // "foo point" make sure we add a " zero" to the end.
1112 getRuleSet()->format((int64_t)0, toInsertInto
, _pos
+ getPos(), recursionCount
, status
);
1117 //-----------------------------------------------------------------------
1119 //-----------------------------------------------------------------------
1122 * If in "by digits" mode, parses the string as if it were a string
1123 * of individual digits; otherwise, uses the superclass function.
1124 * @param text The string to parse
1125 * @param parsePosition Ignored on entry, but updated on exit to point
1126 * to the first unmatched character
1127 * @param baseValue The partial parse result prior to entering this
1129 * @param upperBound Only consider rules with base values lower than
1130 * this when filling in the substitution
1131 * @param lenientParse If true, try matching the text as numerals if
1132 * matching as words doesn't work
1133 * @return If the match was successful, the current partial parse
1134 * result; otherwise new Long(0). The result is either a Long or
// FractionalPartSubstitution::doParse.
// Outside "byDigits" mode the work is delegated to NFSubstitution::doParse()
// with 0 passed as the upper bound (the upperBound parameter itself is
// unnamed and unused). In "byDigits" mode a copy of the text is consumed one
// digit at a time: each pass parses with an upper bound of 10, appends the
// digit to dl, advances parsePosition by what was consumed and skips any
// following gSpace characters. The accumulated digits are then converted to
// a double, combined with baseValue via composeRuleValue() and stored in
// resVal.
// NOTE(review): the byDigits test, the declarations of baseValue,
// lenientParse, dl, temp, digit and result, the release of fmt and the
// return statements are on lines missing from this listing.
1139 FractionalPartSubstitution::doParse(const UnicodeString
& text
,
1140 ParsePosition
& parsePosition
,
1142 double /*upperBound*/,
1144 uint32_t nonNumericalExecutedRuleMask
,
1145 Formattable
& resVal
) const
1147 // if we're not in byDigits mode, we can just use the inherited
1150 return NFSubstitution::doParse(text
, parsePosition
, baseValue
, 0, lenientParse
, nonNumericalExecutedRuleMask
, resVal
);
1152 // if we ARE in byDigits mode, parse the text one digit at a time
1153 // using this substitution's owning rule set (we do this by setting
1154 // upperBound to 10 when calling doParse() ) until we reach
1157 UnicodeString
workText(text
);
// workPos starts at index 1 so that the loop condition (index != 0) is
// satisfied on the first pass; it is reset to 0 at the top of each pass.
1158 ParsePosition
workPos(1);
1161 // double p10 = 0.1;
1164 NumberFormat
* fmt
= NULL
;
// The loop stops when the text is exhausted or a pass consumed nothing.
1165 while (workText
.length() > 0 && workPos
.getIndex() != 0) {
1166 workPos
.setIndex(0);
1168 getRuleSet()->parse(workText
, workPos
, 10, nonNumericalExecutedRuleMask
, temp
);
1169 UErrorCode status
= U_ZERO_ERROR
;
1170 digit
= temp
.getLong(status
);
1171 // digit = temp.getType() == Formattable::kLong ?
1173 // (int32_t)temp.getDouble();
// Lenient fallback: only tried when the rule set matched nothing.
1175 if (lenientParse
&& workPos
.getIndex() == 0) {
1177 status
= U_ZERO_ERROR
;
1178 fmt
= NumberFormat::createInstance(status
);
1179 if (U_FAILURE(status
)) {
// NOTE(review): nothing visible here bounds the value parsed by fmt to
// 0..9 before it is turned into a character below -- confirm.
1185 fmt
->parse(workText
, temp
, workPos
);
1186 digit
= temp
.getLong(status
);
1190 if (workPos
.getIndex() != 0) {
// The digit is stored as its character form ('0' + digit).
1191 dl
.append((char)('0' + digit
));
1192 // result += digit * p10;
1194 parsePosition
.setIndex(parsePosition
.getIndex() + workPos
.getIndex());
1195 workText
.removeBetween(0, workPos
.getIndex());
// Leading spaces left after the digit are dropped from the working copy
// and counted into parsePosition as well.
1196 while (workText
.length() > 0 && workText
.charAt(0) == gSpace
) {
1197 workText
.removeBetween(0, 1);
1198 parsePosition
.setIndex(parsePosition
.getIndex() + 1);
// No digits collected yields 0 rather than asking dl for a value.
1204 result
= dl
.getCount() == 0 ? 0 : dl
.getDouble();
1205 result
= composeRuleValue(result
, baseValue
);
1206 resVal
.setDouble(result
);
// FractionalPartSubstitution::operator==.
// Two substitutions compare equal when NFSubstitution::operator==() accepts
// rhs and both objects carry the same byDigits value. Because of the
// short-circuiting &&, rhs is only reinterpreted as a
// FractionalPartSubstitution after the base comparison has succeeded.
// NOTE(review): this relies on the base operator== rejecting objects of a
// different dynamic type -- confirm. useSpaces (read by doSubstitution) is
// not part of the comparison.
1212 FractionalPartSubstitution::operator==(const NFSubstitution
& rhs
) const
1214 return NFSubstitution::operator==(rhs
) &&
1215 ((const FractionalPartSubstitution
*)&rhs
)->byDigits
== byDigits
;
// Instantiates the UOBJECT_DEFINE_RTTI_IMPLEMENTATION macro for
// FractionalPartSubstitution.
// NOTE(review): presumably this supplies the getStaticClassID() /
// getDynamicClassID() definitions for the class -- confirm against the
// macro definition.
1218 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(FractionalPartSubstitution
)
1221 //===================================================================
1222 // AbsoluteValueSubstitution
1223 //===================================================================
// Instantiates the UOBJECT_DEFINE_RTTI_IMPLEMENTATION macro for
// AbsoluteValueSubstitution; no other member of this class is defined in
// this part of the file.
// NOTE(review): presumably this supplies the getStaticClassID() /
// getDynamicClassID() definitions for the class -- confirm against the
// macro definition.
1225 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(AbsoluteValueSubstitution
)
1227 //===================================================================
1228 // NumeratorSubstitution
1229 //===================================================================
// NumeratorSubstitution::doSubstitution (double).
// The number is passed through transformNumber(); an int64_t copy is made
// with util64_fromDouble(). When withZeros is set and a rule set exists, a
// zero (formatted by the rule set, preceded by an inserted gSpace) is
// emitted for as long as nf multiplied by ten stays below denominator, and
// apos is moved forward by the number of characters added. The value itself
// is then formatted: through the rule set as an integer if the double equals
// its int64_t copy, through the rule set as a double otherwise, or through
// getNumberFormat() when there is no rule set.
// NOTE(review): the declarations of nf and temp and the else/brace lines
// between the branches are on lines missing from this listing.
1232 NumeratorSubstitution::doSubstitution(double number
, UnicodeString
& toInsertInto
, int32_t apos
, int32_t recursionCount
, UErrorCode
& status
) const {
1233 // perform a transformation on the number being formatted that
1234 // is dependent on the type of substitution this is
1236 double numberToFormat
= transformNumber(number
);
1237 int64_t longNF
= util64_fromDouble(numberToFormat
);
1239 const NFRuleSet
* aruleSet
= getRuleSet();
1240 if (withZeros
&& aruleSet
!= NULL
) {
1241 // if there are leading zeros in the decimal expansion then emit them
// len remembers the string length before any zero is inserted so the growth
// can be added to apos afterwards.
1243 int32_t len
= toInsertInto
.length();
// nf is multiplied by ten as part of the loop test itself.
1244 while ((nf
*= 10) < denominator
) {
1245 toInsertInto
.insert(apos
+ getPos(), gSpace
);
1246 aruleSet
->format((int64_t)0, toInsertInto
, apos
+ getPos(), recursionCount
, status
);
// Shift the insertion point past everything the zero loop added.
1248 apos
+= toInsertInto
.length() - len
;
1251 // if the result is an integer, from here on out we work in integer
1252 // space (saving time and memory and preserving accuracy)
1253 if (numberToFormat
== longNF
&& aruleSet
!= NULL
) {
1254 aruleSet
->format(longNF
, toInsertInto
, apos
+ getPos(), recursionCount
, status
);
1256 // if the result isn't an integer, then call either our rule set's
1257 // format() method or our DecimalFormat's format() method to
1258 // format the result
1260 if (aruleSet
!= NULL
) {
1261 aruleSet
->format(numberToFormat
, toInsertInto
, apos
+ getPos(), recursionCount
, status
);
// No rule set: format into the temporary string and insert that text.
1264 getNumberFormat()->format(numberToFormat
, temp
, status
);
1265 toInsertInto
.insert(apos
+ getPos(), temp
);
// NumeratorSubstitution::doParse.
// The lenientParse parameter is unnamed and FALSE is always passed to the
// base-class parse. A working copy of the text is first scanned for leading
// zeros: each pass parses with an upper bound of 1, advances parsePosition
// by what was consumed and skips following gSpace characters. The working
// text is then trimmed by parsePosition's index, parsePosition is reset to
// 0, and NFSubstitution::doParse() handles the remainder. The parsed value
// is finally read back as an int64_t and result is set to n / d.
// NOTE(review): the declarations of baseValue, upperBound, temp and d, the
// withZeros guard around the zero scan, the zeroCount increment, the loop
// bodies that compute d, and the return statement are on lines missing from
// this listing. Callers should confirm how the reset of parsePosition to 0
// interacts with the zeros already consumed.
1271 NumeratorSubstitution::doParse(const UnicodeString
& text
,
1272 ParsePosition
& parsePosition
,
1275 UBool
/*lenientParse*/,
1276 uint32_t nonNumericalExecutedRuleMask
,
1277 Formattable
& result
) const
1279 // we don't have to do anything special to do the parsing here,
1280 // but we have to turn lenient parsing off-- if we leave it on,
1281 // it SERIOUSLY messes up the algorithm
1283 // if withZeros is true, we need to count the zeros
1284 // and use that to adjust the parse result
1285 UErrorCode status
= U_ZERO_ERROR
;
1286 int32_t zeroCount
= 0;
1287 UnicodeString
workText(text
);
// workPos starts at index 1 so the loop condition (index != 0) holds on the
// first pass; it is reset to 0 at the top of each pass.
1290 ParsePosition
workPos(1);
1293 while (workText
.length() > 0 && workPos
.getIndex() != 0) {
1294 workPos
.setIndex(0);
1295 getRuleSet()->parse(workText
, workPos
, 1, nonNumericalExecutedRuleMask
, temp
); // parse zero or nothing at all
1296 if (workPos
.getIndex() == 0) {
1297 // we failed, either there were no more zeros, or the number was formatted with digits
1298 // either way, we're done
1303 parsePosition
.setIndex(parsePosition
.getIndex() + workPos
.getIndex());
1304 workText
.remove(0, workPos
.getIndex());
// Spaces following a matched zero are removed and counted into
// parsePosition too.
1305 while (workText
.length() > 0 && workText
.charAt(0) == gSpace
) {
1306 workText
.remove(0, 1);
1307 parsePosition
.setIndex(parsePosition
.getIndex() + 1);
1312 workText
.remove(0, (int32_t)parsePosition
.getIndex());
1313 parsePosition
.setIndex(0);
1316 // we've parsed off the zeros, now let's parse the rest from our current position
// With withZeros set, 1 is passed as the base value instead of baseValue.
1317 NFSubstitution::doParse(workText
, parsePosition
, withZeros
? 1 : baseValue
, upperBound
, FALSE
, nonNumericalExecutedRuleMask
, result
);
1320 // any base value will do in this case. is there a way to
1321 // force this to not bother trying all the base values?
1323 // compute the 'effective' base and prescale the value down
1324 int64_t n
= result
.getLong(status
); // force conversion!
1331 // now add the zeros
1332 while (zeroCount
> 0) {
1336 // d is now our true denominator
// Both operands are converted to double before dividing, so the quotient is
// a floating-point division.
1337 result
.setDouble((double)n
/(double)d
);
// NumeratorSubstitution::operator==.
// Equal when NFSubstitution::operator==() accepts rhs and both objects have
// the same denominator. The short-circuiting && means rhs is only
// reinterpreted as a NumeratorSubstitution after the base comparison has
// succeeded.
// NOTE(review): this relies on the base operator== rejecting objects of a
// different dynamic type -- confirm. withZeros (read by doSubstitution and
// doParse) is not part of the comparison.
1344 NumeratorSubstitution::operator==(const NFSubstitution
& rhs
) const
1346 return NFSubstitution::operator==(rhs
) &&
1347 denominator
== ((const NumeratorSubstitution
*)&rhs
)->denominator
;
// Instantiates the UOBJECT_DEFINE_RTTI_IMPLEMENTATION macro for
// NumeratorSubstitution.
// NOTE(review): presumably this supplies the getStaticClassID() /
// getDynamicClassID() definitions for the class -- confirm against the
// macro definition.
1350 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(NumeratorSubstitution
)
// Definition of the static member NumeratorSubstitution::LTLT: two UChar
// code units, both 0x003c (the same value as gLessThan, '<'), i.e. the
// text "<<". No terminating zero element is included in the array.
1352 const UChar
NumeratorSubstitution::LTLT
[] = { 0x003c, 0x003c };