2 ******************************************************************************
3 * Copyright (C) 1997-2015, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 ******************************************************************************
6 * file name: nfrule.cpp
8 * tab size: 8 (not used)
11 * Modification history
13 * 10/11/2001 Doug Ported from ICU4J
20 #include "unicode/localpointer.h"
21 #include "unicode/rbnf.h"
22 #include "unicode/tblcoll.h"
23 #include "unicode/plurfmt.h"
24 #include "unicode/upluralrules.h"
25 #include "unicode/coleitr.h"
26 #include "unicode/uchar.h"
30 #include "patternprops.h"
34 NFRule::NFRule(const RuleBasedNumberFormat
* _rbnf
, const UnicodeString
&_ruleText
, UErrorCode
&status
)
35 : baseValue((int32_t)0)
43 , rulePatternFormat(NULL
)
45 if (!ruleText
.isEmpty()) {
46 parseRuleDescriptor(ruleText
, status
);
58 delete rulePatternFormat
;
59 rulePatternFormat
= NULL
;
62 static const UChar gLeftBracket
= 0x005b;
63 static const UChar gRightBracket
= 0x005d;
64 static const UChar gColon
= 0x003a;
65 static const UChar gZero
= 0x0030;
66 static const UChar gNine
= 0x0039;
67 static const UChar gSpace
= 0x0020;
68 static const UChar gSlash
= 0x002f;
69 static const UChar gGreaterThan
= 0x003e;
70 static const UChar gLessThan
= 0x003c;
71 static const UChar gComma
= 0x002c;
72 static const UChar gDot
= 0x002e;
73 static const UChar gTick
= 0x0027;
74 //static const UChar gMinus = 0x002d;
75 static const UChar gSemicolon
= 0x003b;
76 static const UChar gX
= 0x0078;
78 static const UChar gMinusX
[] = {0x2D, 0x78, 0}; /* "-x" */
79 static const UChar gInf
[] = {0x49, 0x6E, 0x66, 0}; /* "Inf" */
80 static const UChar gNaN
[] = {0x4E, 0x61, 0x4E, 0}; /* "NaN" */
82 static const UChar gDollarOpenParenthesis
[] = {0x24, 0x28, 0}; /* "$(" */
83 static const UChar gClosedParenthesisDollar
[] = {0x29, 0x24, 0}; /* ")$" */
85 static const UChar gLessLess
[] = {0x3C, 0x3C, 0}; /* "<<" */
86 static const UChar gLessPercent
[] = {0x3C, 0x25, 0}; /* "<%" */
87 static const UChar gLessHash
[] = {0x3C, 0x23, 0}; /* "<#" */
88 static const UChar gLessZero
[] = {0x3C, 0x30, 0}; /* "<0" */
89 static const UChar gGreaterGreater
[] = {0x3E, 0x3E, 0}; /* ">>" */
90 static const UChar gGreaterPercent
[] = {0x3E, 0x25, 0}; /* ">%" */
91 static const UChar gGreaterHash
[] = {0x3E, 0x23, 0}; /* ">#" */
92 static const UChar gGreaterZero
[] = {0x3E, 0x30, 0}; /* ">0" */
93 static const UChar gEqualPercent
[] = {0x3D, 0x25, 0}; /* "=%" */
94 static const UChar gEqualHash
[] = {0x3D, 0x23, 0}; /* "=#" */
95 static const UChar gEqualZero
[] = {0x3D, 0x30, 0}; /* "=0" */
96 static const UChar gGreaterGreaterGreater
[] = {0x3E, 0x3E, 0x3E, 0}; /* ">>>" */
98 static const UChar
* const RULE_PREFIXES
[] = {
99 gLessLess
, gLessPercent
, gLessHash
, gLessZero
,
100 gGreaterGreater
, gGreaterPercent
,gGreaterHash
, gGreaterZero
,
101 gEqualPercent
, gEqualHash
, gEqualZero
, NULL
105 NFRule::makeRules(UnicodeString
& description
,
107 const NFRule
*predecessor
,
108 const RuleBasedNumberFormat
*rbnf
,
112 // we know we're making at least one rule, so go ahead and
113 // new it up and initialize its basevalue and divisor
114 // (this also strips the rule descriptor, if any, off the
115 // description string)
116 NFRule
* rule1
= new NFRule(rbnf
, description
, status
);
119 status
= U_MEMORY_ALLOCATION_ERROR
;
122 description
= rule1
->ruleText
;
124 // check the description to see whether there's text enclosed
126 int32_t brack1
= description
.indexOf(gLeftBracket
);
127 int32_t brack2
= brack1
< 0 ? -1 : description
.indexOf(gRightBracket
);
129 // if the description doesn't contain a matched pair of brackets,
130 // or if it's of a type that doesn't recognize bracketed text,
131 // then leave the description alone, initialize the rule's
132 // rule text and substitutions, and return that rule
133 if (brack2
< 0 || brack1
> brack2
134 || rule1
->getType() == kProperFractionRule
135 || rule1
->getType() == kNegativeNumberRule
136 || rule1
->getType() == kInfinityRule
137 || rule1
->getType() == kNaNRule
)
139 rule1
->extractSubstitutions(owner
, description
, predecessor
, status
);
142 // if the description does contain a matched pair of brackets,
143 // then it's really shorthand for two rules (with one exception)
144 NFRule
* rule2
= NULL
;
147 // we'll actually only split the rule into two rules if its
148 // base value is an even multiple of its divisor (or it's one
149 // of the special rules)
150 if ((rule1
->baseValue
> 0
151 && (rule1
->baseValue
% util64_pow(rule1
->radix
, rule1
->exponent
)) == 0)
152 || rule1
->getType() == kImproperFractionRule
153 || rule1
->getType() == kMasterRule
) {
155 // if it passes that test, new up the second rule. If the
156 // rule set both rules will belong to is a fraction rule
157 // set, they both have the same base value; otherwise,
158 // increment the original rule's base value ("rule1" actually
159 // goes SECOND in the rule set's rule list)
160 rule2
= new NFRule(rbnf
, UnicodeString(), status
);
163 status
= U_MEMORY_ALLOCATION_ERROR
;
166 if (rule1
->baseValue
>= 0) {
167 rule2
->baseValue
= rule1
->baseValue
;
168 if (!owner
->isFractionRuleSet()) {
173 // if the description began with "x.x" and contains bracketed
174 // text, it describes both the improper fraction rule and
175 // the proper fraction rule
176 else if (rule1
->getType() == kImproperFractionRule
) {
177 rule2
->setType(kProperFractionRule
);
180 // if the description began with "x.0" and contains bracketed
181 // text, it describes both the master rule and the
182 // improper fraction rule
183 else if (rule1
->getType() == kMasterRule
) {
184 rule2
->baseValue
= rule1
->baseValue
;
185 rule1
->setType(kImproperFractionRule
);
188 // both rules have the same radix and exponent (i.e., the
190 rule2
->radix
= rule1
->radix
;
191 rule2
->exponent
= rule1
->exponent
;
193 // rule2's rule text omits the stuff in brackets: initialize
194 // its rule text and substitutions accordingly
195 sbuf
.append(description
, 0, brack1
);
196 if (brack2
+ 1 < description
.length()) {
197 sbuf
.append(description
, brack2
+ 1, description
.length() - brack2
- 1);
199 rule2
->extractSubstitutions(owner
, sbuf
, predecessor
, status
);
202 // rule1's text includes the text in the brackets but omits
203 // the brackets themselves: initialize _its_ rule text and
204 // substitutions accordingly
205 sbuf
.setTo(description
, 0, brack1
);
206 sbuf
.append(description
, brack1
+ 1, brack2
- brack1
- 1);
207 if (brack2
+ 1 < description
.length()) {
208 sbuf
.append(description
, brack2
+ 1, description
.length() - brack2
- 1);
210 rule1
->extractSubstitutions(owner
, sbuf
, predecessor
, status
);
212 // if we only have one rule, return it; if we have two, return
213 // a two-element array containing them (notice that rule2 goes
214 // BEFORE rule1 in the list: in all cases, rule2 OMITS the
215 // material in the brackets and rule1 INCLUDES the material
218 if (rule2
->baseValue
>= kNoBase
) {
222 owner
->setNonNumericalRule(rule2
);
226 if (rule1
->baseValue
>= kNoBase
) {
230 owner
->setNonNumericalRule(rule1
);
235 * This function parses the rule's rule descriptor (i.e., the base
236 * value and/or other tokens that precede the rule's rule text
237 * in the description) and sets the rule's base value, radix, and
238 * exponent according to the descriptor. (If the description doesn't
239 * include a rule descriptor, then this function sets everything to
240 * default values and the rule set sets the rule's real base value).
241 * @param description The rule's description
242 * @return If "description" included a rule descriptor, this is
243 * "description" with the descriptor and any trailing whitespace
244 * stripped off. Otherwise, it's "description" unchanged.
247 NFRule::parseRuleDescriptor(UnicodeString
& description
, UErrorCode
& status
)
249 // the description consists of a rule descriptor and a rule body,
250 // separated by a colon. The rule descriptor is optional. If
251 // it's omitted, just set the base value to 0.
252 int32_t p
= description
.indexOf(gColon
);
254 // copy the descriptor out into its own string and strip it,
255 // along with any trailing whitespace, out of the original
257 UnicodeString descriptor
;
258 descriptor
.setTo(description
, 0, p
);
261 while (p
< description
.length() && PatternProps::isWhiteSpace(description
.charAt(p
))) {
264 description
.removeBetween(0, p
);
266 // check first to see if the rule descriptor matches the token
267 // for one of the special rules. If it does, set the base
268 // value to the correct identifier value
269 int descriptorLength
= descriptor
.length();
270 UChar firstChar
= descriptor
.charAt(0);
271 UChar lastChar
= descriptor
.charAt(descriptorLength
- 1);
272 if (firstChar
>= gZero
&& firstChar
<= gNine
&& lastChar
!= gX
) {
273 // if the rule descriptor begins with a digit, it's a descriptor
275 // since we don't have Long.parseLong, and this isn't much work anyway,
276 // just build up the value as we encounter the digits.
281 // begin parsing the descriptor: copy digits
282 // into "tempValue", skip periods, commas, and spaces,
283 // stop on a slash or > sign (or at the end of the string),
284 // and throw an exception on any other character
286 while (p
< descriptorLength
) {
287 c
= descriptor
.charAt(p
);
288 if (c
>= gZero
&& c
<= gNine
) {
289 val
= val
* ll_10
+ (int32_t)(c
- gZero
);
291 else if (c
== gSlash
|| c
== gGreaterThan
) {
294 else if (PatternProps::isWhiteSpace(c
) || c
== gComma
|| c
== gDot
) {
297 // throw new IllegalArgumentException("Illegal character in rule descriptor");
298 status
= U_PARSE_ERROR
;
304 // we have the base value, so set it
305 setBaseValue(val
, status
);
307 // if we stopped the previous loop on a slash, we're
308 // now parsing the rule's radix. Again, accumulate digits
309 // in tempValue, skip punctuation, stop on a > mark, and
310 // throw an exception on anything else
315 while (p
< descriptorLength
) {
316 c
= descriptor
.charAt(p
);
317 if (c
>= gZero
&& c
<= gNine
) {
318 val
= val
* ll_10
+ (int32_t)(c
- gZero
);
320 else if (c
== gGreaterThan
) {
323 else if (PatternProps::isWhiteSpace(c
) || c
== gComma
|| c
== gDot
) {
326 // throw new IllegalArgumentException("Illegal character is rule descriptor");
327 status
= U_PARSE_ERROR
;
333 // tempValue now contains the rule's radix. Set it
334 // accordingly, and recalculate the rule's exponent
335 radix
= (int32_t)val
;
337 // throw new IllegalArgumentException("Rule can't have radix of 0");
338 status
= U_PARSE_ERROR
;
341 exponent
= expectedExponent();
344 // if we stopped the previous loop on a > sign, then continue
345 // for as long as we still see > signs. For each one,
346 // decrement the exponent (unless the exponent is already 0).
347 // If we see another character before reaching the end of
348 // the descriptor, that's also a syntax error.
349 if (c
== gGreaterThan
) {
350 while (p
< descriptor
.length()) {
351 c
= descriptor
.charAt(p
);
352 if (c
== gGreaterThan
&& exponent
> 0) {
355 // throw new IllegalArgumentException("Illegal character in rule descriptor");
356 status
= U_PARSE_ERROR
;
363 else if (0 == descriptor
.compare(gMinusX
, 2)) {
364 setType(kNegativeNumberRule
);
366 else if (descriptorLength
== 3) {
367 if (firstChar
== gZero
&& lastChar
== gX
) {
368 setBaseValue(kProperFractionRule
, status
);
369 decimalPoint
= descriptor
.charAt(1);
371 else if (firstChar
== gX
&& lastChar
== gX
) {
372 setBaseValue(kImproperFractionRule
, status
);
373 decimalPoint
= descriptor
.charAt(1);
375 else if (firstChar
== gX
&& lastChar
== gZero
) {
376 setBaseValue(kMasterRule
, status
);
377 decimalPoint
= descriptor
.charAt(1);
379 else if (descriptor
.compare(gNaN
, 3) == 0) {
380 setBaseValue(kNaNRule
, status
);
382 else if (descriptor
.compare(gInf
, 3) == 0) {
383 setBaseValue(kInfinityRule
, status
);
387 // else use the default base value for now.
389 // finally, if the rule body begins with an apostrophe, strip it off
390 // (this is generally used to put whitespace at the beginning of
391 // a rule's rule text)
392 if (description
.length() > 0 && description
.charAt(0) == gTick
) {
393 description
.removeBetween(0, 1);
396 // return the description with all the stuff we've just waded through
397 // stripped off the front. It now contains just the rule body.
398 // return description;
402 * Searches the rule's rule text for the substitution tokens,
403 * creates the substitutions, and removes the substitution tokens
404 * from the rule's rule text.
405 * @param owner The rule set containing this rule
406 * @param predecessor The rule preceding this one in "owners" rule list
407 * @param ownersOwner The RuleBasedFormat that owns this rule
410 NFRule::extractSubstitutions(const NFRuleSet
* ruleSet
,
411 const UnicodeString
&ruleText
,
412 const NFRule
* predecessor
,
415 if (U_FAILURE(status
)) {
418 this->ruleText
= ruleText
;
419 sub1
= extractSubstitution(ruleSet
, predecessor
, status
);
421 // Small optimization. There is no need to create a redundant NullSubstitution.
425 sub2
= extractSubstitution(ruleSet
, predecessor
, status
);
427 int32_t pluralRuleStart
= this->ruleText
.indexOf(gDollarOpenParenthesis
, -1, 0);
428 int32_t pluralRuleEnd
= (pluralRuleStart
>= 0 ? this->ruleText
.indexOf(gClosedParenthesisDollar
, -1, pluralRuleStart
) : -1);
429 if (pluralRuleEnd
>= 0) {
430 int32_t endType
= this->ruleText
.indexOf(gComma
, pluralRuleStart
);
432 status
= U_PARSE_ERROR
;
435 UnicodeString
type(this->ruleText
.tempSubString(pluralRuleStart
+ 2, endType
- pluralRuleStart
- 2));
436 UPluralType pluralType
;
437 if (type
.startsWith(UNICODE_STRING_SIMPLE("cardinal"))) {
438 pluralType
= UPLURAL_TYPE_CARDINAL
;
440 else if (type
.startsWith(UNICODE_STRING_SIMPLE("ordinal"))) {
441 pluralType
= UPLURAL_TYPE_ORDINAL
;
444 status
= U_ILLEGAL_ARGUMENT_ERROR
;
447 rulePatternFormat
= formatter
->createPluralFormat(pluralType
,
448 this->ruleText
.tempSubString(endType
+ 1, pluralRuleEnd
- endType
- 1), status
);
453 * Searches the rule's rule text for the first substitution token,
454 * creates a substitution based on it, and removes the token from
455 * the rule's rule text.
456 * @param owner The rule set containing this rule
457 * @param predecessor The rule preceding this one in the rule set's
459 * @param ownersOwner The RuleBasedNumberFormat that owns this rule
460 * @return The newly-created substitution. This is never null; if
461 * the rule text doesn't contain any substitution tokens, this will
462 * be a NullSubstitution.
465 NFRule::extractSubstitution(const NFRuleSet
* ruleSet
,
466 const NFRule
* predecessor
,
469 NFSubstitution
* result
= NULL
;
471 // search the rule's rule text for the first two characters of
472 // a substitution token
473 int32_t subStart
= indexOfAnyRulePrefix();
474 int32_t subEnd
= subStart
;
476 // if we didn't find one, create a null substitution positioned
477 // at the end of the rule text
478 if (subStart
== -1) {
482 // special-case the ">>>" token, since searching for the > at the
483 // end will actually find the > in the middle
484 if (ruleText
.indexOf(gGreaterGreaterGreater
, 3, 0) == subStart
) {
485 subEnd
= subStart
+ 2;
487 // otherwise the substitution token ends with the same character
490 UChar c
= ruleText
.charAt(subStart
);
491 subEnd
= ruleText
.indexOf(c
, subStart
+ 1);
492 // special case for '<%foo<<'
493 if (c
== gLessThan
&& subEnd
!= -1 && subEnd
< ruleText
.length() - 1 && ruleText
.charAt(subEnd
+1) == c
) {
494 // ordinals use "=#,##0==%abbrev=" as their rule. Notice that the '==' in the middle
495 // occurs because of the juxtaposition of two different rules. The check for '<' is a hack
496 // to get around this. Having the duplicate at the front would cause problems with
497 // rules like "<<%" to format, say, percents...
502 // if we don't find the end of the token (i.e., if we're on a single,
503 // unmatched token character), create a null substitution positioned
504 // at the end of the rule
509 // if we get here, we have a real substitution token (or at least
510 // some text bounded by substitution token characters). Use
511 // makeSubstitution() to create the right kind of substitution
512 UnicodeString subToken
;
513 subToken
.setTo(ruleText
, subStart
, subEnd
+ 1 - subStart
);
514 result
= NFSubstitution::makeSubstitution(subStart
, this, predecessor
, ruleSet
,
515 this->formatter
, subToken
, status
);
517 // remove the substitution from the rule text
518 ruleText
.removeBetween(subStart
, subEnd
+1);
524 * Sets the rule's base value, and causes the radix and exponent
525 * to be recalculated. This is used during construction when we
526 * don't know the rule's base value until after it's been
527 * constructed. It should not be used at any other time.
528 * @param The new base value for the rule.
531 NFRule::setBaseValue(int64_t newBaseValue
, UErrorCode
& status
)
533 // set the base value
534 baseValue
= newBaseValue
;
537 // if this isn't a special rule, recalculate the radix and exponent
538 // (the radix always defaults to 10; if it's supposed to be something
539 // else, it's cleaned up by the caller and the exponent is
540 // recalculated again-- the only function that does this is
541 // NFRule.parseRuleDescriptor() )
542 if (baseValue
>= 1) {
543 exponent
= expectedExponent();
545 // this function gets called on a fully-constructed rule whose
546 // description didn't specify a base value. This means it
547 // has substitutions, and some substitutions hold on to copies
548 // of the rule's divisor. Fix their copies of the divisor.
550 sub1
->setDivisor(radix
, exponent
, status
);
553 sub2
->setDivisor(radix
, exponent
, status
);
556 // if this is a special rule, its radix and exponent are basically
557 // ignored. Set them to "safe" default values
564 * This calculates the rule's exponent based on its radix and base
565 * value. This will be the highest power the radix can be raised to
566 * and still produce a result less than or equal to the base value.
569 NFRule::expectedExponent() const
571 // since the log of 0, or the log base 0 of something, causes an
572 // error, declare the exponent in these cases to be 0 (we also
573 // deal with the special-rule identifiers here)
574 if (radix
== 0 || baseValue
< 1) {
578 // we get rounding error in some cases-- for example, log 1000 / log 10
579 // gives us 1.9999999996 instead of 2. The extra logic here is to take
581 int16_t tempResult
= (int16_t)(uprv_log((double)baseValue
) / uprv_log((double)radix
));
582 int64_t temp
= util64_pow(radix
, tempResult
+ 1);
583 if (temp
<= baseValue
) {
590 * Searches the rule's rule text for any of the specified strings.
591 * @return The index of the first match in the rule's rule text
592 * (i.e., the first substring in the rule's rule text that matches
593 * _any_ of the strings in "strings"). If none of the strings in
594 * "strings" is found in the rule's rule text, returns -1.
597 NFRule::indexOfAnyRulePrefix() const
600 for (int i
= 0; RULE_PREFIXES
[i
]; i
++) {
601 int32_t pos
= ruleText
.indexOf(*RULE_PREFIXES
[i
]);
602 if (pos
!= -1 && (result
== -1 || pos
< result
)) {
609 //-----------------------------------------------------------------------
611 //-----------------------------------------------------------------------
614 util_equalSubstitutions(const NFSubstitution
* sub1
, const NFSubstitution
* sub2
)
618 return *sub1
== *sub2
;
627 * Tests two rules for equality.
628 * @param that The rule to compare this one against
629 * @return True if the two rules are functionally equivalent
632 NFRule::operator==(const NFRule
& rhs
) const
634 return baseValue
== rhs
.baseValue
635 && radix
== rhs
.radix
636 && exponent
== rhs
.exponent
637 && ruleText
== rhs
.ruleText
638 && util_equalSubstitutions(sub1
, rhs
.sub1
)
639 && util_equalSubstitutions(sub2
, rhs
.sub2
);
643 * Returns a textual representation of the rule. This won't
644 * necessarily be the same as the description that this rule
645 * was created with, but it will produce the same result.
646 * @return A textual description of the rule
648 static void util_append64(UnicodeString
& result
, int64_t n
)
651 int32_t len
= util64_tou(n
, buffer
, sizeof(buffer
));
652 UnicodeString
temp(buffer
, len
);
657 NFRule::_appendRuleText(UnicodeString
& result
) const
660 case kNegativeNumberRule
: result
.append(gMinusX
, 2); break;
661 case kImproperFractionRule
: result
.append(gX
).append(decimalPoint
== 0 ? gDot
: decimalPoint
).append(gX
); break;
662 case kProperFractionRule
: result
.append(gZero
).append(decimalPoint
== 0 ? gDot
: decimalPoint
).append(gX
); break;
663 case kMasterRule
: result
.append(gX
).append(decimalPoint
== 0 ? gDot
: decimalPoint
).append(gZero
); break;
664 case kInfinityRule
: result
.append(gInf
, 3); break;
665 case kNaNRule
: result
.append(gNaN
, 3); break;
667 // for a normal rule, write out its base value, and if the radix is
668 // something other than 10, write out the radix (with the preceding
669 // slash, of course). Then calculate the expected exponent and if
670 // it isn't the same as the actual exponent, write an appropriate
671 // number of > signs. Finally, terminate the whole thing with
673 util_append64(result
, baseValue
);
675 result
.append(gSlash
);
676 util_append64(result
, radix
);
678 int numCarets
= expectedExponent() - exponent
;
679 for (int i
= 0; i
< numCarets
; i
++) {
680 result
.append(gGreaterThan
);
684 result
.append(gColon
);
685 result
.append(gSpace
);
687 // if the rule text begins with a space, write an apostrophe
688 // (whitespace after the rule descriptor is ignored; the
689 // apostrophe is used to make the whitespace significant)
690 if (ruleText
.charAt(0) == gSpace
&& (sub1
== NULL
|| sub1
->getPos() != 0)) {
691 result
.append(gTick
);
694 // now, write the rule's rule text, inserting appropriate
695 // substitution tokens in the appropriate places
696 UnicodeString ruleTextCopy
;
697 ruleTextCopy
.setTo(ruleText
);
701 sub2
->toString(temp
);
702 ruleTextCopy
.insert(sub2
->getPos(), temp
);
705 sub1
->toString(temp
);
706 ruleTextCopy
.insert(sub1
->getPos(), temp
);
709 result
.append(ruleTextCopy
);
711 // and finally, top the whole thing off with a semicolon and
713 result
.append(gSemicolon
);
716 //-----------------------------------------------------------------------
718 //-----------------------------------------------------------------------
721 * Formats the number, and inserts the resulting text into
723 * @param number The number being formatted
724 * @param toInsertInto The string where the resultant text should
726 * @param pos The position in toInsertInto where the resultant text
730 NFRule::doFormat(int64_t number
, UnicodeString
& toInsertInto
, int32_t pos
, int32_t recursionCount
, UErrorCode
& status
) const
732 // first, insert the rule's rule text into toInsertInto at the
733 // specified position, then insert the results of the substitutions
734 // into the right places in toInsertInto (notice we do the
735 // substitutions in reverse order so that the offsets don't get
737 int32_t pluralRuleStart
= ruleText
.length();
738 int32_t lengthOffset
= 0;
739 if (!rulePatternFormat
) {
740 toInsertInto
.insert(pos
, ruleText
);
743 pluralRuleStart
= ruleText
.indexOf(gDollarOpenParenthesis
, -1, 0);
744 int pluralRuleEnd
= ruleText
.indexOf(gClosedParenthesisDollar
, -1, pluralRuleStart
);
745 int initialLength
= toInsertInto
.length();
746 if (pluralRuleEnd
< ruleText
.length() - 1) {
747 toInsertInto
.insert(pos
, ruleText
.tempSubString(pluralRuleEnd
+ 2));
749 toInsertInto
.insert(pos
,
750 rulePatternFormat
->format((int32_t)(number
/uprv_pow(radix
, exponent
)), status
));
751 if (pluralRuleStart
> 0) {
752 toInsertInto
.insert(pos
, ruleText
.tempSubString(0, pluralRuleStart
));
754 lengthOffset
= ruleText
.length() - (toInsertInto
.length() - initialLength
);
758 sub2
->doSubstitution(number
, toInsertInto
, pos
- (sub2
->getPos() > pluralRuleStart
? lengthOffset
: 0), recursionCount
, status
);
761 sub1
->doSubstitution(number
, toInsertInto
, pos
- (sub1
->getPos() > pluralRuleStart
? lengthOffset
: 0), recursionCount
, status
);
766 * Formats the number, and inserts the resulting text into
768 * @param number The number being formatted
769 * @param toInsertInto The string where the resultant text should
771 * @param pos The position in toInsertInto where the resultant text
775 NFRule::doFormat(double number
, UnicodeString
& toInsertInto
, int32_t pos
, int32_t recursionCount
, UErrorCode
& status
) const
777 // first, insert the rule's rule text into toInsertInto at the
778 // specified position, then insert the results of the substitutions
779 // into the right places in toInsertInto
780 // [again, we have two copies of this routine that do the same thing
781 // so that we don't sacrifice precision in a long by casting it
783 int32_t pluralRuleStart
= ruleText
.length();
784 int32_t lengthOffset
= 0;
785 if (!rulePatternFormat
) {
786 toInsertInto
.insert(pos
, ruleText
);
789 pluralRuleStart
= ruleText
.indexOf(gDollarOpenParenthesis
, -1, 0);
790 int pluralRuleEnd
= ruleText
.indexOf(gClosedParenthesisDollar
, -1, pluralRuleStart
);
791 int initialLength
= toInsertInto
.length();
792 if (pluralRuleEnd
< ruleText
.length() - 1) {
793 toInsertInto
.insert(pos
, ruleText
.tempSubString(pluralRuleEnd
+ 2));
795 double pluralVal
= number
;
796 if (0 <= pluralVal
&& pluralVal
< 1) {
797 // We're in a fractional rule, and we have to match the NumeratorSubstitution behavior.
798 // 2.3 can become 0.2999999999999998 for the fraction due to rounding errors.
799 pluralVal
= uprv_round(pluralVal
* uprv_pow(radix
, exponent
));
802 pluralVal
= pluralVal
/ uprv_pow(radix
, exponent
);
804 toInsertInto
.insert(pos
, rulePatternFormat
->format((int32_t)(pluralVal
), status
));
805 if (pluralRuleStart
> 0) {
806 toInsertInto
.insert(pos
, ruleText
.tempSubString(0, pluralRuleStart
));
808 lengthOffset
= ruleText
.length() - (toInsertInto
.length() - initialLength
);
812 sub2
->doSubstitution(number
, toInsertInto
, pos
- (sub2
->getPos() > pluralRuleStart
? lengthOffset
: 0), recursionCount
, status
);
815 sub1
->doSubstitution(number
, toInsertInto
, pos
- (sub1
->getPos() > pluralRuleStart
? lengthOffset
: 0), recursionCount
, status
);
820 * Used by the owning rule set to determine whether to invoke the
821 * rollback rule (i.e., whether this rule or the one that precedes
822 * it in the rule set's list should be used to format the number)
823 * @param The number being formatted
824 * @return True if the rule set should use the rule that precedes
825 * this one in its list; false if it should use this rule
828 NFRule::shouldRollBack(double number
) const
830 // we roll back if the rule contains a modulus substitution,
831 // the number being formatted is an even multiple of the rule's
832 // divisor, and the rule's base value is NOT an even multiple
834 // In other words, if the original description had
835 // 100: << hundred[ >>];
838 // 101: << hundred >>;
839 // internally. But when we're formatting 200, if we use the rule
840 // at 101, which would normally apply, we get "two hundred zero".
841 // To prevent this, we roll back and use the rule at 100 instead.
842 // This is the logic that makes this happen: the rule at 101 has
843 // a modulus substitution, its base value isn't an even multiple
844 // of 100, and the value we're trying to format _is_ an even
845 // multiple of 100. This is called the "rollback rule."
846 if ((sub1
!= NULL
&& sub1
->isModulusSubstitution()) || (sub2
!= NULL
&& sub2
->isModulusSubstitution())) {
847 int64_t re
= util64_pow(radix
, exponent
);
848 return uprv_fmod(number
, (double)re
) == 0 && (baseValue
% re
) != 0;
853 //-----------------------------------------------------------------------
855 //-----------------------------------------------------------------------
858 * Attempts to parse the string with this rule.
859 * @param text The string being parsed
860 * @param parsePosition On entry, the value is ignored and assumed to
861 * be 0. On exit, this has been updated with the position of the first
862 * character not consumed by matching the text against this rule
863 * (if this rule doesn't match the text at all, the parse position
864 * is left unchanged (presumably at 0) and the function returns
866 * @param isFractionRule True if this rule is contained within a
867 * fraction rule set. This is only used if the rule has no
869 * @return If this rule matched the text, this is the rule's base value
870 * combined appropriately with the results of parsing the substitutions.
871 * If nothing matched, this is new Long(0) and the parse position is
872 * left unchanged. The result will be an instance of Long if the
873 * result is an integer and Double otherwise. The result is never null.
878 static void dumpUS(FILE* f
, const UnicodeString
& us
) {
879 int len
= us
.length();
880 char* buf
= (char *)uprv_malloc((len
+1)*sizeof(char)); //new char[len+1];
882 us
.extract(0, len
, buf
);
884 fprintf(f
, "%s", buf
);
885 uprv_free(buf
); //delete[] buf;
890 NFRule::doParse(const UnicodeString
& text
,
891 ParsePosition
& parsePosition
,
892 UBool isFractionRule
,
895 UBool isDecimFmtParseable
) const
897 // internally we operate on a copy of the string being parsed
898 // (because we're going to change it) and use our own ParsePosition
900 UnicodeString
workText(text
);
902 int32_t sub1Pos
= sub1
!= NULL
? sub1
->getPos() : ruleText
.length();
903 int32_t sub2Pos
= sub2
!= NULL
? sub2
->getPos() : ruleText
.length();
905 // check to see whether the text before the first substitution
906 // matches the text at the beginning of the string being
907 // parsed. If it does, strip that off the front of workText;
908 // otherwise, dump out with a mismatch
909 UnicodeString prefix
;
910 prefix
.setTo(ruleText
, 0, sub1Pos
);
913 fprintf(stderr
, "doParse %p ", this);
920 fprintf(stderr
, " text: '");
921 dumpUS(stderr
, text
);
922 fprintf(stderr
, "' prefix: '");
923 dumpUS(stderr
, prefix
);
925 stripPrefix(workText
, prefix
, pp
);
926 int32_t prefixLength
= text
.length() - workText
.length();
929 fprintf(stderr
, "' pl: %d ppi: %d s1p: %d\n", prefixLength
, pp
.getIndex(), sub1Pos
);
932 if (pp
.getIndex() == 0 && sub1Pos
!= 0) {
933 // commented out because ParsePosition doesn't have error index in 1.1.x
934 // restored for ICU4C port
935 parsePosition
.setErrorIndex(pp
.getErrorIndex());
939 if (baseValue
== kInfinityRule
) {
940 // If you match this, don't try to perform any calculations on it.
941 parsePosition
.setIndex(pp
.getIndex());
942 resVal
.setDouble(uprv_getInfinity());
945 if (baseValue
== kNaNRule
) {
946 // If you match this, don't try to perform any calculations on it.
947 parsePosition
.setIndex(pp
.getIndex());
948 resVal
.setDouble(uprv_getNaN());
952 // Detect when this rule's main job is to parse a decimal format and we're not
954 if (!isDecimFmtParseable
&& sub1
!= NULL
&& sub1
->isDecimalFormatSubstitutionOnly()) {
955 // This is trying to detect a rule like "x.x: =#,##0.#=;"
956 // We used to also check sub2->isRuleSetSubstitutionOnly() to detect this
957 // but now sub2 is usually NULL when we get here, and that test no longer seems to matter.
958 // Need to check into this more.
959 parsePosition
.setErrorIndex(pp
.getErrorIndex());
964 // this is the fun part. The basic guts of the rule-matching
965 // logic is matchToDelimiter(), which is called twice. The first
966 // time it searches the input string for the rule text BETWEEN
967 // the substitutions and tries to match the intervening text
968 // in the input string with the first substitution. If that
969 // succeeds, it then calls it again, this time to look for the
970 // rule text after the second substitution and to match the
971 // intervening input text against the second substitution.
973 // For example, say we have a rule that looks like this:
974 // first << middle >> last;
975 // and input text that looks like this:
976 // first one middle two last
977 // First we use stripPrefix() to match "first " in both places and
978 // strip it off the front, leaving
979 // one middle two last
980 // Then we use matchToDelimiter() to match " middle " and try to
981 // match "one" against a substitution. If it's successful, we now
984 // We use matchToDelimiter() a second time to match " last" and
985 // try to match "two" against a substitution. If "two" matches
986 // the substitution, we have a successful parse.
988 // Since it's possible in many cases to find multiple instances
989 // of each of these pieces of rule text in the input string,
990 // we need to try all the possible combinations of these
991 // locations. This prevents us from prematurely declaring a mismatch,
992 // and makes sure we match as much input text as we can.
993 int highWaterMark
= 0;
996 double tempBaseValue
= (double)(baseValue
<= 0 ? 0 : baseValue
);
1000 // our partial parse result starts out as this rule's base
1001 // value. If it finds a successful match, matchToDelimiter()
1002 // will compose this in some way with what it gets back from
1003 // the substitution, giving us a new partial parse result
1006 temp
.setTo(ruleText
, sub1Pos
, sub2Pos
- sub1Pos
);
1007 double partialResult
= matchToDelimiter(workText
, start
, tempBaseValue
,
1011 // if we got a successful match (or were trying to match a
1012 // null substitution), pp is now pointing at the first unmatched
1013 // character. Take note of that, and try matchToDelimiter()
1014 // on the input text again
1015 if (pp
.getIndex() != 0 || sub1
== NULL
) {
1016 start
= pp
.getIndex();
1018 UnicodeString workText2
;
1019 workText2
.setTo(workText
, pp
.getIndex(), workText
.length() - pp
.getIndex());
1022 // the second matchToDelimiter() will compose our previous
1023 // partial result with whatever it gets back from its
1024 // substitution if there's a successful match, giving us
1026 temp
.setTo(ruleText
, sub2Pos
, ruleText
.length() - sub2Pos
);
1027 partialResult
= matchToDelimiter(workText2
, 0, partialResult
,
1031 // if we got a successful match on this second
1032 // matchToDelimiter() call, update the high-water mark
1033 // and result (if necessary)
1034 if (pp2
.getIndex() != 0 || sub2
== NULL
) {
1035 if (prefixLength
+ pp
.getIndex() + pp2
.getIndex() > highWaterMark
) {
1036 highWaterMark
= prefixLength
+ pp
.getIndex() + pp2
.getIndex();
1037 result
= partialResult
;
1041 // commented out because ParsePosition doesn't have error index in 1.1.x
1042 // restored for ICU4C port
1043 int32_t temp
= pp2
.getErrorIndex() + sub1Pos
+ pp
.getIndex();
1044 if (temp
> parsePosition
.getErrorIndex()) {
1045 parsePosition
.setErrorIndex(temp
);
1050 // commented out because ParsePosition doesn't have error index in 1.1.x
1051 // restored for ICU4C port
1052 int32_t temp
= sub1Pos
+ pp
.getErrorIndex();
1053 if (temp
> parsePosition
.getErrorIndex()) {
1054 parsePosition
.setErrorIndex(temp
);
1057 // keep trying to match things until the outer matchToDelimiter()
1058 // call fails to make a match (each time, it picks up where it
1059 // left off the previous time)
1060 } while (sub1Pos
!= sub2Pos
1061 && pp
.getIndex() > 0
1062 && pp
.getIndex() < workText
.length()
1063 && pp
.getIndex() != start
);
1065 // update the caller's ParsePosition with our high-water mark
1066 // (i.e., it now points at the first character this function
1067 // didn't match-- the ParsePosition is therefore unchanged if
1068 // we didn't match anything)
1069 parsePosition
.setIndex(highWaterMark
);
1070 // commented out because ParsePosition doesn't have error index in 1.1.x
1071 // restored for ICU4C port
1072 if (highWaterMark
> 0) {
1073 parsePosition
.setErrorIndex(0);
1076 // this is a hack for one unusual condition: Normally, whether this
1077 // rule belong to a fraction rule set or not is handled by its
1078 // substitutions. But if that rule HAS NO substitutions, then
1079 // we have to account for it here. By definition, if the matching
1080 // rule in a fraction rule set has no substitutions, its numerator
1081 // is 1, and so the result is the reciprocal of its base value.
1082 if (isFractionRule
&& highWaterMark
> 0 && sub1
== NULL
) {
1083 result
= 1 / result
;
1086 resVal
.setDouble(result
);
1087 return TRUE
; // ??? do we need to worry if it is a long or a double?
1091 * This function is used by parse() to match the text being parsed
1092 * against a possible prefix string. This function
1093 * matches characters from the beginning of the string being parsed
1094 * to characters from the prospective prefix. If they match, pp is
1095 * updated to the first character not matched, and the result is
1096 * the unparsed part of the string. If they don't match, the whole
1097 * string is returned, and pp is left unchanged.
1098 * @param text The string being parsed
1099 * @param prefix The text to match against
1100 * @param pp On entry, ignored and assumed to be 0. On exit, points
1101 * to the first unmatched character (assuming the whole prefix matched),
1102 * or is unchanged (if the whole prefix didn't match).
1103 * @return If things match, this is the unparsed part of "text";
1104 * if they didn't match, this is "text".
1107 NFRule::stripPrefix(UnicodeString
& text
, const UnicodeString
& prefix
, ParsePosition
& pp
) const
1109 // if the prefix text is empty, dump out without doing anything
1110 if (prefix
.length() != 0) {
1111 UErrorCode status
= U_ZERO_ERROR
;
1112 // use prefixLength() to match the beginning of
1113 // "text" against "prefix". This function returns the
1114 // number of characters from "text" that matched (or 0 if
1115 // we didn't match the whole prefix)
1116 int32_t pfl
= prefixLength(text
, prefix
, status
);
1117 if (U_FAILURE(status
)) { // Memory allocation error.
1121 // if we got a successful match, update the parse position
1122 // and strip the prefix off of "text"
1123 pp
.setIndex(pp
.getIndex() + pfl
);
1124 text
.remove(0, pfl
);
1130 * Used by parse() to match a substitution and any following text.
1131 * "text" is searched for instances of "delimiter". For each instance
1132 * of delimiter, the intervening text is tested to see whether it
1133 * matches the substitution. The longest match wins.
1134 * @param text The string being parsed
1135 * @param startPos The position in "text" where we should start looking
1137 * @param baseValue A partial parse result (often the rule's base value),
1138 * which is combined with the result from matching the substitution
1139 * @param delimiter The string to search "text" for.
1140 * @param pp Ignored and presumed to be 0 on entry. If there's a match,
1141 * on exit this will point to the first unmatched character.
1142 * @param sub If we find "delimiter" in "text", this substitution is used
1143 * to match the text between the beginning of the string and the
1144 * position of "delimiter." (If "delimiter" is the empty string, then
1145 * this function just matches against this substitution and updates
1146 * everything accordingly.)
1147 * @param upperBound When matching the substitution, it will only
1148 * consider rules with base values lower than this value.
1149 * @return If there's a match, this is the result of composing
1150 * baseValue with the result of matching the substitution. Otherwise,
1151 * this is new Long(0). It's never null. If the result is an integer,
1152 * this will be an instance of Long; otherwise, it's an instance of
1155 * !!! note {dlf} in point of fact, in the java code the caller always converts
1156 * the result to a double, so we might as well return one.
1159 NFRule::matchToDelimiter(const UnicodeString
& text
,
1162 const UnicodeString
& delimiter
,
1164 const NFSubstitution
* sub
,
1165 double upperBound
) const
1167 UErrorCode status
= U_ZERO_ERROR
;
1168 // if "delimiter" contains real (i.e., non-ignorable) text, search
1169 // it for "delimiter" beginning at "start". If that succeeds, then
1170 // use "sub"'s doParse() method to match the text before the
1171 // instance of "delimiter" we just found.
1172 if (!allIgnorable(delimiter
, status
)) {
1173 if (U_FAILURE(status
)) { //Memory allocation error.
1176 ParsePosition tempPP
;
1179 // use findText() to search for "delimiter". It returns a two-
1180 // element array: element 0 is the position of the match, and
1181 // element 1 is the number of characters that matched
1184 int32_t dPos
= findText(text
, delimiter
, startPos
, &dLen
);
1186 // if findText() succeeded, isolate the text preceding the
1187 // match, and use "sub" to match that text
1189 UnicodeString subText
;
1190 subText
.setTo(text
, 0, dPos
);
1191 if (subText
.length() > 0) {
1192 UBool success
= sub
->doParse(subText
, tempPP
, _baseValue
, upperBound
,
1193 #if UCONFIG_NO_COLLATION
1196 formatter
->isLenient(),
1200 // if the substitution could match all the text up to
1201 // where we found "delimiter", then this function has
1202 // a successful match. Bump the caller's parse position
1203 // to point to the first character after the text
1204 // that matches "delimiter", and return the result
1205 // we got from parsing the substitution.
1206 if (success
&& tempPP
.getIndex() == dPos
) {
1207 pp
.setIndex(dPos
+ dLen
);
1208 return result
.getDouble();
1211 // commented out because ParsePosition doesn't have error index in 1.1.x
1212 // restored for ICU4C port
1213 if (tempPP
.getErrorIndex() > 0) {
1214 pp
.setErrorIndex(tempPP
.getErrorIndex());
1216 pp
.setErrorIndex(tempPP
.getIndex());
1221 // if we didn't match the substitution, search for another
1222 // copy of "delimiter" in "text" and repeat the loop if
1225 dPos
= findText(text
, delimiter
, dPos
+ dLen
, &dLen
);
1227 // if we make it here, this was an unsuccessful match, and we
1228 // leave pp unchanged and return 0
1232 // if "delimiter" is empty, or consists only of ignorable characters
1233 // (i.e., is semantically empty), thwe we obviously can't search
1234 // for "delimiter". Instead, just use "sub" to parse as much of
1235 // "text" as possible.
1237 else if (sub
== NULL
) {
1241 ParsePosition tempPP
;
1244 // try to match the whole string against the substitution
1245 UBool success
= sub
->doParse(text
, tempPP
, _baseValue
, upperBound
,
1246 #if UCONFIG_NO_COLLATION
1249 formatter
->isLenient(),
1252 if (success
&& (tempPP
.getIndex() != 0)) {
1253 // if there's a successful match (or it's a null
1254 // substitution), update pp to point to the first
1255 // character we didn't match, and pass the result from
1256 // sub.doParse() on through to the caller
1257 pp
.setIndex(tempPP
.getIndex());
1258 return result
.getDouble();
1261 // commented out because ParsePosition doesn't have error index in 1.1.x
1262 // restored for ICU4C port
1263 pp
.setErrorIndex(tempPP
.getErrorIndex());
1266 // and if we get to here, then nothing matched, so we return
1267 // 0 and leave pp alone
1273 * Used by stripPrefix() to match characters. If lenient parse mode
1274 * is off, this just calls startsWith(). If lenient parse mode is on,
1275 * this function uses CollationElementIterators to match characters in
1276 * the strings (only primary-order differences are significant in
1277 * determining whether there's a match).
1278 * @param str The string being tested
1279 * @param prefix The text we're hoping to see at the beginning
1281 * @return If "prefix" is found at the beginning of "str", this
1282 * is the number of characters in "str" that were matched (this
1283 * isn't necessarily the same as the length of "prefix" when matching
1284 * text with a collator). If there's no match, this is 0.
// Reports how many characters at the start of "str" match "prefix".
// With lenient parsing the comparison walks collation elements of both
// strings and only primary-order differences count; otherwise it is a
// plain startsWith() test. Errors are reported through "status".
1287 NFRule::prefixLength(const UnicodeString
& str
, const UnicodeString
& prefix
, UErrorCode
& status
) const
1289 // if we're looking for an empty prefix, it obviously matches
1290 // zero characters. Just go ahead and return 0.
1291 if (prefix
.length() == 0) {
1295 #if !UCONFIG_NO_COLLATION
1296 // go through all this grief if we're in lenient-parse mode
1297 if (formatter
->isLenient()) {
1298 // get the formatter's collator and use it to create two
1299 // collation element iterators, one over the target string
1300 // and another over the prefix (right now, we'll throw an
1301 // exception if the collator we get back from the formatter
1302 // isn't a RuleBasedCollator, because RuleBasedCollator defines
1303 // the CollationElementIterator protocol. Hopefully, this
1304 // will change someday.)
1305 const RuleBasedCollator
* collator
= formatter
->getCollator();
// a missing collator is reported to the caller as an allocation failure
1306 if (collator
== NULL
) {
1307 status
= U_MEMORY_ALLOCATION_ERROR
;
// LocalPointer owns both iterators for the rest of this scope
1310 LocalPointer
<CollationElementIterator
> strIter(collator
->createCollationElementIterator(str
));
1311 LocalPointer
<CollationElementIterator
> prefixIter(collator
->createCollationElementIterator(prefix
));
1312 // Check for memory allocation error.
1313 if (strIter
.isNull() || prefixIter
.isNull()) {
1314 status
= U_MEMORY_ALLOCATION_ERROR
;
// NOTE(review): "err" is handed to every next() call below but is not
// inspected in the visible code, so an iterator failure would not reach
// "status" -- confirm whether that is intended.
1318 UErrorCode err
= U_ZERO_ERROR
;
1320 // The original code was problematic. Consider this match:
1321 // prefix = "fifty-"
1322 // string = " fifty-7"
1323 // The intent is to match string up to the '7', by matching 'fifty-' at position 1
1324 // in the string. Unfortunately, we were getting a match, and then computing where
1325 // the match terminated by rematching the string. The rematch code was using as an
1326 // initial guess the substring of string between 0 and prefix.length. Because of
1327 // the leading space and trailing hyphen (both ignorable) this was succeeding, leaving
1328 // the position before the hyphen in the string. Recursing down, we then parsed the
1329 // remaining string '-7' as numeric. The resulting number turned out as 43 (50 - 7).
1330 // This was not pretty, especially since the string "fifty-7" parsed just fine.
1332 // We have newer APIs now, so we can use calls on the iterator to determine what we
1333 // matched up to. If we terminate because we hit the last element in the string,
1334 // our match terminates at this length. If we terminate because we hit the last element
1335 // in the target, our match terminates at one before the element iterator position.
1337 // match collation elements between the strings
1338 int32_t oStr
= strIter
->next(err
);
1339 int32_t oPrefix
= prefixIter
->next(err
);
// the loop runs until the prefix iterator is exhausted; a mismatch or an
// exhausted target string ends it early (see the comments on each test)
1341 while (oPrefix
!= CollationElementIterator::NULLORDER
) {
1342 // skip over ignorable characters in the target string
1343 while (CollationElementIterator::primaryOrder(oStr
) == 0
1344 && oStr
!= CollationElementIterator::NULLORDER
) {
1345 oStr
= strIter
->next(err
);
1348 // skip over ignorable characters in the prefix
1349 while (CollationElementIterator::primaryOrder(oPrefix
) == 0
1350 && oPrefix
!= CollationElementIterator::NULLORDER
) {
1351 oPrefix
= prefixIter
->next(err
);
1354 // dlf: move this above following test, if we consume the
1355 // entire target, aren't we ok even if the source was also
1356 // entirely consumed?
1358 // if skipping over ignorables brought to the end of
1359 // the prefix, we DID match: drop out of the loop
1360 if (oPrefix
== CollationElementIterator::NULLORDER
) {
1364 // if skipping over ignorables brought us to the end
1365 // of the target string, we didn't match and return 0
1366 if (oStr
== CollationElementIterator::NULLORDER
) {
1370 // match collation elements from the two strings
1371 // (considering only primary differences). If we
1372 // get a mismatch, dump out and return 0
1373 if (CollationElementIterator::primaryOrder(oStr
)
1374 != CollationElementIterator::primaryOrder(oPrefix
)) {
1377 // otherwise, advance to the next character in each string
1378 // and loop (we drop out of the loop when we exhaust
1379 // collation elements in the prefix)
1381 oStr
= strIter
->next(err
);
1382 oPrefix
= prefixIter
->next(err
);
// the match length is the target iterator's offset, less one when the
// iterator has already stepped onto an element that is not part of the match
1386 int32_t result
= strIter
->getOffset();
1387 if (oStr
!= CollationElementIterator::NULLORDER
) {
1388 --result
; // back over character that we don't want to consume;
// diagnostic trace of the computed length
1392 fprintf(stderr
, "prefix length: %d\n", result
);
// NOTE(review): everything from here to the closing dashed line is the
// older substring-comparison fallback. It calls setStrength() through a
// pointer declared const above, so it is presumably compiled out; the
// guarding directive is not visible in this excerpt -- confirm.
1396 //----------------------------------------------------------------
1397 // JDK 1.2-specific API call
1398 // return strIter.getOffset();
1399 //----------------------------------------------------------------
1400 // JDK 1.1 HACK (take out for 1.2-specific code)
1402 // if we make it to here, we have a successful match. Now we
1403 // have to find out HOW MANY characters from the target string
1404 // matched the prefix (there isn't necessarily a one-to-one
1405 // mapping between collation elements and characters).
1406 // In JDK 1.2, there's a simple getOffset() call we can use.
1407 // In JDK 1.1, on the other hand, we have to go through some
1408 // ugly contortions. First, use the collator to compare the
1409 // same number of characters from the prefix and target string.
1410 // If they're equal, we're done.
1411 collator
->setStrength(Collator::PRIMARY
);
1412 if (str
.length() >= prefix
.length()) {
1414 temp
.setTo(str
, 0, prefix
.length());
1415 if (collator
->equals(temp
, prefix
)) {
1417 fprintf(stderr
, "returning: %d\n", prefix
.length());
1419 return prefix
.length();
1423 // if they're not equal, then we have to compare successively
1424 // larger and larger substrings of the target string until we
1425 // get to one that matches the prefix. At that point, we know
1426 // how many characters matched the prefix, and we can return.
1428 while (p
<= str
.length()) {
1430 temp
.setTo(str
, 0, p
);
1431 if (collator
->equals(temp
, prefix
)) {
1438 // SHOULD NEVER GET HERE!!!
1440 //----------------------------------------------------------------
1443 // If lenient parsing is turned off, forget all that crap above.
1444 // Just use String.startsWith() and be done with it.
// exact match: the matched length is simply the length of the prefix
1448 if (str
.startsWith(prefix
)) {
1449 return prefix
.length();
1457 * Searches a string for another string. If lenient parsing is off,
1458 * this just calls indexOf(). If lenient parsing is on, this function
1459 * uses CollationElementIterator to match characters, and only
1460 * primary-order differences are significant in determining whether
1462 * @param str The string to search
1463 * @param key The string to search "str" for
1464 * @param startingAt The index into "str" where the search is to
1466 * @return A two-element array of ints. Element 0 is the position
1467 * of the match, or -1 if there was no match. Element 1 is the
1468 * number of characters in "str" that matched (which isn't necessarily
1469 * the same as the length of "key")
1472 NFRule::findText(const UnicodeString
& str
,
1473 const UnicodeString
& key
,
1475 int32_t* length
) const
1477 if (rulePatternFormat
) {
1479 FieldPosition
position(UNUM_INTEGER_FIELD
);
1480 position
.setBeginIndex(startingAt
);
1481 rulePatternFormat
->parseType(str
, this, result
, position
);
1482 int start
= position
.getBeginIndex();
1484 int32_t pluralRuleStart
= ruleText
.indexOf(gDollarOpenParenthesis
, -1, 0);
1485 int32_t pluralRuleSuffix
= ruleText
.indexOf(gClosedParenthesisDollar
, -1, pluralRuleStart
) + 2;
1486 int32_t matchLen
= position
.getEndIndex() - start
;
1487 UnicodeString
prefix(ruleText
.tempSubString(0, pluralRuleStart
));
1488 UnicodeString
suffix(ruleText
.tempSubString(pluralRuleSuffix
));
1489 if (str
.compare(start
- prefix
.length(), prefix
.length(), prefix
, 0, prefix
.length()) == 0
1490 && str
.compare(start
+ matchLen
, suffix
.length(), suffix
, 0, suffix
.length()) == 0)
1492 *length
= matchLen
+ prefix
.length() + suffix
.length();
1493 return start
- prefix
.length();
1499 if (!formatter
->isLenient()) {
1500 // if lenient parsing is turned off, this is easy: just call
1501 // String.indexOf() and we're done
1502 *length
= key
.length();
1503 return str
.indexOf(key
, startingAt
);
1506 // but if lenient parsing is turned ON, we've got some work
1508 return findTextLenient(str
, key
, startingAt
, length
);
1513 NFRule::findTextLenient(const UnicodeString
& str
,
1514 const UnicodeString
& key
,
1516 int32_t* length
) const
1518 //----------------------------------------------------------------
1519 // JDK 1.1 HACK (take out of 1.2-specific code)
1521 // in JDK 1.2, CollationElementIterator provides us with an
1522 // API to map between character offsets and collation elements
1523 // and we can do this by marching through the string comparing
1524 // collation elements. We can't do that in JDK 1.1. Insted,
1525 // we have to go through this horrible slow mess:
1526 int32_t p
= startingAt
;
1529 // basically just isolate smaller and smaller substrings of
1530 // the target string (each running to the end of the string,
1531 // and with the first one running from startingAt to the end)
1532 // and then use prefixLength() to see if the search key is at
1533 // the beginning of each substring. This is excruciatingly
1534 // slow, but it will locate the key and tell use how long the
1535 // matching text was.
1537 UErrorCode status
= U_ZERO_ERROR
;
1538 while (p
< str
.length() && keyLen
== 0) {
1539 temp
.setTo(str
, p
, str
.length() - p
);
1540 keyLen
= prefixLength(temp
, key
, status
);
1541 if (U_FAILURE(status
)) {
1550 // if we make it to here, we didn't find it. Return -1 for the
1551 // location. The length should be ignored, but set it to 0,
1552 // which should be "safe"
1558 * Checks to see whether a string consists entirely of ignorable
1560 * @param str The string to test.
1561 * @return true if the string is empty of consists entirely of
1562 * characters that the number formatter's collator says are
1563 * ignorable at the primary-order level. false otherwise.
1566 NFRule::allIgnorable(const UnicodeString
& str
, UErrorCode
& status
) const
1568 // if the string is empty, we can just return true
1569 if (str
.length() == 0) {
1573 #if !UCONFIG_NO_COLLATION
1574 // if lenient parsing is turned on, walk through the string with
1575 // a collation element iterator and make sure each collation
1576 // element is 0 (ignorable) at the primary level
1577 if (formatter
->isLenient()) {
1578 const RuleBasedCollator
* collator
= formatter
->getCollator();
1579 if (collator
== NULL
) {
1580 status
= U_MEMORY_ALLOCATION_ERROR
;
1583 LocalPointer
<CollationElementIterator
> iter(collator
->createCollationElementIterator(str
));
1585 // Memory allocation error check.
1586 if (iter
.isNull()) {
1587 status
= U_MEMORY_ALLOCATION_ERROR
;
1591 UErrorCode err
= U_ZERO_ERROR
;
1592 int32_t o
= iter
->next(err
);
1593 while (o
!= CollationElementIterator::NULLORDER
1594 && CollationElementIterator::primaryOrder(o
) == 0) {
1595 o
= iter
->next(err
);
1598 return o
== CollationElementIterator::NULLORDER
;
1602 // if lenient parsing is turned off, there is no such thing as
1603 // an ignorable character: return true only if the string is empty
// Passes new decimal format symbols on to this rule's substitutions:
// sub1 and sub2 each receive setDecimalFormatSymbols(newSymbols, status).
// NOTE(review): other code in this file compares sub1/sub2 against NULL;
// any null guards around the two calls below are not visible in this
// excerpt -- confirm they are present.
1608 NFRule::setDecimalFormatSymbols(const DecimalFormatSymbols
& newSymbols
, UErrorCode
& status
) {
// forward to the first substitution
1610 sub1
->setDecimalFormatSymbols(newSymbols
, status
);
// forward to the second substitution
1613 sub2
->setDecimalFormatSymbols(newSymbols
, status
);