X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/4388f060552cc537e71e957d32f35e9d75a61233..249c4c5ea9376c24572daf9c2effa7484a282f14:/icuSources/i18n/nfrule.cpp diff --git a/icuSources/i18n/nfrule.cpp b/icuSources/i18n/nfrule.cpp index c3561205..12c5b9e1 100644 --- a/icuSources/i18n/nfrule.cpp +++ b/icuSources/i18n/nfrule.cpp @@ -1,10 +1,12 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** -* Copyright (C) 1997-2011, International Business Machines +* Copyright (C) 1997-2015, International Business Machines * Corporation and others. All Rights Reserved. ****************************************************************************** * file name: nfrule.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -17,32 +19,47 @@ #if U_HAVE_RBNF +#include "unicode/localpointer.h" #include "unicode/rbnf.h" #include "unicode/tblcoll.h" +#include "unicode/plurfmt.h" +#include "unicode/upluralrules.h" #include "unicode/coleitr.h" #include "unicode/uchar.h" #include "nfrs.h" #include "nfrlist.h" #include "nfsubs.h" #include "patternprops.h" +#include "putilimp.h" U_NAMESPACE_BEGIN -NFRule::NFRule(const RuleBasedNumberFormat* _rbnf) +NFRule::NFRule(const RuleBasedNumberFormat* _rbnf, const UnicodeString &_ruleText, UErrorCode &status) : baseValue((int32_t)0) - , radix(0) + , radix(10) , exponent(0) - , ruleText() + , decimalPoint(0) + , ruleText(_ruleText) , sub1(NULL) , sub2(NULL) , formatter(_rbnf) + , rulePatternFormat(NULL) { + if (!ruleText.isEmpty()) { + parseRuleDescriptor(ruleText, status); + } } NFRule::~NFRule() { - delete sub1; - delete sub2; + if (sub1 != sub2) { + delete sub2; + sub2 = NULL; + } + delete sub1; + sub1 = NULL; + delete rulePatternFormat; + rulePatternFormat = NULL; } static const UChar gLeftBracket = 0x005b; @@ -59,11 +76,14 @@ static const UChar gDot = 0x002e; static const UChar gTick = 0x0027; //static const UChar gMinus = 0x002d; static const UChar gSemicolon = 0x003b; +static const UChar gX = 0x0078; static const UChar gMinusX[] = {0x2D, 0x78, 0}; /* "-x" */ -static const UChar gXDotX[] = {0x78, 0x2E, 0x78, 0}; /* "x.x" */ -static const UChar gXDotZero[] = {0x78, 0x2E, 0x30, 0}; /* "x.0" */ -static const UChar gZeroDotX[] = {0x30, 0x2E, 0x78, 0}; /* "0.x" */ +static const UChar gInf[] = {0x49, 0x6E, 0x66, 0}; /* "Inf" */ +static const UChar gNaN[] = {0x4E, 0x61, 0x4E, 0}; /* "NaN" */ + +static const UChar gDollarOpenParenthesis[] = {0x24, 0x28, 0}; /* "$(" */ +static const UChar gClosedParenthesisDollar[] = {0x29, 0x24, 0}; /* ")$" */ static const UChar gLessLess[] = {0x3C, 0x3C, 0}; /* "<<" */ static const UChar gLessPercent[] = {0x3C, 0x25, 0}; /* "<%" */ @@ -78,7 +98,7 @@ static const UChar gEqualHash[] = {0x3D, 0x23, 0}; /* "=#" */ static const UChar gEqualZero[] = {0x3D, 0x30, 0}; /* "=0" */ static const UChar gGreaterGreaterGreater[] = {0x3E, 0x3E, 0x3E, 0}; /* ">>>" */ -static const UChar * const tokenStrings[] = { +static const UChar * const RULE_PREFIXES[] = { gLessLess, gLessPercent, gLessHash, gLessZero, gGreaterGreater, gGreaterPercent,gGreaterHash, gGreaterZero, gEqualPercent, gEqualHash, gEqualZero, NULL @@ -86,7 +106,7 @@ static const UChar * const tokenStrings[] = { void NFRule::makeRules(UnicodeString& description, - const NFRuleSet *ruleSet, + NFRuleSet *owner, const NFRule *predecessor, const RuleBasedNumberFormat *rbnf, NFRuleList& rules, @@ -96,30 +116,32 @@ NFRule::makeRules(UnicodeString& description, // new it up and initialize its basevalue and divisor // (this also strips the rule descriptor, if any, off the // descripton string) - NFRule* rule1 = new NFRule(rbnf); + NFRule* rule1 = new NFRule(rbnf, description, status); /* test for NULL */ if (rule1 == 0) { status = U_MEMORY_ALLOCATION_ERROR; return; } - rule1->parseRuleDescriptor(description, status); + description = rule1->ruleText; // check the description to see whether there's text enclosed // in brackets int32_t brack1 = description.indexOf(gLeftBracket); - int32_t brack2 = description.indexOf(gRightBracket); + int32_t brack2 = brack1 < 0 ? -1 : description.indexOf(gRightBracket); // if the description doesn't contain a matched pair of brackets, // or if it's of a type that doesn't recognize bracketed text, // then leave the description alone, initialize the rule's // rule text and substitutions, and return that rule - if (brack1 == -1 || brack2 == -1 || brack1 > brack2 + if (brack2 < 0 || brack1 > brack2 || rule1->getType() == kProperFractionRule - || rule1->getType() == kNegativeNumberRule) { - rule1->ruleText = description; - rule1->extractSubstitutions(ruleSet, predecessor, rbnf, status); - rules.add(rule1); - } else { + || rule1->getType() == kNegativeNumberRule + || rule1->getType() == kInfinityRule + || rule1->getType() == kNaNRule) + { + rule1->extractSubstitutions(owner, description, predecessor, status); + } + else { // if the description does contain a matched pair of brackets, // then it's really shorthand for two rules (with one exception) NFRule* rule2 = NULL; @@ -138,7 +160,7 @@ NFRule::makeRules(UnicodeString& description, // set, they both have the same base value; otherwise, // increment the original rule's base value ("rule1" actually // goes SECOND in the rule set's rule list) - rule2 = new NFRule(rbnf); + rule2 = new NFRule(rbnf, UnicodeString(), status); /* test for NULL */ if (rule2 == 0) { status = U_MEMORY_ALLOCATION_ERROR; @@ -146,7 +168,7 @@ NFRule::makeRules(UnicodeString& description, } if (rule1->baseValue >= 0) { rule2->baseValue = rule1->baseValue; - if (!ruleSet->isFractionRuleSet()) { + if (!owner->isFractionRuleSet()) { ++rule1->baseValue; } } @@ -177,8 +199,7 @@ NFRule::makeRules(UnicodeString& description, if (brack2 + 1 < description.length()) { sbuf.append(description, brack2 + 1, description.length() - brack2 - 1); } - rule2->ruleText.setTo(sbuf); - rule2->extractSubstitutions(ruleSet, predecessor, rbnf, status); + rule2->extractSubstitutions(owner, sbuf, predecessor, status); } // rule1's text includes the text in the brackets but omits @@ -189,8 +210,7 @@ NFRule::makeRules(UnicodeString& description, if (brack2 + 1 < description.length()) { sbuf.append(description, brack2 + 1, description.length() - brack2 - 1); } - rule1->ruleText.setTo(sbuf); - rule1->extractSubstitutions(ruleSet, predecessor, rbnf, status); + rule1->extractSubstitutions(owner, sbuf, predecessor, status); // if we only have one rule, return it; if we have two, return // a two-element array containing them (notice that rule2 goes @@ -198,10 +218,20 @@ NFRule::makeRules(UnicodeString& description, // material in the brackets and rule1 INCLUDES the material // in the brackets) if (rule2 != NULL) { - rules.add(rule2); + if (rule2->baseValue >= kNoBase) { + rules.add(rule2); + } + else { + owner->setNonNumericalRule(rule2); + } } + } + if (rule1->baseValue >= kNoBase) { rules.add(rule1); } + else { + owner->setNonNumericalRule(rule1); + } } /** @@ -223,9 +253,7 @@ NFRule::parseRuleDescriptor(UnicodeString& description, UErrorCode& status) // separated by a colon. The rule descriptor is optional. If // it's omitted, just set the base value to 0. int32_t p = description.indexOf(gColon); - if (p == -1) { - setBaseValue((int32_t)0, status); - } else { + if (p != -1) { // copy the descriptor out into its own string and strip it, // along with any trailing whitespace, out of the original // description @@ -240,25 +268,15 @@ NFRule::parseRuleDescriptor(UnicodeString& description, UErrorCode& status) // check first to see if the rule descriptor matches the token // for one of the special rules. If it does, set the base - // value to the correct identfier value - if (0 == descriptor.compare(gMinusX, 2)) { - setType(kNegativeNumberRule); - } - else if (0 == descriptor.compare(gXDotX, 3)) { - setType(kImproperFractionRule); - } - else if (0 == descriptor.compare(gZeroDotX, 3)) { - setType(kProperFractionRule); - } - else if (0 == descriptor.compare(gXDotZero, 3)) { - setType(kMasterRule); - } - - // if the rule descriptor begins with a digit, it's a descriptor - // for a normal rule - // since we don't have Long.parseLong, and this isn't much work anyway, - // just build up the value as we encounter the digits. - else if (descriptor.charAt(0) >= gZero && descriptor.charAt(0) <= gNine) { + // value to the correct identifier value + int descriptorLength = descriptor.length(); + UChar firstChar = descriptor.charAt(0); + UChar lastChar = descriptor.charAt(descriptorLength - 1); + if (firstChar >= gZero && firstChar <= gNine && lastChar != gX) { + // if the rule descriptor begins with a digit, it's a descriptor + // for a normal rule + // since we don't have Long.parseLong, and this isn't much work anyway, + // just build up the value as we encounter the digits. int64_t val = 0; p = 0; UChar c = gSpace; @@ -268,7 +286,7 @@ NFRule::parseRuleDescriptor(UnicodeString& description, UErrorCode& status) // stop on a slash or > sign (or at the end of the string), // and throw an exception on any other character int64_t ll_10 = 10; - while (p < descriptor.length()) { + while (p < descriptorLength) { c = descriptor.charAt(p); if (c >= gZero && c <= gNine) { val = val * ll_10 + (int32_t)(c - gZero); @@ -297,7 +315,7 @@ NFRule::parseRuleDescriptor(UnicodeString& description, UErrorCode& status) val = 0; ++p; int64_t ll_10 = 10; - while (p < descriptor.length()) { + while (p < descriptorLength) { c = descriptor.charAt(p); if (c >= gZero && c <= gNine) { val = val * ll_10 + (int32_t)(c - gZero); @@ -345,7 +363,31 @@ NFRule::parseRuleDescriptor(UnicodeString& description, UErrorCode& status) } } } + else if (0 == descriptor.compare(gMinusX, 2)) { + setType(kNegativeNumberRule); + } + else if (descriptorLength == 3) { + if (firstChar == gZero && lastChar == gX) { + setBaseValue(kProperFractionRule, status); + decimalPoint = descriptor.charAt(1); + } + else if (firstChar == gX && lastChar == gX) { + setBaseValue(kImproperFractionRule, status); + decimalPoint = descriptor.charAt(1); + } + else if (firstChar == gX && lastChar == gZero) { + setBaseValue(kMasterRule, status); + decimalPoint = descriptor.charAt(1); + } + else if (descriptor.compare(gNaN, 3) == 0) { + setBaseValue(kNaNRule, status); + } + else if (descriptor.compare(gInf, 3) == 0) { + setBaseValue(kInfinityRule, status); + } + } } + // else use the default base value for now. // finally, if the rule body begins with an apostrophe, strip it off // (this is generally used to put whitespace at the beginning of @@ -369,13 +411,44 @@ NFRule::parseRuleDescriptor(UnicodeString& description, UErrorCode& status) */ void NFRule::extractSubstitutions(const NFRuleSet* ruleSet, + const UnicodeString &ruleText, const NFRule* predecessor, - const RuleBasedNumberFormat* rbnf, UErrorCode& status) { - if (U_SUCCESS(status)) { - sub1 = extractSubstitution(ruleSet, predecessor, rbnf, status); - sub2 = extractSubstitution(ruleSet, predecessor, rbnf, status); + if (U_FAILURE(status)) { + return; + } + this->ruleText = ruleText; + sub1 = extractSubstitution(ruleSet, predecessor, status); + if (sub1 == NULL) { + // Small optimization. There is no need to create a redundant NullSubstitution. + sub2 = NULL; + } + else { + sub2 = extractSubstitution(ruleSet, predecessor, status); + } + int32_t pluralRuleStart = this->ruleText.indexOf(gDollarOpenParenthesis, -1, 0); + int32_t pluralRuleEnd = (pluralRuleStart >= 0 ? this->ruleText.indexOf(gClosedParenthesisDollar, -1, pluralRuleStart) : -1); + if (pluralRuleEnd >= 0) { + int32_t endType = this->ruleText.indexOf(gComma, pluralRuleStart); + if (endType < 0) { + status = U_PARSE_ERROR; + return; + } + UnicodeString type(this->ruleText.tempSubString(pluralRuleStart + 2, endType - pluralRuleStart - 2)); + UPluralType pluralType; + if (type.startsWith(UNICODE_STRING_SIMPLE("cardinal"))) { + pluralType = UPLURAL_TYPE_CARDINAL; + } + else if (type.startsWith(UNICODE_STRING_SIMPLE("ordinal"))) { + pluralType = UPLURAL_TYPE_ORDINAL; + } + else { + status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + rulePatternFormat = formatter->createPluralFormat(pluralType, + this->ruleText.tempSubString(endType + 1, pluralRuleEnd - endType - 1), status); } } @@ -394,21 +467,19 @@ NFRule::extractSubstitutions(const NFRuleSet* ruleSet, NFSubstitution * NFRule::extractSubstitution(const NFRuleSet* ruleSet, const NFRule* predecessor, - const RuleBasedNumberFormat* rbnf, UErrorCode& status) { NFSubstitution* result = NULL; // search the rule's rule text for the first two characters of // a substitution token - int32_t subStart = indexOfAny(tokenStrings); + int32_t subStart = indexOfAnyRulePrefix(); int32_t subEnd = subStart; // if we didn't find one, create a null substitution positioned // at the end of the rule text if (subStart == -1) { - return NFSubstitution::makeSubstitution(ruleText.length(), this, predecessor, - ruleSet, rbnf, UnicodeString(), status); + return NULL; } // special-case the ">>>" token, since searching for the > at the @@ -435,8 +506,7 @@ NFRule::extractSubstitution(const NFRuleSet* ruleSet, // unmatched token character), create a null substitution positioned // at the end of the rule if (subEnd == -1) { - return NFSubstitution::makeSubstitution(ruleText.length(), this, predecessor, - ruleSet, rbnf, UnicodeString(), status); + return NULL; } // if we get here, we have a real substitution token (or at least @@ -445,7 +515,7 @@ NFRule::extractSubstitution(const NFRuleSet* ruleSet, UnicodeString subToken; subToken.setTo(ruleText, subStart, subEnd + 1 - subStart); result = NFSubstitution::makeSubstitution(subStart, this, predecessor, ruleSet, - rbnf, subToken, status); + this->formatter, subToken, status); // remove the substitution from the rule text ruleText.removeBetween(subStart, subEnd+1); @@ -465,6 +535,7 @@ NFRule::setBaseValue(int64_t newBaseValue, UErrorCode& status) { // set the base value baseValue = newBaseValue; + radix = 10; // if this isn't a special rule, recalculate the radix and exponent // (the radix always defaults to 10; if it's supposed to be something @@ -472,7 +543,6 @@ NFRule::setBaseValue(int64_t newBaseValue, UErrorCode& status) // recalculated again-- the only function that does this is // NFRule.parseRuleDescriptor() ) if (baseValue >= 1) { - radix = 10; exponent = expectedExponent(); // this function gets called on a fully-constructed rule whose @@ -489,7 +559,6 @@ NFRule::setBaseValue(int64_t newBaseValue, UErrorCode& status) // if this is a special rule, its radix and exponent are basically // ignored. Set them to "safe" default values } else { - radix = 10; exponent = 0; } } @@ -522,19 +591,17 @@ NFRule::expectedExponent() const /** * Searches the rule's rule text for any of the specified strings. - * @param strings An array of strings to search the rule's rule - * text for * @return The index of the first match in the rule's rule text * (i.e., the first substring in the rule's rule text that matches * _any_ of the strings in "strings"). If none of the strings in * "strings" is found in the rule's rule text, returns -1. */ int32_t -NFRule::indexOfAny(const UChar* const strings[]) const +NFRule::indexOfAnyRulePrefix() const { int result = -1; - for (int i = 0; strings[i]; i++) { - int32_t pos = ruleText.indexOf(*strings[i]); + for (int i = 0; RULE_PREFIXES[i]; i++) { + int32_t pos = ruleText.indexOf(*RULE_PREFIXES[i]); if (pos != -1 && (result == -1 || pos < result)) { result = pos; } @@ -546,6 +613,19 @@ NFRule::indexOfAny(const UChar* const strings[]) const // boilerplate //----------------------------------------------------------------------- +static UBool +util_equalSubstitutions(const NFSubstitution* sub1, const NFSubstitution* sub2) +{ + if (sub1) { + if (sub2) { + return *sub1 == *sub2; + } + } else if (!sub2) { + return TRUE; + } + return FALSE; +} + /** * Tests two rules for equality. * @param that The rule to compare this one against @@ -558,8 +638,8 @@ NFRule::operator==(const NFRule& rhs) const && radix == rhs.radix && exponent == rhs.exponent && ruleText == rhs.ruleText - && *sub1 == *rhs.sub1 - && *sub2 == *rhs.sub2; + && util_equalSubstitutions(sub1, rhs.sub1) + && util_equalSubstitutions(sub2, rhs.sub2); } /** @@ -581,9 +661,11 @@ NFRule::_appendRuleText(UnicodeString& result) const { switch (getType()) { case kNegativeNumberRule: result.append(gMinusX, 2); break; - case kImproperFractionRule: result.append(gXDotX, 3); break; - case kProperFractionRule: result.append(gZeroDotX, 3); break; - case kMasterRule: result.append(gXDotZero, 3); break; + case kImproperFractionRule: result.append(gX).append(decimalPoint == 0 ? gDot : decimalPoint).append(gX); break; + case kProperFractionRule: result.append(gZero).append(decimalPoint == 0 ? gDot : decimalPoint).append(gX); break; + case kMasterRule: result.append(gX).append(decimalPoint == 0 ? gDot : decimalPoint).append(gZero); break; + case kInfinityRule: result.append(gInf, 3); break; + case kNaNRule: result.append(gNaN, 3); break; default: // for a normal rule, write out its base value, and if the radix is // something other than 10, write out the radix (with the preceding @@ -608,7 +690,7 @@ NFRule::_appendRuleText(UnicodeString& result) const // if the rule text begins with a space, write an apostrophe // (whitespace after the rule descriptor is ignored; the // apostrophe is used to make the whitespace significant) - if (ruleText.charAt(0) == gSpace && sub1->getPos() != 0) { + if (ruleText.charAt(0) == gSpace && (sub1 == NULL || sub1->getPos() != 0)) { result.append(gTick); } @@ -618,10 +700,14 @@ NFRule::_appendRuleText(UnicodeString& result) const ruleTextCopy.setTo(ruleText); UnicodeString temp; - sub2->toString(temp); - ruleTextCopy.insert(sub2->getPos(), temp); - sub1->toString(temp); - ruleTextCopy.insert(sub1->getPos(), temp); + if (sub2 != NULL) { + sub2->toString(temp); + ruleTextCopy.insert(sub2->getPos(), temp); + } + if (sub1 != NULL) { + sub1->toString(temp); + ruleTextCopy.insert(sub1->getPos(), temp); + } result.append(ruleTextCopy); @@ -630,6 +716,12 @@ NFRule::_appendRuleText(UnicodeString& result) const result.append(gSemicolon); } +int64_t NFRule::getDivisor() const +{ + return util64_pow(radix, exponent); +} + + //----------------------------------------------------------------------- // formatting //----------------------------------------------------------------------- @@ -644,16 +736,39 @@ NFRule::_appendRuleText(UnicodeString& result) const * should be inserted */ void -NFRule::doFormat(int64_t number, UnicodeString& toInsertInto, int32_t pos) const +NFRule::doFormat(int64_t number, UnicodeString& toInsertInto, int32_t pos, int32_t recursionCount, UErrorCode& status) const { // first, insert the rule's rule text into toInsertInto at the // specified position, then insert the results of the substitutions // into the right places in toInsertInto (notice we do the // substitutions in reverse order so that the offsets don't get // messed up) - toInsertInto.insert(pos, ruleText); - sub2->doSubstitution(number, toInsertInto, pos); - sub1->doSubstitution(number, toInsertInto, pos); + int32_t pluralRuleStart = ruleText.length(); + int32_t lengthOffset = 0; + if (!rulePatternFormat) { + toInsertInto.insert(pos, ruleText); + } + else { + pluralRuleStart = ruleText.indexOf(gDollarOpenParenthesis, -1, 0); + int pluralRuleEnd = ruleText.indexOf(gClosedParenthesisDollar, -1, pluralRuleStart); + int initialLength = toInsertInto.length(); + if (pluralRuleEnd < ruleText.length() - 1) { + toInsertInto.insert(pos, ruleText.tempSubString(pluralRuleEnd + 2)); + } + toInsertInto.insert(pos, + rulePatternFormat->format((int32_t)(number/util64_pow(radix, exponent)), status)); + if (pluralRuleStart > 0) { + toInsertInto.insert(pos, ruleText.tempSubString(0, pluralRuleStart)); + } + lengthOffset = ruleText.length() - (toInsertInto.length() - initialLength); + } + + if (sub2 != NULL) { + sub2->doSubstitution(number, toInsertInto, pos - (sub2->getPos() > pluralRuleStart ? lengthOffset : 0), recursionCount, status); + } + if (sub1 != NULL) { + sub1->doSubstitution(number, toInsertInto, pos - (sub1->getPos() > pluralRuleStart ? lengthOffset : 0), recursionCount, status); + } } /** @@ -666,7 +781,7 @@ NFRule::doFormat(int64_t number, UnicodeString& toInsertInto, int32_t pos) const * should be inserted */ void -NFRule::doFormat(double number, UnicodeString& toInsertInto, int32_t pos) const +NFRule::doFormat(double number, UnicodeString& toInsertInto, int32_t pos, int32_t recursionCount, UErrorCode& status) const { // first, insert the rule's rule text into toInsertInto at the // specified position, then insert the results of the substitutions @@ -674,9 +789,40 @@ NFRule::doFormat(double number, UnicodeString& toInsertInto, int32_t pos) const // [again, we have two copies of this routine that do the same thing // so that we don't sacrifice precision in a long by casting it // to a double] - toInsertInto.insert(pos, ruleText); - sub2->doSubstitution(number, toInsertInto, pos); - sub1->doSubstitution(number, toInsertInto, pos); + int32_t pluralRuleStart = ruleText.length(); + int32_t lengthOffset = 0; + if (!rulePatternFormat) { + toInsertInto.insert(pos, ruleText); + } + else { + pluralRuleStart = ruleText.indexOf(gDollarOpenParenthesis, -1, 0); + int pluralRuleEnd = ruleText.indexOf(gClosedParenthesisDollar, -1, pluralRuleStart); + int initialLength = toInsertInto.length(); + if (pluralRuleEnd < ruleText.length() - 1) { + toInsertInto.insert(pos, ruleText.tempSubString(pluralRuleEnd + 2)); + } + double pluralVal = number; + if (0 <= pluralVal && pluralVal < 1) { + // We're in a fractional rule, and we have to match the NumeratorSubstitution behavior. + // 2.3 can become 0.2999999999999998 for the fraction due to rounding errors. + pluralVal = uprv_round(pluralVal * util64_pow(radix, exponent)); + } + else { + pluralVal = pluralVal / util64_pow(radix, exponent); + } + toInsertInto.insert(pos, rulePatternFormat->format((int32_t)(pluralVal), status)); + if (pluralRuleStart > 0) { + toInsertInto.insert(pos, ruleText.tempSubString(0, pluralRuleStart)); + } + lengthOffset = ruleText.length() - (toInsertInto.length() - initialLength); + } + + if (sub2 != NULL) { + sub2->doSubstitution(number, toInsertInto, pos - (sub2->getPos() > pluralRuleStart ? lengthOffset : 0), recursionCount, status); + } + if (sub1 != NULL) { + sub1->doSubstitution(number, toInsertInto, pos - (sub1->getPos() > pluralRuleStart ? lengthOffset : 0), recursionCount, status); + } } /** @@ -688,7 +834,7 @@ NFRule::doFormat(double number, UnicodeString& toInsertInto, int32_t pos) const * this one in its list; false if it should use this rule */ UBool -NFRule::shouldRollBack(double number) const +NFRule::shouldRollBack(int64_t number) const { // we roll back if the rule contains a modulus substitution, // the number being formatted is an even multiple of the rule's @@ -706,9 +852,9 @@ NFRule::shouldRollBack(double number) const // a modulus substitution, its base value isn't an even multiple // of 100, and the value we're trying to format _is_ an even // multiple of 100. This is called the "rollback rule." - if ((sub1->isModulusSubstitution()) || (sub2->isModulusSubstitution())) { + if ((sub1 != NULL && sub1->isModulusSubstitution()) || (sub2 != NULL && sub2->isModulusSubstitution())) { int64_t re = util64_pow(radix, exponent); - return uprv_fmod(number, (double)re) == 0 && (baseValue % re) != 0; + return (number % re) == 0 && (baseValue % re) != 0; } return FALSE; } @@ -749,12 +895,12 @@ static void dumpUS(FILE* f, const UnicodeString& us) { } } #endif - UBool NFRule::doParse(const UnicodeString& text, ParsePosition& parsePosition, UBool isFractionRule, double upperBound, + uint32_t nonNumericalExecutedRuleMask, Formattable& resVal, UBool isDecimFmtParseable) const { @@ -763,22 +909,25 @@ NFRule::doParse(const UnicodeString& text, ParsePosition pp; UnicodeString workText(text); + int32_t sub1Pos = sub1 != NULL ? sub1->getPos() : ruleText.length(); + int32_t sub2Pos = sub2 != NULL ? sub2->getPos() : ruleText.length(); + // check to see whether the text before the first substitution // matches the text at the beginning of the string being // parsed. If it does, strip that off the front of workText; // otherwise, dump out with a mismatch UnicodeString prefix; - prefix.setTo(ruleText, 0, sub1->getPos()); + prefix.setTo(ruleText, 0, sub1Pos); #ifdef RBNF_DEBUG - fprintf(stderr, "doParse %x ", this); + fprintf(stderr, "doParse %p ", this); { UnicodeString rt; _appendRuleText(rt); dumpUS(stderr, rt); } - fprintf(stderr, " text: '", this); + fprintf(stderr, " text: '"); dumpUS(stderr, text); fprintf(stderr, "' prefix: '"); dumpUS(stderr, prefix); @@ -787,26 +936,39 @@ NFRule::doParse(const UnicodeString& text, int32_t prefixLength = text.length() - workText.length(); #ifdef RBNF_DEBUG - fprintf(stderr, "' pl: %d ppi: %d s1p: %d\n", prefixLength, pp.getIndex(), sub1->getPos()); + fprintf(stderr, "' pl: %d ppi: %d s1p: %d\n", prefixLength, pp.getIndex(), sub1Pos); #endif - if (pp.getIndex() == 0 && sub1->getPos() != 0) { + if (pp.getIndex() == 0 && sub1Pos != 0) { // commented out because ParsePosition doesn't have error index in 1.1.x // restored for ICU4C port parsePosition.setErrorIndex(pp.getErrorIndex()); resVal.setLong(0); return TRUE; } + if (baseValue == kInfinityRule) { + // If you match this, don't try to perform any calculations on it. + parsePosition.setIndex(pp.getIndex()); + resVal.setDouble(uprv_getInfinity()); + return TRUE; + } + if (baseValue == kNaNRule) { + // If you match this, don't try to perform any calculations on it. + parsePosition.setIndex(pp.getIndex()); + resVal.setDouble(uprv_getNaN()); + return TRUE; + } // Detect when this rule's main job is to parse a decimal format and we're not // supposed to. - if (!isDecimFmtParseable) { - // The following tries to detect a rule like "x.x: =#,##0.#=;" - if ( sub1->isDecimalFormatSubstitutionOnly() && sub2->isRuleSetSubstitutionOnly() ) { - parsePosition.setErrorIndex(pp.getErrorIndex()); - resVal.setLong(0); - return TRUE; - } + if (!isDecimFmtParseable && sub1 != NULL && sub1->isDecimalFormatSubstitutionOnly()) { + // This is trying to detect a rule like "x.x: =#,##0.#=;" + // We used to also check sub2->isRuleSetSubstitutionOnly() to detect this + // but now sub2 is usually NULL when we get here, and that test no longer seems to matter. + // Need to check into this more. + parsePosition.setErrorIndex(pp.getErrorIndex()); + resVal.setLong(0); + return TRUE; } // this is the fun part. The basic guts of the rule-matching @@ -851,16 +1013,17 @@ NFRule::doParse(const UnicodeString& text, // the substitution, giving us a new partial parse result pp.setIndex(0); - temp.setTo(ruleText, sub1->getPos(), sub2->getPos() - sub1->getPos()); + temp.setTo(ruleText, sub1Pos, sub2Pos - sub1Pos); double partialResult = matchToDelimiter(workText, start, tempBaseValue, temp, pp, sub1, + nonNumericalExecutedRuleMask, upperBound); // if we got a successful match (or were trying to match a // null substitution), pp is now pointing at the first unmatched // character. Take note of that, and try matchToDelimiter() // on the input text again - if (pp.getIndex() != 0 || sub1->isNullSubstitution()) { + if (pp.getIndex() != 0 || sub1 == NULL) { start = pp.getIndex(); UnicodeString workText2; @@ -871,33 +1034,34 @@ NFRule::doParse(const UnicodeString& text, // partial result with whatever it gets back from its // substitution if there's a successful match, giving us // a real result - temp.setTo(ruleText, sub2->getPos(), ruleText.length() - sub2->getPos()); + temp.setTo(ruleText, sub2Pos, ruleText.length() - sub2Pos); partialResult = matchToDelimiter(workText2, 0, partialResult, temp, pp2, sub2, + nonNumericalExecutedRuleMask, upperBound); // if we got a successful match on this second // matchToDelimiter() call, update the high-water mark // and result (if necessary) - if (pp2.getIndex() != 0 || sub2->isNullSubstitution()) { + if (pp2.getIndex() != 0 || sub2 == NULL) { if (prefixLength + pp.getIndex() + pp2.getIndex() > highWaterMark) { highWaterMark = prefixLength + pp.getIndex() + pp2.getIndex(); result = partialResult; } } - // commented out because ParsePosition doesn't have error index in 1.1.x - // restored for ICU4C port else { - int32_t temp = pp2.getErrorIndex() + sub1->getPos() + pp.getIndex(); + // commented out because ParsePosition doesn't have error index in 1.1.x + // restored for ICU4C port + int32_t temp = pp2.getErrorIndex() + sub1Pos + pp.getIndex(); if (temp> parsePosition.getErrorIndex()) { parsePosition.setErrorIndex(temp); } } } - // commented out because ParsePosition doesn't have error index in 1.1.x - // restored for ICU4C port else { - int32_t temp = sub1->getPos() + pp.getErrorIndex(); + // commented out because ParsePosition doesn't have error index in 1.1.x + // restored for ICU4C port + int32_t temp = sub1Pos + pp.getErrorIndex(); if (temp > parsePosition.getErrorIndex()) { parsePosition.setErrorIndex(temp); } @@ -905,7 +1069,7 @@ NFRule::doParse(const UnicodeString& text, // keep trying to match things until the outer matchToDelimiter() // call fails to make a match (each time, it picks up where it // left off the previous time) - } while (sub1->getPos() != sub2->getPos() + } while (sub1Pos != sub2Pos && pp.getIndex() > 0 && pp.getIndex() < workText.length() && pp.getIndex() != start); @@ -927,9 +1091,7 @@ NFRule::doParse(const UnicodeString& text, // we have to account for it here. By definition, if the matching // rule in a fraction rule set has no substitutions, its numerator // is 1, and so the result is the reciprocal of its base value. - if (isFractionRule && - highWaterMark > 0 && - sub1->isNullSubstitution()) { + if (isFractionRule && highWaterMark > 0 && sub1 == NULL) { result = 1 / result; } @@ -1012,6 +1174,7 @@ NFRule::matchToDelimiter(const UnicodeString& text, const UnicodeString& delimiter, ParsePosition& pp, const NFSubstitution* sub, + uint32_t nonNumericalExecutedRuleMask, double upperBound) const { UErrorCode status = U_ZERO_ERROR; @@ -1045,6 +1208,7 @@ NFRule::matchToDelimiter(const UnicodeString& text, #else formatter->isLenient(), #endif + nonNumericalExecutedRuleMask, result); // if the substitution could match all the text up to @@ -1057,9 +1221,9 @@ NFRule::matchToDelimiter(const UnicodeString& text, pp.setIndex(dPos + dLen); return result.getDouble(); } - // commented out because ParsePosition doesn't have error index in 1.1.x - // restored for ICU4C port else { + // commented out because ParsePosition doesn't have error index in 1.1.x + // restored for ICU4C port if (tempPP.getErrorIndex() > 0) { pp.setErrorIndex(tempPP.getErrorIndex()); } else { @@ -1083,7 +1247,11 @@ NFRule::matchToDelimiter(const UnicodeString& text, // (i.e., is semantically empty), thwe we obviously can't search // for "delimiter". Instead, just use "sub" to parse as much of // "text" as possible. - } else { + } + else if (sub == NULL) { + return _baseValue; + } + else { ParsePosition tempPP; Formattable result; @@ -1094,8 +1262,9 @@ NFRule::matchToDelimiter(const UnicodeString& text, #else formatter->isLenient(), #endif + nonNumericalExecutedRuleMask, result); - if (success && (tempPP.getIndex() != 0 || sub->isNullSubstitution())) { + if (success && (tempPP.getIndex() != 0)) { // if there's a successful match (or it's a null // substitution), update pp to point to the first // character we didn't match, and pass the result from @@ -1103,9 +1272,9 @@ NFRule::matchToDelimiter(const UnicodeString& text, pp.setIndex(tempPP.getIndex()); return result.getDouble(); } - // commented out because ParsePosition doesn't have error index in 1.1.x - // restored for ICU4C port else { + // commented out because ParsePosition doesn't have error index in 1.1.x + // restored for ICU4C port pp.setErrorIndex(tempPP.getErrorIndex()); } @@ -1148,16 +1317,17 @@ NFRule::prefixLength(const UnicodeString& str, const UnicodeString& prefix, UErr // isn't a RuleBasedCollator, because RuleBasedCollator defines // the CollationElementIterator protocol. Hopefully, this // will change someday.) - RuleBasedCollator* collator = (RuleBasedCollator*)formatter->getCollator(); - CollationElementIterator* strIter = collator->createCollationElementIterator(str); - CollationElementIterator* prefixIter = collator->createCollationElementIterator(prefix); + const RuleBasedCollator* collator = formatter->getCollator(); + if (collator == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + return 0; + } + LocalPointer strIter(collator->createCollationElementIterator(str)); + LocalPointer prefixIter(collator->createCollationElementIterator(prefix)); // Check for memory allocation error. - if (collator == NULL || strIter == NULL || prefixIter == NULL) { - delete collator; - delete strIter; - delete prefixIter; - status = U_MEMORY_ALLOCATION_ERROR; - return 0; + if (strIter.isNull() || prefixIter.isNull()) { + status = U_MEMORY_ALLOCATION_ERROR; + return 0; } UErrorCode err = U_ZERO_ERROR; @@ -1209,8 +1379,6 @@ NFRule::prefixLength(const UnicodeString& str, const UnicodeString& prefix, UErr // if skipping over ignorables brought us to the end // of the target string, we didn't match and return 0 if (oStr == CollationElementIterator::NULLORDER) { - delete prefixIter; - delete strIter; return 0; } @@ -1219,8 +1387,6 @@ NFRule::prefixLength(const UnicodeString& str, const UnicodeString& prefix, UErr // get a mismatch, dump out and return 0 if (CollationElementIterator::primaryOrder(oStr) != CollationElementIterator::primaryOrder(oPrefix)) { - delete prefixIter; - delete strIter; return 0; // otherwise, advance to the next character in each string @@ -1240,9 +1406,6 @@ NFRule::prefixLength(const UnicodeString& str, const UnicodeString& prefix, UErr #ifdef RBNF_DEBUG fprintf(stderr, "prefix length: %d\n", result); #endif - delete prefixIter; - delete strIter; - return result; #if 0 //---------------------------------------------------------------- @@ -1326,107 +1489,84 @@ NFRule::findText(const UnicodeString& str, int32_t startingAt, int32_t* length) const { -#if !UCONFIG_NO_COLLATION - // if lenient parsing is turned off, this is easy: just call - // String.indexOf() and we're done + if (rulePatternFormat) { + Formattable result; + FieldPosition position(UNUM_INTEGER_FIELD); + position.setBeginIndex(startingAt); + rulePatternFormat->parseType(str, this, result, position); + int start = position.getBeginIndex(); + if (start >= 0) { + int32_t pluralRuleStart = ruleText.indexOf(gDollarOpenParenthesis, -1, 0); + int32_t pluralRuleSuffix = ruleText.indexOf(gClosedParenthesisDollar, -1, pluralRuleStart) + 2; + int32_t matchLen = position.getEndIndex() - start; + UnicodeString prefix(ruleText.tempSubString(0, pluralRuleStart)); + UnicodeString suffix(ruleText.tempSubString(pluralRuleSuffix)); + if (str.compare(start - prefix.length(), prefix.length(), prefix, 0, prefix.length()) == 0 + && str.compare(start + matchLen, suffix.length(), suffix, 0, suffix.length()) == 0) + { + *length = matchLen + prefix.length() + suffix.length(); + return start - prefix.length(); + } + } + *length = 0; + return -1; + } if (!formatter->isLenient()) { + // if lenient parsing is turned off, this is easy: just call + // String.indexOf() and we're done *length = key.length(); return str.indexOf(key, startingAt); - + } + else { // but if lenient parsing is turned ON, we've got some work // ahead of us - } else -#endif - { - //---------------------------------------------------------------- - // JDK 1.1 HACK (take out of 1.2-specific code) - - // in JDK 1.2, CollationElementIterator provides us with an - // API to map between character offsets and collation elements - // and we can do this by marching through the string comparing - // collation elements. We can't do that in JDK 1.1. Insted, - // we have to go through this horrible slow mess: - int32_t p = startingAt; - int32_t keyLen = 0; - - // basically just isolate smaller and smaller substrings of - // the target string (each running to the end of the string, - // and with the first one running from startingAt to the end) - // and then use prefixLength() to see if the search key is at - // the beginning of each substring. This is excruciatingly - // slow, but it will locate the key and tell use how long the - // matching text was. - UnicodeString temp; - UErrorCode status = U_ZERO_ERROR; - while (p < str.length() && keyLen == 0) { - temp.setTo(str, p, str.length() - p); - keyLen = prefixLength(temp, key, status); - if (U_FAILURE(status)) { - break; - } - if (keyLen != 0) { - *length = keyLen; - return p; - } - ++p; - } - // if we make it to here, we didn't find it. Return -1 for the - // location. The length should be ignored, but set it to 0, - // which should be "safe" - *length = 0; - return -1; + return findTextLenient(str, key, startingAt, length); + } +} - //---------------------------------------------------------------- - // JDK 1.2 version of this routine - //RuleBasedCollator collator = (RuleBasedCollator)formatter.getCollator(); - // - //CollationElementIterator strIter = collator.getCollationElementIterator(str); - //CollationElementIterator keyIter = collator.getCollationElementIterator(key); - // - //int keyStart = -1; - // - //str.setOffset(startingAt); - // - //int oStr = strIter.next(); - //int oKey = keyIter.next(); - //while (oKey != CollationElementIterator.NULLORDER) { - // while (oStr != CollationElementIterator.NULLORDER && - // CollationElementIterator.primaryOrder(oStr) == 0) - // oStr = strIter.next(); - // - // while (oKey != CollationElementIterator.NULLORDER && - // CollationElementIterator.primaryOrder(oKey) == 0) - // oKey = keyIter.next(); - // - // if (oStr == CollationElementIterator.NULLORDER) { - // return new int[] { -1, 0 }; - // } - // - // if (oKey == CollationElementIterator.NULLORDER) { - // break; - // } - // - // if (CollationElementIterator.primaryOrder(oStr) == - // CollationElementIterator.primaryOrder(oKey)) { - // keyStart = strIter.getOffset(); - // oStr = strIter.next(); - // oKey = keyIter.next(); - // } else { - // if (keyStart != -1) { - // keyStart = -1; - // keyIter.reset(); - // } else { - // oStr = strIter.next(); - // } - // } - //} - // - //if (oKey == CollationElementIterator.NULLORDER) { - // return new int[] { keyStart, strIter.getOffset() - keyStart }; - //} else { - // return new int[] { -1, 0 }; - //} +int32_t +NFRule::findTextLenient(const UnicodeString& str, + const UnicodeString& key, + int32_t startingAt, + int32_t* length) const +{ + //---------------------------------------------------------------- + // JDK 1.1 HACK (take out of 1.2-specific code) + + // in JDK 1.2, CollationElementIterator provides us with an + // API to map between character offsets and collation elements + // and we can do this by marching through the string comparing + // collation elements. We can't do that in JDK 1.1. Insted, + // we have to go through this horrible slow mess: + int32_t p = startingAt; + int32_t keyLen = 0; + + // basically just isolate smaller and smaller substrings of + // the target string (each running to the end of the string, + // and with the first one running from startingAt to the end) + // and then use prefixLength() to see if the search key is at + // the beginning of each substring. This is excruciatingly + // slow, but it will locate the key and tell use how long the + // matching text was. + UnicodeString temp; + UErrorCode status = U_ZERO_ERROR; + while (p < str.length() && keyLen == 0) { + temp.setTo(str, p, str.length() - p); + keyLen = prefixLength(temp, key, status); + if (U_FAILURE(status)) { + break; + } + if (keyLen != 0) { + *length = keyLen; + return p; + } + ++p; } + // if we make it to here, we didn't find it. Return -1 for the + // location. The length should be ignored, but set it to 0, + // which should be "safe" + *length = 0; + return -1; } /** @@ -1450,15 +1590,17 @@ NFRule::allIgnorable(const UnicodeString& str, UErrorCode& status) const // a collation element iterator and make sure each collation // element is 0 (ignorable) at the primary level if (formatter->isLenient()) { - RuleBasedCollator* collator = (RuleBasedCollator*)(formatter->getCollator()); - CollationElementIterator* iter = collator->createCollationElementIterator(str); - + const RuleBasedCollator* collator = formatter->getCollator(); + if (collator == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + return FALSE; + } + LocalPointer iter(collator->createCollationElementIterator(str)); + // Memory allocation error check. - if (collator == NULL || iter == NULL) { - delete collator; - delete iter; - status = U_MEMORY_ALLOCATION_ERROR; - return FALSE; + if (iter.isNull()) { + status = U_MEMORY_ALLOCATION_ERROR; + return FALSE; } UErrorCode err = U_ZERO_ERROR; @@ -1468,7 +1610,6 @@ NFRule::allIgnorable(const UnicodeString& str, UErrorCode& status) const o = iter->next(err); } - delete iter; return o == CollationElementIterator::NULLORDER; } #endif @@ -1478,9 +1619,17 @@ NFRule::allIgnorable(const UnicodeString& str, UErrorCode& status) const return FALSE; } +void +NFRule::setDecimalFormatSymbols(const DecimalFormatSymbols& newSymbols, UErrorCode& status) { + if (sub1 != NULL) { + sub1->setDecimalFormatSymbols(newSymbols, status); + } + if (sub2 != NULL) { + sub2->setDecimalFormatSymbols(newSymbols, status); + } +} + U_NAMESPACE_END /* U_HAVE_RBNF */ #endif - -