icuSources/i18n/number_formatimpl.cpp

   1 // © 2017 and later: Unicode, Inc. and others.
   2 // License & terms of use: http://www.unicode.org/copyright.html
   3
   4 #include "unicode/utypes.h"
   5
   6 #if !UCONFIG_NO_FORMATTING
   7
   8 #include "cstring.h"
   9 #include "unicode/ures.h"
  10 #include "uresimp.h"
  11 #include "charstr.h"
  12 #include "number_formatimpl.h"
  13 #include "unicode/numfmt.h"
  14 #include "number_patternstring.h"
  15 #include "number_utils.h"
  16 #include "unicode/numberformatter.h"
  17 #include "unicode/dcfmtsym.h"
  18 #include "number_scientific.h"
  19 #include "number_compact.h"
  20 #include "uresimp.h"
  21 #include "ureslocs.h"
  22
  23 using namespace icu;
  24 using namespace icu::number;
  25 using namespace icu::number::impl;
  26
  27 namespace {
  28
  /**
   * Plain aggregate returned by getCurrencyFormatInfo(): the currency-specific pattern and
   * separator strings for one currency. A value with exists == false carries no data.
   */
  struct CurrencyFormatInfoResult {
      /** Set to true by getCurrencyFormatInfo() once a currency-specific entry was located. */
      bool exists;

      /** Currency-specific pattern string; nullptr when nothing was loaded. */
      const char16_t* pattern;

      /** Currency-specific decimal separator; nullptr when nothing was loaded. */
      const char16_t* decimalSeparator;

      /** Currency-specific grouping separator; nullptr when nothing was loaded. */
      const char16_t* groupingSeparator;
  };
  35
  36 CurrencyFormatInfoResult
  37 getCurrencyFormatInfo(const Locale& locale, const char* isoCode, UErrorCode& status) {
  38     // TODO: Load this data in a centralized location like ICU4J?
  39     // TODO: Move this into the CurrencySymbols class?
  40     // TODO: Parts of this same data are loaded in dcfmtsym.cpp; should clean up.
  41     CurrencyFormatInfoResult result = {false, nullptr, nullptr, nullptr};
  42     if (U_FAILURE(status)) { return result; }
  43     CharString key;
  44     key.append("Currencies/", status);
  45     key.append(isoCode, status);
  46     UErrorCode localStatus = status;
  47     LocalUResourceBundlePointer bundle(ures_open(U_ICUDATA_CURR, locale.getName(), &localStatus));
  48     ures_getByKeyWithFallback(bundle.getAlias(), key.data(), bundle.getAlias(), &localStatus);
  49     if (U_SUCCESS(localStatus) &&
  50         ures_getSize(bundle.getAlias()) > 2) { // the length is 3 if more data is present
  51         ures_getByIndex(bundle.getAlias(), 2, bundle.getAlias(), &localStatus);
  52         int32_t dummy;
  53         result.exists = true;
  54         result.pattern = ures_getStringByIndex(bundle.getAlias(), 0, &dummy, &localStatus);
  55         result.decimalSeparator = ures_getStringByIndex(bundle.getAlias(), 1, &dummy, &localStatus);
  56         result.groupingSeparator = ures_getStringByIndex(bundle.getAlias(), 2, &dummy, &localStatus);
  57         status = localStatus;
  58     } else if (localStatus != U_MISSING_RESOURCE_ERROR) {
  59         status = localStatus;
  60     }
  61     return result;
  62 }
  63
  64 }  // namespace
  65
  66
  67 MicroPropsGenerator::~MicroPropsGenerator() = default;
  68
  69
  70 NumberFormatterImpl* NumberFormatterImpl::fromMacros(const MacroProps& macros, UErrorCode& status) {
  71     return new NumberFormatterImpl(macros, true, status);
  72 }
  73
  74 void NumberFormatterImpl::applyStatic(const MacroProps& macros, DecimalQuantity& inValue,
  75                                       NumberStringBuilder& outString, UErrorCode& status) {
  76     NumberFormatterImpl impl(macros, false, status);
  77     impl.applyUnsafe(inValue, outString, status);
  78 }
  79
  80 int32_t NumberFormatterImpl::getPrefixSuffixStatic(const MacroProps& macros, int8_t signum,
  81                                                    StandardPlural::Form plural,
  82                                                    NumberStringBuilder& outString, UErrorCode& status) {
  83     NumberFormatterImpl impl(macros, false, status);
  84     return impl.getPrefixSuffixUnsafe(signum, plural, outString, status);
  85 }
  86
  87 // NOTE: C++ SPECIFIC DIFFERENCE FROM JAVA:
  88 // The "safe" apply method uses a new MicroProps. In the MicroPropsGenerator, fMicros is copied into the new instance.
  89 // The "unsafe" method simply re-uses fMicros, eliminating the extra copy operation.
  90 // See MicroProps::processQuantity() for details.
  91
  92 void NumberFormatterImpl::apply(DecimalQuantity& inValue, NumberStringBuilder& outString,
  93                                 UErrorCode& status) const {
  94     if (U_FAILURE(status)) { return; }
  95     MicroProps micros;
  96     if (!fMicroPropsGenerator) { return; }
  97     fMicroPropsGenerator->processQuantity(inValue, micros, status);
  98     if (U_FAILURE(status)) { return; }
  99     microsToString(micros, inValue, outString, status);
 100 }
 101
 102 void NumberFormatterImpl::applyUnsafe(DecimalQuantity& inValue, NumberStringBuilder& outString,
 103                                       UErrorCode& status) {
 104     if (U_FAILURE(status)) { return; }
 105     fMicroPropsGenerator->processQuantity(inValue, fMicros, status);
 106     if (U_FAILURE(status)) { return; }
 107     microsToString(fMicros, inValue, outString, status);
 108 }
 109
 110 int32_t NumberFormatterImpl::getPrefixSuffix(int8_t signum, StandardPlural::Form plural,
 111                                              NumberStringBuilder& outString, UErrorCode& status) const {
 112     if (U_FAILURE(status)) { return 0; }
 113     // #13453: DecimalFormat wants the affixes from the pattern only (modMiddle, aka pattern modifier).
 114     // Safe path: use fImmutablePatternModifier.
 115     const Modifier* modifier = fImmutablePatternModifier->getModifier(signum, plural);
 116     modifier->apply(outString, 0, 0, status);
 117     if (U_FAILURE(status)) { return 0; }
 118     return modifier->getPrefixLength(status);
 119 }
 120
 121 int32_t NumberFormatterImpl::getPrefixSuffixUnsafe(int8_t signum, StandardPlural::Form plural,
 122                                                    NumberStringBuilder& outString, UErrorCode& status) {
 123     if (U_FAILURE(status)) { return 0; }
 124     // #13453: DecimalFormat wants the affixes from the pattern only (modMiddle, aka pattern modifier).
 125     // Unsafe path: use fPatternModifier.
 126     fPatternModifier->setNumberProperties(signum, plural);
 127     fPatternModifier->apply(outString, 0, 0, status);
 128     if (U_FAILURE(status)) { return 0; }
 129     return fPatternModifier->getPrefixLength(status);
 130 }
 131
 132 NumberFormatterImpl::NumberFormatterImpl(const MacroProps& macros, bool safe, UErrorCode& status) {
 133     fMicroPropsGenerator = macrosToMicroGenerator(macros, safe, status);
 134 }
 135
 136 //////////
 137
 138 const MicroPropsGenerator*
 139 NumberFormatterImpl::macrosToMicroGenerator(const MacroProps& macros, bool safe, UErrorCode& status) {
 140     if (U_FAILURE(status)) { return nullptr; }
 141     const MicroPropsGenerator* chain = &fMicros;
 142
 143     // Check that macros is error-free before continuing.
 144     if (macros.copyErrorTo(status)) {
 145         return nullptr;
 146     }
 147
 148     // TODO: Accept currency symbols from DecimalFormatSymbols?
 149
 150     // Pre-compute a few values for efficiency.
 151     bool isCurrency = utils::unitIsCurrency(macros.unit);
 152     bool isNoUnit = utils::unitIsNoUnit(macros.unit);
 153     bool isPercent = isNoUnit && utils::unitIsPercent(macros.unit);
 154     bool isPermille = isNoUnit && utils::unitIsPermille(macros.unit);
 155     bool isCldrUnit = !isCurrency && !isNoUnit;
 156     bool isAccounting =
 157             macros.sign == UNUM_SIGN_ACCOUNTING || macros.sign == UNUM_SIGN_ACCOUNTING_ALWAYS ||
 158             macros.sign == UNUM_SIGN_ACCOUNTING_EXCEPT_ZERO;
 159     CurrencyUnit currency(nullptr, status);
 160     if (isCurrency) {
 161         currency = CurrencyUnit(macros.unit, status); // Restore CurrencyUnit from MeasureUnit
 162     }
 163     const CurrencySymbols* currencySymbols;
 164     if (macros.currencySymbols != nullptr) {
 165         // Used by the DecimalFormat code path
 166         currencySymbols = macros.currencySymbols;
 167     } else {
 168         fWarehouse.fCurrencySymbols = {currency, macros.locale, status};
 169         currencySymbols = &fWarehouse.fCurrencySymbols;
 170     }
 171     UNumberUnitWidth unitWidth = UNUM_UNIT_WIDTH_SHORT;
 172     if (macros.unitWidth != UNUM_UNIT_WIDTH_COUNT) {
 173         unitWidth = macros.unitWidth;
 174     }
 175
 176     // Select the numbering system.
 177     LocalPointer<const NumberingSystem> nsLocal;
 178     const NumberingSystem* ns;
 179     if (macros.symbols.isNumberingSystem()) {
 180         ns = macros.symbols.getNumberingSystem();
 181     } else {
 182         // TODO: Is there a way to avoid creating the NumberingSystem object?
 183         ns = NumberingSystem::createInstance(macros.locale, status);
 184         // Give ownership to the function scope.
 185         nsLocal.adoptInstead(ns);
 186     }
 187     const char* nsName = U_SUCCESS(status) ? ns->getName() : "latn";
 188
 189     // Resolve the symbols. Do this here because currency may need to customize them.
 190     if (macros.symbols.isDecimalFormatSymbols()) {
 191         fMicros.symbols = macros.symbols.getDecimalFormatSymbols();
 192     } else {
 193         fMicros.symbols = new DecimalFormatSymbols(macros.locale, *ns, status);
 194         // Give ownership to the NumberFormatterImpl.
 195         fSymbols.adoptInstead(fMicros.symbols);
 196     }
 197
 198     // Load and parse the pattern string. It is used for grouping sizes and affixes only.
 199     // If we are formatting currency, check for a currency-specific pattern.
 200     const char16_t* pattern = nullptr;
 201     if (isCurrency) {
 202         CurrencyFormatInfoResult info = getCurrencyFormatInfo(
 203                 macros.locale, currency.getSubtype(), status);
 204         if (info.exists) {
 205             pattern = info.pattern;
 206             // It's clunky to clone an object here, but this code is not frequently executed.
 207             auto* symbols = new DecimalFormatSymbols(*fMicros.symbols);
 208             fMicros.symbols = symbols;
 209             fSymbols.adoptInstead(symbols);
 210             symbols->setSymbol(
 211                     DecimalFormatSymbols::ENumberFormatSymbol::kMonetarySeparatorSymbol,
 212                     UnicodeString(info.decimalSeparator),
 213                     FALSE);
 214             symbols->setSymbol(
 215                     DecimalFormatSymbols::ENumberFormatSymbol::kMonetaryGroupingSeparatorSymbol,
 216                     UnicodeString(info.groupingSeparator),
 217                     FALSE);
 218         }
 219     }
 220     if (pattern == nullptr) {
 221         CldrPatternStyle patternStyle;
 222         if (isPercent || isPermille) {
 223             patternStyle = CLDR_PATTERN_STYLE_PERCENT;
 224         } else if (!isCurrency || unitWidth == UNUM_UNIT_WIDTH_FULL_NAME) {
 225             patternStyle = CLDR_PATTERN_STYLE_DECIMAL;
 226         } else if (isAccounting) {
 227             // NOTE: Although ACCOUNTING and ACCOUNTING_ALWAYS are only supported in currencies right now,
 228             // the API contract allows us to add support to other units in the future.
 229             patternStyle = CLDR_PATTERN_STYLE_ACCOUNTING;
 230         } else {
 231             patternStyle = CLDR_PATTERN_STYLE_CURRENCY;
 232         }
 233         pattern = utils::getPatternForStyle(macros.locale, nsName, patternStyle, status);
 234     }
 235     auto patternInfo = new ParsedPatternInfo();
 236     fPatternInfo.adoptInstead(patternInfo);
 237     PatternParser::parseToPatternInfo(UnicodeString(pattern), *patternInfo, status);
 238
 239     /////////////////////////////////////////////////////////////////////////////////////
 240     /// START POPULATING THE DEFAULT MICROPROPS AND BUILDING THE MICROPROPS GENERATOR ///
 241     /////////////////////////////////////////////////////////////////////////////////////
 242
 243     // Multiplier
 244     if (macros.scale.isValid()) {
 245         fMicros.helpers.multiplier.setAndChain(macros.scale, chain);
 246         chain = &fMicros.helpers.multiplier;
 247     }
 248
 249     // Rounding strategy
 250     Precision precision;
 251     if (!macros.precision.isBogus()) {
 252         precision = macros.precision;
 253     } else if (macros.notation.fType == Notation::NTN_COMPACT) {
 254         precision = Precision::integer().withMinDigits(2);
 255     } else if (isCurrency) {
 256         precision = Precision::currency(UCURR_USAGE_STANDARD);
 257     } else {
 258         precision = Precision::maxFraction(6);
 259     }
 260     UNumberFormatRoundingMode roundingMode;
 261     if (macros.roundingMode != kDefaultMode) {
 262         roundingMode = macros.roundingMode;
 263     } else {
 264         // Temporary until ICU 64
 265         roundingMode = precision.fRoundingMode;
 266     }
 267     fMicros.rounder = {precision, roundingMode, currency, status};
 268
 269     // Grouping strategy
 270     if (!macros.grouper.isBogus()) {
 271         fMicros.grouping = macros.grouper;
 272     } else if (macros.notation.fType == Notation::NTN_COMPACT) {
 273         // Compact notation uses minGrouping by default since ICU 59
 274         fMicros.grouping = Grouper::forStrategy(UNUM_GROUPING_MIN2);
 275     } else {
 276         fMicros.grouping = Grouper::forStrategy(UNUM_GROUPING_AUTO);
 277     }
 278     fMicros.grouping.setLocaleData(*fPatternInfo, macros.locale);
 279
 280     // Padding strategy
 281     if (!macros.padder.isBogus()) {
 282         fMicros.padding = macros.padder;
 283     } else {
 284         fMicros.padding = Padder::none();
 285     }
 286
 287     // Integer width
 288     if (!macros.integerWidth.isBogus()) {
 289         fMicros.integerWidth = macros.integerWidth;
 290     } else {
 291         fMicros.integerWidth = IntegerWidth::standard();
 292     }
 293
 294     // Sign display
 295     if (macros.sign != UNUM_SIGN_COUNT) {
 296         fMicros.sign = macros.sign;
 297     } else {
 298         fMicros.sign = UNUM_SIGN_AUTO;
 299     }
 300
 301     // Decimal mark display
 302     if (macros.decimal != UNUM_DECIMAL_SEPARATOR_COUNT) {
 303         fMicros.decimal = macros.decimal;
 304     } else {
 305         fMicros.decimal = UNUM_DECIMAL_SEPARATOR_AUTO;
 306     }
 307
 308     // Use monetary separator symbols
 309     fMicros.useCurrency = isCurrency;
 310
 311     // Inner modifier (scientific notation)
 312     if (macros.notation.fType == Notation::NTN_SCIENTIFIC) {
 313         fScientificHandler.adoptInstead(new ScientificHandler(&macros.notation, fMicros.symbols, chain));
 314         chain = fScientificHandler.getAlias();
 315     } else {
 316         // No inner modifier required
 317         fMicros.modInner = &fMicros.helpers.emptyStrongModifier;
 318     }
 319
 320     // Middle modifier (patterns, positive/negative, currency symbols, percent)
 321     auto patternModifier = new MutablePatternModifier(false);
 322     fPatternModifier.adoptInstead(patternModifier);
 323     patternModifier->setPatternInfo(
 324             macros.affixProvider != nullptr ? macros.affixProvider
 325                                             : static_cast<const AffixPatternProvider*>(fPatternInfo.getAlias()));
 326     patternModifier->setPatternAttributes(fMicros.sign, isPermille);
 327     if (patternModifier->needsPlurals()) {
 328         patternModifier->setSymbols(
 329                 fMicros.symbols,
 330                 currencySymbols,
 331                 unitWidth,
 332                 resolvePluralRules(macros.rules, macros.locale, status));
 333     } else {
 334         patternModifier->setSymbols(fMicros.symbols, currencySymbols, unitWidth, nullptr);
 335     }
 336     if (safe) {
 337         fImmutablePatternModifier.adoptInstead(patternModifier->createImmutableAndChain(chain, status));
 338         chain = fImmutablePatternModifier.getAlias();
 339     } else {
 340         patternModifier->addToChain(chain);
 341         chain = patternModifier;
 342     }
 343
 344     // Outer modifier (CLDR units and currency long names)
 345     if (isCldrUnit) {
 346         fLongNameHandler.adoptInstead(
 347                 new LongNameHandler(
 348                         LongNameHandler::forMeasureUnit(
 349                                 macros.locale,
 350                                 macros.unit,
 351                                 macros.perUnit,
 352                                 unitWidth,
 353                                 resolvePluralRules(macros.rules, macros.locale, status),
 354                                 chain,
 355                                 status)));
 356         chain = fLongNameHandler.getAlias();
 357     } else if (isCurrency && unitWidth == UNUM_UNIT_WIDTH_FULL_NAME) {
 358         fLongNameHandler.adoptInstead(
 359                 new LongNameHandler(
 360                         LongNameHandler::forCurrencyLongNames(
 361                                 macros.locale,
 362                                 currency,
 363                                 resolvePluralRules(macros.rules, macros.locale, status),
 364                                 chain,
 365                                 status)));
 366         chain = fLongNameHandler.getAlias();
 367     } else {
 368         // No outer modifier required
 369         fMicros.modOuter = &fMicros.helpers.emptyWeakModifier;
 370     }
 371
 372     // Compact notation
 373     // NOTE: Compact notation can (but might not) override the middle modifier and rounding.
 374     // It therefore needs to go at the end of the chain.
 375     if (macros.notation.fType == Notation::NTN_COMPACT) {
 376         CompactType compactType = (isCurrency && unitWidth != UNUM_UNIT_WIDTH_FULL_NAME)
 377                                   ? CompactType::TYPE_CURRENCY : CompactType::TYPE_DECIMAL;
 378         fCompactHandler.adoptInstead(
 379                 new CompactHandler(
 380                         macros.notation.fUnion.compactStyle,
 381                         macros.locale,
 382                         nsName,
 383                         compactType,
 384                         resolvePluralRules(macros.rules, macros.locale, status),
 385                         safe ? patternModifier : nullptr,
 386                         chain,
 387                         status));
 388         chain = fCompactHandler.getAlias();
 389     }
 390
 391     return chain;
 392 }
 393
 394 const PluralRules*
 395 NumberFormatterImpl::resolvePluralRules(const PluralRules* rulesPtr, const Locale& locale,
 396                                         UErrorCode& status) {
 397     if (rulesPtr != nullptr) {
 398         return rulesPtr;
 399     }
 400     // Lazily create PluralRules
 401     if (fRules.isNull()) {
 402         fRules.adoptInstead(PluralRules::forLocale(locale, status));
 403     }
 404     return fRules.getAlias();
 405 }
 406
 407 int32_t NumberFormatterImpl::microsToString(const MicroProps& micros, DecimalQuantity& quantity,
 408                                             NumberStringBuilder& string, UErrorCode& status) {
 409     micros.rounder.apply(quantity, status);
 410     micros.integerWidth.apply(quantity, status);
 411     int32_t length = writeNumber(micros, quantity, string, status);
 412     // NOTE: When range formatting is added, these modifiers can bubble up.
 413     // For now, apply them all here at once.
 414     // Always apply the inner modifier (which is "strong").
 415     length += micros.modInner->apply(string, 0, length, status);
 416     if (micros.padding.isValid()) {
 417         length += micros.padding
 418                 .padAndApply(*micros.modMiddle, *micros.modOuter, string, 0, length, status);
 419     } else {
 420         length += micros.modMiddle->apply(string, 0, length, status);
 421         length += micros.modOuter->apply(string, 0, length, status);
 422     }
 423     return length;
 424 }
 425
 426 int32_t NumberFormatterImpl::writeNumber(const MicroProps& micros, DecimalQuantity& quantity,
 427                                          NumberStringBuilder& string, UErrorCode& status) {
 428     int32_t length = 0;
 429     if (quantity.isInfinite()) {
 430         length += string.insert(
 431                 length,
 432                 micros.symbols->getSymbol(DecimalFormatSymbols::ENumberFormatSymbol::kInfinitySymbol),
 433                 UNUM_INTEGER_FIELD,
 434                 status);
 435
 436     } else if (quantity.isNaN()) {
 437         length += string.insert(
 438                 length,
 439                 micros.symbols->getSymbol(DecimalFormatSymbols::ENumberFormatSymbol::kNaNSymbol),
 440                 UNUM_INTEGER_FIELD,
 441                 status);
 442
 443     } else {
 444         // Add the integer digits
 445         length += writeIntegerDigits(micros, quantity, string, status);
 446
 447         // Add the decimal point
 448         if (quantity.getLowerDisplayMagnitude() < 0 || micros.decimal == UNUM_DECIMAL_SEPARATOR_ALWAYS) {
 449             length += string.insert(
 450                     length,
 451                     micros.useCurrency ? micros.symbols->getSymbol(
 452                             DecimalFormatSymbols::ENumberFormatSymbol::kMonetarySeparatorSymbol) : micros
 453                             .symbols
 454                             ->getSymbol(
 455                                     DecimalFormatSymbols::ENumberFormatSymbol::kDecimalSeparatorSymbol),
 456                     UNUM_DECIMAL_SEPARATOR_FIELD,
 457                     status);
 458         }
 459
 460         // Add the fraction digits
 461         length += writeFractionDigits(micros, quantity, string, status);
 462     }
 463
 464     return length;
 465 }
 466
 467 int32_t NumberFormatterImpl::writeIntegerDigits(const MicroProps& micros, DecimalQuantity& quantity,
 468                                                 NumberStringBuilder& string, UErrorCode& status) {
 469     int length = 0;
 470     int integerCount = quantity.getUpperDisplayMagnitude() + 1;
 471     for (int i = 0; i < integerCount; i++) {
 472         // Add grouping separator
 473         if (micros.grouping.groupAtPosition(i, quantity)) {
 474             length += string.insert(
 475                     0,
 476                     micros.useCurrency ? micros.symbols->getSymbol(
 477                             DecimalFormatSymbols::ENumberFormatSymbol::kMonetaryGroupingSeparatorSymbol)
 478                                        : micros.symbols->getSymbol(
 479                             DecimalFormatSymbols::ENumberFormatSymbol::kGroupingSeparatorSymbol),
 480                     UNUM_GROUPING_SEPARATOR_FIELD,
 481                     status);
 482         }
 483
 484         // Get and append the next digit value
 485         int8_t nextDigit = quantity.getDigit(i);
 486         length += utils::insertDigitFromSymbols(
 487                 string, 0, nextDigit, *micros.symbols, UNUM_INTEGER_FIELD, status);
 488     }
 489     return length;
 490 }
 491
 492 int32_t NumberFormatterImpl::writeFractionDigits(const MicroProps& micros, DecimalQuantity& quantity,
 493                                                  NumberStringBuilder& string, UErrorCode& status) {
 494     int length = 0;
 495     int fractionCount = -quantity.getLowerDisplayMagnitude();
 496     for (int i = 0; i < fractionCount; i++) {
 497         // Get and append the next digit value
 498         int8_t nextDigit = quantity.getDigit(-i - 1);
 499         length += utils::insertDigitFromSymbols(
 500                 string, string.length(), nextDigit, *micros.symbols, UNUM_FRACTION_FIELD, status);
 501     }
 502     return length;
 503 }
 504
 505 #endif /* #if !UCONFIG_NO_FORMATTING */