icuSources/i18n/compactdecimalformat.cpp

   1 // © 2016 and later: Unicode, Inc. and others.
   2 // License & terms of use: http://www.unicode.org/copyright.html
   3 /*
   4 *******************************************************************************
   5 * Copyright (C) 1997-2015, International Business Machines Corporation and    *
   6 * others. All Rights Reserved.                                                *
   7 *******************************************************************************
   8 *
   9 * File COMPACTDECIMALFORMAT.CPP
  10 *
  11 ********************************************************************************
  12 */
  13 #include "unicode/utypes.h"
  14
  15 #if !UCONFIG_NO_FORMATTING
  16
  17 #include "charstr.h"
  18 #include "cstring.h"
  19 #include "digitlst.h"
  20 #include "mutex.h"
  21 #include "unicode/compactdecimalformat.h"
  22 #include "unicode/numsys.h"
  23 #include "unicode/plurrule.h"
  24 #include "unicode/ures.h"
  25 #include "ucln_in.h"
  26 #include "uhash.h"
  27 #include "umutex.h"
  28 #include "unicode/ures.h"
  29 #include "uresimp.h"
  30
   31 // Process-wide cache that maps a locale name to its CDFLocaleData struct.
   32 static UHashtable* gCompactDecimalData = NULL;  // Created on first use in getCDFLocaleStyleData; closed by cdf_cleanup.
   33 static UMutex gCompactDecimalMetaLock = U_MUTEX_INITIALIZER;  // Held around lookups and inserts on gCompactDecimalData.
  34
  35 U_NAMESPACE_BEGIN
  36
   37 static const int32_t MAX_DIGITS = 15;  // Length of CDFLocaleStyleData::divisors; larger log10 values are skipped when loading.
   38 static const char gOther[] = "other";  // Plural variant key that checkForOtherVariants requires.
   39 static const char gLatnTag[] = "latn";  // Numbering system name that load() compares against.
   40 static const char gNumberElementsTag[] = "NumberElements";  // First component of the resource path built in load().
   41 static const char gDecimalFormatTag[] = "decimalFormat";  // Formats-table key accepted by CmptDecDataSink.
   42 static const char gPatternsShort[] = "patternsShort";  // Patterns-table key whose data goes into shortData.
   43 static const char gPatternsLong[] = "patternsLong";  // Patterns-table key whose data goes into longData.
   44 static const char gLatnPath[] = "NumberElements/latn";  // Resource path of the Latin data read by load().
   45
   46 static const UChar u_0 = 0x30;  // '0'
   47 static const UChar u_apos = 0x27;  // '\''
   48
   49 static const UChar kZero[] = {u_0};  // One-element search string used by populatePrefixSuffix.
  50
   51 // Scanner state used by fixQuotes while unescaping single quotes.
   52 enum QuoteState {
   53   OUTSIDE,  // Not inside a quoted run.
   54   INSIDE_EMPTY,  // The previous character was the opening quote.
   55   INSIDE_FULL  // Inside a quoted run after at least one non-quote character.
   56 };
  57
   58 enum FallbackFlags {
   59   ANY = 0,
   60   MUST = 1,
   61   NOT_ROOT = 2
   62   // Next one will be 4, then 8, etc. (values look like bit flags, i.e. powers of two -- TODO confirm).
   63 };
  64
  65
  66 // CDFUnit represents a prefix-suffix pair for a particular variant
  67 // and log10 value.
  68 struct CDFUnit : public UMemory {
  69   UnicodeString prefix;
  70   UnicodeString suffix;
  71   inline CDFUnit() : prefix(), suffix() {
  72     prefix.setToBogus();
  73   }
  74   inline ~CDFUnit() {}
  75   inline UBool isSet() const {
  76     return !prefix.isBogus();
  77   }
  78   inline void markAsSet() {
  79     prefix.remove();
  80   }
  81 };
  82
  83 // CDFLocaleStyleData contains formatting data for a particular locale
  84 // and style.
  85 class CDFLocaleStyleData : public UMemory {
  86  public:
  87   // What to divide by for each log10 value when formatting. These values
  88   // will be powers of 10. For English, would be:
  89   // 1, 1, 1, 1000, 1000, 1000, 1000000, 1000000, 1000000, 1000000000 ...
  90   double divisors[MAX_DIGITS];
  91   // Maps plural variants to CDFUnit[MAX_DIGITS] arrays.
  92   // To format a number x,
  93   // first compute log10(x). Compute displayNum = (x / divisors[log10(x)]).
  94   // Compute the plural variant for displayNum
  95   // (e.g zero, one, two, few, many, other).
  96   // Compute cdfUnits = unitsByVariant[pluralVariant].
  97   // Prefix and suffix to use at cdfUnits[log10(x)]
  98   UHashtable* unitsByVariant;
  99   // A flag for whether or not this CDFLocaleStyleData was loaded from the
 100   // Latin numbering system as a fallback from the locale numbering system.
 101   // This value is meaningless if the object is bogus or empty.
 102   UBool fromFallback;
 103   inline CDFLocaleStyleData() : unitsByVariant(NULL), fromFallback(FALSE) {
 104     uprv_memset(divisors, 0, sizeof(divisors));
 105   }
 106   ~CDFLocaleStyleData();
 107   // Init initializes this object.
 108   void Init(UErrorCode& status);
 109   inline UBool isBogus() const {
 110     return unitsByVariant == NULL;
 111   }
 112   void setToBogus();
 113   UBool isEmpty() {
 114     return unitsByVariant == NULL || unitsByVariant->count == 0;
 115   }
 116  private:
 117   CDFLocaleStyleData(const CDFLocaleStyleData&);
 118   CDFLocaleStyleData& operator=(const CDFLocaleStyleData&);
 119 };
 120
 121 // CDFLocaleData contains formatting data for a particular locale.
 122 struct CDFLocaleData : public UMemory {
 123   CDFLocaleStyleData shortData;
 124   CDFLocaleStyleData longData;
 125   inline CDFLocaleData() : shortData(), longData() { }
 126   inline ~CDFLocaleData() { }
 127   // Init initializes this object.
 128   void Init(UErrorCode& status);
 129 };
 130
 131 U_NAMESPACE_END
 132
 133 U_CDECL_BEGIN
 134
 135 static UBool U_CALLCONV cdf_cleanup(void) {
 136   if (gCompactDecimalData != NULL) {
 137     uhash_close(gCompactDecimalData);
 138     gCompactDecimalData = NULL;
 139   }
 140   return TRUE;
 141 }
 142
 143 static void U_CALLCONV deleteCDFUnits(void* ptr) {
 144   delete [] (icu::CDFUnit*) ptr;
 145 }
 146
 147 static void U_CALLCONV deleteCDFLocaleData(void* ptr) {
 148   delete (icu::CDFLocaleData*) ptr;
 149 }
 150
 151 U_CDECL_END
 152
 153 U_NAMESPACE_BEGIN
 154
  155 static UBool divisors_equal(const double* lhs, const double* rhs);  // Compares MAX_DIGITS entries of each array.
  156 static const CDFLocaleStyleData* getCDFLocaleStyleData(const Locale& inLocale, UNumberCompactStyle style, UErrorCode& status);  // Result points into the global cache.
  157
  158 static const CDFLocaleStyleData* extractDataByStyleEnum(const CDFLocaleData& data, UNumberCompactStyle style, UErrorCode& status);
  159 static CDFLocaleData* loadCDFLocaleData(const Locale& inLocale, UErrorCode& status);  // Caller owns the returned pointer.
  160 static void load(const Locale& inLocale, CDFLocaleData* result, UErrorCode& status);
  161 static int32_t populatePrefixSuffix(const char* variant, int32_t log10Value, const UnicodeString& formatStr, UHashtable* result, UBool overwrite, UErrorCode& status);  // Returns -1 when an existing entry is kept.
  162 static double calculateDivisor(double power10, int32_t numZeros);
  163 static UBool onlySpaces(UnicodeString u);  // Takes a copy because it trims its argument.
  164 static void fixQuotes(UnicodeString& s);  // Modifies s in place.
  165 static void checkForOtherVariants(CDFLocaleStyleData* result, UErrorCode& status);
  166 static void fillInMissing(CDFLocaleStyleData* result);
  167 static int32_t computeLog10(double x, UBool inRange);
  168 static CDFUnit* createCDFUnit(const char* variant, int32_t log10Value, UHashtable* table, UErrorCode& status);
  169 static const CDFUnit* getCDFUnitFallback(const UHashtable* table, const UnicodeString& variant, int32_t log10Value);
  170
  171 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CompactDecimalFormat)  // Presumably expands to the class-ID/RTTI boilerplate; macro is defined outside this file.
 172
 173 CompactDecimalFormat::CompactDecimalFormat(
 174     const DecimalFormat& decimalFormat,
 175     const UHashtable* unitsByVariant,
 176     const double* divisors,
 177     PluralRules* pluralRules)
 178   : DecimalFormat(decimalFormat), _unitsByVariant(unitsByVariant), _divisors(divisors), _pluralRules(pluralRules) {
 179 }
 180
 181 CompactDecimalFormat::CompactDecimalFormat(const CompactDecimalFormat& source)
 182     : DecimalFormat(source), _unitsByVariant(source._unitsByVariant), _divisors(source._divisors), _pluralRules(source._pluralRules->clone()) {
 183 }
 184
 185 CompactDecimalFormat* U_EXPORT2
 186 CompactDecimalFormat::createInstance(
 187     const Locale& inLocale, UNumberCompactStyle style, UErrorCode& status) {
 188   LocalPointer<DecimalFormat> decfmt((DecimalFormat*) NumberFormat::makeInstance(inLocale, UNUM_DECIMAL, TRUE, status));
 189   if (U_FAILURE(status)) {
 190     return NULL;
 191   }
 192   LocalPointer<PluralRules> pluralRules(PluralRules::forLocale(inLocale, status));
 193   if (U_FAILURE(status)) {
 194     return NULL;
 195   }
 196   const CDFLocaleStyleData* data = getCDFLocaleStyleData(inLocale, style, status);
 197   if (U_FAILURE(status)) {
 198     return NULL;
 199   }
 200   CompactDecimalFormat* result =
 201       new CompactDecimalFormat(*decfmt, data->unitsByVariant, data->divisors, pluralRules.getAlias());
 202   if (result == NULL) {
 203     status = U_MEMORY_ALLOCATION_ERROR;
 204     return NULL;
 205   }
 206   pluralRules.orphan();
 207   result->setMaximumSignificantDigits(3);
 208   result->setSignificantDigitsUsed(TRUE);
 209   result->setGroupingUsed(FALSE);
 210   return result;
 211 }
 212
 213 CompactDecimalFormat&
 214 CompactDecimalFormat::operator=(const CompactDecimalFormat& rhs) {
 215   if (this != &rhs) {
 216     DecimalFormat::operator=(rhs);
 217     _unitsByVariant = rhs._unitsByVariant;
 218     _divisors = rhs._divisors;
 219     delete _pluralRules;
 220     _pluralRules = rhs._pluralRules->clone();
 221   }
 222   return *this;
 223 }
 224
 225 CompactDecimalFormat::~CompactDecimalFormat() {
 226   delete _pluralRules;
 227 }
 228
 229
 230 Format*
 231 CompactDecimalFormat::clone(void) const {
 232   return new CompactDecimalFormat(*this);
 233 }
 234
 235 UBool
 236 CompactDecimalFormat::operator==(const Format& that) const {
 237   if (this == &that) {
 238     return TRUE;
 239   }
 240   return (DecimalFormat::operator==(that) && eqHelper((const CompactDecimalFormat&) that));
 241 }
 242
 243 UBool
 244 CompactDecimalFormat::eqHelper(const CompactDecimalFormat& that) const {
 245   return uhash_equals(_unitsByVariant, that._unitsByVariant) && divisors_equal(_divisors, that._divisors) && (*_pluralRules == *that._pluralRules);
 246 }
 247
 248 UnicodeString&
 249 CompactDecimalFormat::format(
 250     double number,
 251     UnicodeString& appendTo,
 252     FieldPosition& pos) const {
 253   UErrorCode status = U_ZERO_ERROR;
 254   return format(number, appendTo, pos, status);
 255 }
 256
 257 UnicodeString&
 258 CompactDecimalFormat::format(
 259     double number,
 260     UnicodeString& appendTo,
 261     FieldPosition& pos,
 262     UErrorCode &status) const {
 263   if (U_FAILURE(status)) {
 264     return appendTo;
 265   }
 266   DigitList orig, rounded;
 267   orig.set(number);
 268   UBool isNegative;
 269   _round(orig, rounded, isNegative, status);
 270   if (U_FAILURE(status)) {
 271     return appendTo;
 272   }
 273   double roundedDouble = rounded.getDouble();
 274   if (isNegative) {
 275     roundedDouble = -roundedDouble;
 276   }
 277   int32_t baseIdx = computeLog10(roundedDouble, TRUE);
 278   double numberToFormat = roundedDouble / _divisors[baseIdx];
 279   UnicodeString variant = _pluralRules->select(numberToFormat);
 280   if (isNegative) {
 281     numberToFormat = -numberToFormat;
 282   }
 283   const CDFUnit* unit = getCDFUnitFallback(_unitsByVariant, variant, baseIdx);
 284   appendTo += unit->prefix;
 285   DecimalFormat::format(numberToFormat, appendTo, pos);
 286   appendTo += unit->suffix;
 287   return appendTo;
 288 }
 289
 290 UnicodeString&
 291 CompactDecimalFormat::format(
 292     double /* number */,
 293     UnicodeString& appendTo,
 294     FieldPositionIterator* /* posIter */,
 295     UErrorCode& status) const {
 296   status = U_UNSUPPORTED_ERROR;
 297   return appendTo;
 298 }
 299
 300 UnicodeString&
 301 CompactDecimalFormat::format(
 302     int32_t number,
 303     UnicodeString& appendTo,
 304     FieldPosition& pos) const {
 305   return format((double) number, appendTo, pos);
 306 }
 307
 308 UnicodeString&
 309 CompactDecimalFormat::format(
 310     int32_t number,
 311     UnicodeString& appendTo,
 312     FieldPosition& pos,
 313     UErrorCode &status) const {
 314   return format((double) number, appendTo, pos, status);
 315 }
 316
 317 UnicodeString&
 318 CompactDecimalFormat::format(
 319     int32_t /* number */,
 320     UnicodeString& appendTo,
 321     FieldPositionIterator* /* posIter */,
 322     UErrorCode& status) const {
 323   status = U_UNSUPPORTED_ERROR;
 324   return appendTo;
 325 }
 326
 327 UnicodeString&
 328 CompactDecimalFormat::format(
 329     int64_t number,
 330     UnicodeString& appendTo,
 331     FieldPosition& pos) const {
 332   return format((double) number, appendTo, pos);
 333 }
 334
 335 UnicodeString&
 336 CompactDecimalFormat::format(
 337     int64_t number,
 338     UnicodeString& appendTo,
 339     FieldPosition& pos,
 340     UErrorCode &status) const {
 341   return format((double) number, appendTo, pos, status);
 342 }
 343
 344 UnicodeString&
 345 CompactDecimalFormat::format(
 346     int64_t /* number */,
 347     UnicodeString& appendTo,
 348     FieldPositionIterator* /* posIter */,
 349     UErrorCode& status) const {
 350   status = U_UNSUPPORTED_ERROR;
 351   return appendTo;
 352 }
 353
 354 UnicodeString&
 355 CompactDecimalFormat::format(
 356     StringPiece /* number */,
 357     UnicodeString& appendTo,
 358     FieldPositionIterator* /* posIter */,
 359     UErrorCode& status) const {
 360   status = U_UNSUPPORTED_ERROR;
 361   return appendTo;
 362 }
 363
 364 UnicodeString&
 365 CompactDecimalFormat::format(
 366     const DigitList& /* number */,
 367     UnicodeString& appendTo,
 368     FieldPositionIterator* /* posIter */,
 369     UErrorCode& status) const {
 370   status = U_UNSUPPORTED_ERROR;
 371   return appendTo;
 372 }
 373
 374 UnicodeString&
 375 CompactDecimalFormat::format(const DigitList& /* number */,
 376                              UnicodeString& appendTo,
 377                              FieldPosition& /* pos */,
 378                              UErrorCode& status) const {
 379   status = U_UNSUPPORTED_ERROR;
 380   return appendTo;
 381 }
 382
 383 void
 384 CompactDecimalFormat::parse(
 385     const UnicodeString& /* text */,
 386     Formattable& /* result */,
 387     ParsePosition& /* parsePosition */) const {
 388 }
 389
 390 void
 391 CompactDecimalFormat::parse(
 392     const UnicodeString& /* text */,
 393     Formattable& /* result */,
 394     UErrorCode& status) const {
 395   status = U_UNSUPPORTED_ERROR;
 396 }
 397
 398 CurrencyAmount*
 399 CompactDecimalFormat::parseCurrency(
 400     const UnicodeString& /* text */,
 401     ParsePosition& /* pos */) const {
 402   return NULL;
 403 }
 404
 405 void CDFLocaleStyleData::Init(UErrorCode& status) {
 406   if (unitsByVariant != NULL) {
 407     return;
 408   }
 409   unitsByVariant = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &status);
 410   if (U_FAILURE(status)) {
 411     return;
 412   }
 413   uhash_setKeyDeleter(unitsByVariant, uprv_free);
 414   uhash_setValueDeleter(unitsByVariant, deleteCDFUnits);
 415 }
 416
 417 CDFLocaleStyleData::~CDFLocaleStyleData() {
 418   setToBogus();
 419 }
 420
 421 void CDFLocaleStyleData::setToBogus() {
 422   if (unitsByVariant != NULL) {
 423     uhash_close(unitsByVariant);
 424     unitsByVariant = NULL;
 425   }
 426 }
 427
 428 void CDFLocaleData::Init(UErrorCode& status) {
 429   shortData.Init(status);
 430   if (U_FAILURE(status)) {
 431     return;
 432   }
 433   longData.Init(status);
 434 }
 435
 436 // Helper method for operator=
 437 static UBool divisors_equal(const double* lhs, const double* rhs) {
 438   for (int32_t i = 0; i < MAX_DIGITS; ++i) {
 439     if (lhs[i] != rhs[i]) {
 440       return FALSE;
 441     }
 442   }
 443   return TRUE;
 444 }
 445
 446 // getCDFLocaleStyleData returns pointer to formatting data for given locale and
 447 // style within the global cache. On cache miss, getCDFLocaleStyleData loads
 448 // the data from CLDR into the global cache before returning the pointer. If a
 449 // UNUM_LONG data is requested for a locale, and that locale does not have
 450 // UNUM_LONG data, getCDFLocaleStyleData will fall back to UNUM_SHORT data for
 451 // that locale.
 452 static const CDFLocaleStyleData* getCDFLocaleStyleData(const Locale& inLocale, UNumberCompactStyle style, UErrorCode& status) {
 453   if (U_FAILURE(status)) {
 454     return NULL;
 455   }
 456   CDFLocaleData* result = NULL;
 457   const char* key = inLocale.getName();
 458   {
 459     Mutex lock(&gCompactDecimalMetaLock);
 460     if (gCompactDecimalData == NULL) {
 461       gCompactDecimalData = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &status);
 462       if (U_FAILURE(status)) {
 463         return NULL;
 464       }
 465       uhash_setKeyDeleter(gCompactDecimalData, uprv_free);
 466       uhash_setValueDeleter(gCompactDecimalData, deleteCDFLocaleData);
 467       ucln_i18n_registerCleanup(UCLN_I18N_CDFINFO, cdf_cleanup);
 468     } else {
 469       result = (CDFLocaleData*) uhash_get(gCompactDecimalData, key);
 470     }
 471   }
 472   if (result != NULL) {
 473     return extractDataByStyleEnum(*result, style, status);
 474   }
 475
 476   result = loadCDFLocaleData(inLocale, status);
 477   if (U_FAILURE(status)) {
 478     return NULL;
 479   }
 480
 481   {
 482     Mutex lock(&gCompactDecimalMetaLock);
 483     CDFLocaleData* temp = (CDFLocaleData*) uhash_get(gCompactDecimalData, key);
 484     if (temp != NULL) {
 485       delete result;
 486       result = temp;
 487     } else {
 488       uhash_put(gCompactDecimalData, uprv_strdup(key), (void*) result, &status);
 489       if (U_FAILURE(status)) {
 490         return NULL;
 491       }
 492     }
 493   }
 494   return extractDataByStyleEnum(*result, style, status);
 495 }
 496
 497 static const CDFLocaleStyleData* extractDataByStyleEnum(const CDFLocaleData& data, UNumberCompactStyle style, UErrorCode& status) {
 498   switch (style) {
 499     case UNUM_SHORT:
 500       return &data.shortData;
 501     case UNUM_LONG:
 502       if (!data.longData.isBogus()) {
 503         return &data.longData;
 504       }
 505       return &data.shortData;
 506     default:
 507       status = U_ILLEGAL_ARGUMENT_ERROR;
 508       return NULL;
 509   }
 510 }
 511
 512 // loadCDFLocaleData loads formatting data from CLDR for a given locale. The
 513 // caller owns the returned pointer.
 514 static CDFLocaleData* loadCDFLocaleData(const Locale& inLocale, UErrorCode& status) {
 515   if (U_FAILURE(status)) {
 516     return NULL;
 517   }
 518   CDFLocaleData* result = new CDFLocaleData;
 519   if (result == NULL) {
 520     status = U_MEMORY_ALLOCATION_ERROR;
 521     return NULL;
 522   }
 523   result->Init(status);
 524   if (U_FAILURE(status)) {
 525     delete result;
 526     return NULL;
 527   }
 528
 529   load(inLocale, result, status);
 530
 531   if (U_FAILURE(status)) {
 532     delete result;
 533     return NULL;
 534   }
 535   return result;
 536 }
 537
 538 namespace {
 539
 540 struct CmptDecDataSink : public ResourceSink {
 541
 542   CDFLocaleData& dataBundle; // Where to save values when they are read
 543   UBool isLatin; // Whether or not we are traversing the Latin tree
 544   UBool isFallback; // Whether or not we are traversing the Latin tree as fallback
 545
 546   enum EPatternsTableKey { PATTERNS_SHORT, PATTERNS_LONG };
 547   enum EFormatsTableKey { DECIMAL_FORMAT, CURRENCY_FORMAT };
 548
 549   /*
 550    * NumberElements{              <-- top (numbering system table)
 551    *  latn{                       <-- patternsTable (one per numbering system)
 552    *    patternsLong{             <-- formatsTable (one per pattern)
 553    *      decimalFormat{          <-- powersOfTenTable (one per format)
 554    *        1000{                 <-- pluralVariantsTable (one per power of ten)
 555    *          one{"0 thousand"}   <-- plural variant and template
 556    */
 557
 558   CmptDecDataSink(CDFLocaleData& _dataBundle)
 559     : dataBundle(_dataBundle), isLatin(FALSE), isFallback(FALSE) {}
 560   virtual ~CmptDecDataSink();
 561
 562   virtual void put(const char *key, ResourceValue &value, UBool isRoot, UErrorCode &errorCode) {
 563     // SPECIAL CASE: Don't consume root in the non-Latin numbering system
 564     if (isRoot && !isLatin) { return; }
 565
 566     ResourceTable patternsTable = value.getTable(errorCode);
 567     if (U_FAILURE(errorCode)) { return; }
 568     for (int i1 = 0; patternsTable.getKeyAndValue(i1, key, value); ++i1) {
 569
 570       // Check for patternsShort or patternsLong
 571       EPatternsTableKey patternsTableKey;
 572       if (uprv_strcmp(key, gPatternsShort) == 0) {
 573         patternsTableKey = PATTERNS_SHORT;
 574       } else if (uprv_strcmp(key, gPatternsLong) == 0) {
 575         patternsTableKey = PATTERNS_LONG;
 576       } else {
 577         continue;
 578       }
 579
 580       // Traverse into the formats table
 581       ResourceTable formatsTable = value.getTable(errorCode);
 582       if (U_FAILURE(errorCode)) { return; }
 583       for (int i2 = 0; formatsTable.getKeyAndValue(i2, key, value); ++i2) {
 584
 585         // Check for decimalFormat or currencyFormat
 586         EFormatsTableKey formatsTableKey;
 587         if (uprv_strcmp(key, gDecimalFormatTag) == 0) {
 588           formatsTableKey = DECIMAL_FORMAT;
 589         // TODO: Enable this statement when currency support is added
 590         // } else if (uprv_strcmp(key, gCurrencyFormat) == 0) {
 591         //   formatsTableKey = CURRENCY_FORMAT;
 592         } else {
 593           continue;
 594         }
 595
 596         // Set the current style and destination based on the two keys
 597         UNumberCompactStyle style;
 598         CDFLocaleStyleData* destination = NULL;
 599         if (patternsTableKey == PATTERNS_LONG
 600             && formatsTableKey == DECIMAL_FORMAT) {
 601           style = UNUM_LONG;
 602           destination = &dataBundle.longData;
 603         } else if (patternsTableKey == PATTERNS_SHORT
 604             && formatsTableKey == DECIMAL_FORMAT) {
 605           style = UNUM_SHORT;
 606           destination = &dataBundle.shortData;
 607         // TODO: Enable the following statements when currency support is added
 608         // } else if (patternsTableKey == PATTERNS_SHORT
 609         //     && formatsTableKey == CURRENCY_FORMAT) {
 610         //   style = UNUM_SHORT_CURRENCY; // or whatever the enum gets named
 611         //   destination = &dataBundle.shortCurrencyData;
 612         // } else {
 613         //   // Silently ignore this case
 614         //   continue;
 615         }
 616
 617         // SPECIAL CASE: RULES FOR WHETHER OR NOT TO CONSUME THIS TABLE:
 618         //   1) Don't consume longData if shortData was consumed from the non-Latin
 619         //      locale numbering system
 620         //   2) Don't consume longData for the first time if this is the root bundle and
 621         //      shortData is already populated from a more specific locale. Note that if
 622         //      both longData and shortData are both only in root, longData will be
 623         //      consumed since it is alphabetically before shortData in the bundle.
 624         if (isFallback
 625                 && style == UNUM_LONG
 626                 && !dataBundle.shortData.isEmpty()
 627                 && !dataBundle.shortData.fromFallback) {
 628             continue;
 629         }
 630         if (isRoot
 631                 && style == UNUM_LONG
 632                 && dataBundle.longData.isEmpty()
 633                 && !dataBundle.shortData.isEmpty()) {
 634             continue;
 635         }
 636
 637         // Set the "fromFallback" flag on the data object
 638         destination->fromFallback = isFallback;
 639
 640         // Traverse into the powers of ten table
 641         ResourceTable powersOfTenTable = value.getTable(errorCode);
 642         if (U_FAILURE(errorCode)) { return; }
 643         for (int i3 = 0; powersOfTenTable.getKeyAndValue(i3, key, value); ++i3) {
 644
 645           // The key will always be some even power of 10. e.g 10000.
 646           char* endPtr = NULL;
 647           double power10 = uprv_strtod(key, &endPtr);
 648           if (*endPtr != 0) {
 649             errorCode = U_INTERNAL_PROGRAM_ERROR;
 650             return;
 651           }
 652           int32_t log10Value = computeLog10(power10, FALSE);
 653
 654           // Silently ignore divisors that are too big.
 655           if (log10Value >= MAX_DIGITS) continue;
 656
 657           // Iterate over the plural variants ("one", "other", etc)
 658           ResourceTable pluralVariantsTable = value.getTable(errorCode);
 659           if (U_FAILURE(errorCode)) { return; }
 660           for (int i4 = 0; pluralVariantsTable.getKeyAndValue(i4, key, value); ++i4) {
 661             const char* pluralVariant = key;
 662             const UnicodeString formatStr = value.getUnicodeString(errorCode);
 663
 664             // Copy the data into the in-memory data bundle (do not overwrite
 665             // existing values)
 666             int32_t numZeros = populatePrefixSuffix(
 667                 pluralVariant, log10Value, formatStr,
 668                 destination->unitsByVariant, FALSE, errorCode);
 669
 670             // If populatePrefixSuffix returns -1, it means that this key has been
 671             // encountered already.
 672             if (numZeros < 0) {
 673               continue;
 674             }
 675
 676             // Set the divisor, which is based on the number of zeros in the template
 677             // string.  If the divisor from here is different from the one previously
 678             // stored, it means that the number of zeros in different plural variants
 679             // differs; throw an exception.
 680             // TODO: How should I check for floating-point errors here?
 681             //       Is there a good reason why "divisor" is double and not long like Java?
 682             double divisor = calculateDivisor(power10, numZeros);
 683             if (destination->divisors[log10Value] != 0.0
 684                 && destination->divisors[log10Value] != divisor) {
 685               errorCode = U_INTERNAL_PROGRAM_ERROR;
 686               return;
 687             }
 688             destination->divisors[log10Value] = divisor;
 689           }
 690         }
 691       }
 692     }
 693   }
 694 };
 695
 696 // Virtual destructors must be defined out of line.
 697 CmptDecDataSink::~CmptDecDataSink() {}
 698
 699 } // namespace
 700
 701 static void load(const Locale& inLocale, CDFLocaleData* result, UErrorCode& status) {
 702   LocalPointer<NumberingSystem> ns(NumberingSystem::createInstance(inLocale, status));
 703   if (U_FAILURE(status)) {
 704     return;
 705   }
 706   const char* nsName = ns->getName();
 707
 708   LocalUResourceBundlePointer resource(ures_open(NULL, inLocale.getName(), &status));
 709   if (U_FAILURE(status)) {
 710     return;
 711   }
 712   CmptDecDataSink sink(*result);
 713   sink.isFallback = FALSE;
 714
 715   // First load the number elements data if nsName is not Latin.
 716   if (uprv_strcmp(nsName, gLatnTag) != 0) {
 717     sink.isLatin = FALSE;
 718     CharString path;
 719     path.append(gNumberElementsTag, status)
 720         .append('/', status)
 721         .append(nsName, status);
 722     ures_getAllItemsWithFallback(resource.getAlias(), path.data(), sink, status);
 723     if (status == U_MISSING_RESOURCE_ERROR) {
 724       // Silently ignore and use Latin
 725       status = U_ZERO_ERROR;
 726     } else if  (U_FAILURE(status)) {
 727       return;
 728     }
 729     sink.isFallback = TRUE;
 730   }
 731
 732   // Now load Latin.
 733   sink.isLatin = TRUE;
 734   ures_getAllItemsWithFallback(resource.getAlias(), gLatnPath, sink, status);
 735   if (U_FAILURE(status)) return;
 736
 737   // If longData is empty, default it to be equal to shortData
 738   if (result->longData.isEmpty()) {
 739     result->longData.setToBogus();
 740   }
 741
 742   // Check for "other" variants in each of the three data classes, and resolve missing elements.
 743
 744   if (!result->longData.isBogus()) {
 745     checkForOtherVariants(&result->longData, status);
 746     if (U_FAILURE(status)) return;
 747     fillInMissing(&result->longData);
 748   }
 749
 750   checkForOtherVariants(&result->shortData, status);
 751   if (U_FAILURE(status)) return;
 752   fillInMissing(&result->shortData);
 753
 754   // TODO: Enable this statement when currency support is added
 755   // checkForOtherVariants(&result->shortCurrencyData, status);
 756   // if (U_FAILURE(status)) return;
 757   // fillInMissing(&result->shortCurrencyData);
 758 }
 759
 760 // populatePrefixSuffix Adds a specific prefix-suffix pair to result for a
 761 // given variant and log10 value.
 762 // variant is 'zero', 'one', 'two', 'few', 'many', or 'other'.
 763 // formatStr is the format string from which the prefix and suffix are
 764 // extracted. It is usually of form 'Pefix 000 suffix'.
 765 // populatePrefixSuffix returns the number of 0's found in formatStr
 766 // before the decimal point.
 767 // In the special case that formatStr contains only spaces for prefix
 768 // and suffix, populatePrefixSuffix returns log10Value + 1.
 769 static int32_t populatePrefixSuffix(
 770     const char* variant, int32_t log10Value, const UnicodeString& formatStr, UHashtable* result, UBool overwrite, UErrorCode& status) {
 771   if (U_FAILURE(status)) {
 772     return 0;
 773   }
 774
 775   // ICU 59 HACK: Ignore negative part of format string, mimicking ICU 58 behavior.
 776   // TODO(sffc): Make sure this is fixed during the overhaul port in ICU 60.
 777   int32_t semiPos = formatStr.indexOf(';', 0);
 778   if (semiPos == -1) {
 779     semiPos = formatStr.length();
 780   }
 781   UnicodeString positivePart = formatStr.tempSubString(0, semiPos);
 782
 783   int32_t firstIdx = positivePart.indexOf(kZero, UPRV_LENGTHOF(kZero), 0);
 784   // We must have 0's in format string.
 785   if (firstIdx == -1) {
 786     status = U_INTERNAL_PROGRAM_ERROR;
 787     return 0;
 788   }
 789   int32_t lastIdx = positivePart.lastIndexOf(kZero, UPRV_LENGTHOF(kZero), firstIdx);
 790   CDFUnit* unit = createCDFUnit(variant, log10Value, result, status);
 791   if (U_FAILURE(status)) {
 792     return 0;
 793   }
 794
 795   // Return -1 if we are not overwriting an existing value
 796   if (unit->isSet() && !overwrite) {
 797     return -1;
 798   }
 799   unit->markAsSet();
 800
 801   // Everything up to first 0 is the prefix
 802   unit->prefix = positivePart.tempSubString(0, firstIdx);
 803   fixQuotes(unit->prefix);
 804   // Everything beyond the last 0 is the suffix
 805   unit->suffix = positivePart.tempSubString(lastIdx + 1);
 806   fixQuotes(unit->suffix);
 807
 808   // If there is effectively no prefix or suffix, ignore the actual number of
 809   // 0's and act as if the number of 0's matches the size of the number.
 810   if (onlySpaces(unit->prefix) && onlySpaces(unit->suffix)) {
 811     return log10Value + 1;
 812   }
 813
 814   // Calculate number of zeros before decimal point
 815   int32_t idx = firstIdx + 1;
 816   while (idx <= lastIdx && positivePart.charAt(idx) == u_0) {
 817     ++idx;
 818   }
 819   return (idx - firstIdx);
 820 }
 821
 822 // Calculate a divisor based on the magnitude and number of zeros in the
 823 // template string.
 824 static double calculateDivisor(double power10, int32_t numZeros) {
 825   double divisor = power10;
 826   for (int32_t i = 1; i < numZeros; ++i) {
 827     divisor /= 10.0;
 828   }
 829   return divisor;
 830 }
 831
 832 static UBool onlySpaces(UnicodeString u) {
 833   return u.trim().length() == 0;
 834 }
 835
 836 // fixQuotes unescapes single quotes. Don''t -> Don't. Letter 'j' -> Letter j.
 837 // Modifies s in place.
 838 static void fixQuotes(UnicodeString& s) {
 839   QuoteState state = OUTSIDE;
 840   int32_t len = s.length();
 841   int32_t dest = 0;
 842   for (int32_t i = 0; i < len; ++i) {
 843     UChar ch = s.charAt(i);
 844     if (ch == u_apos) {
 845       if (state == INSIDE_EMPTY) {
 846         s.setCharAt(dest, ch);
 847         ++dest;
 848       }
 849     } else {
 850       s.setCharAt(dest, ch);
 851       ++dest;
 852     }
 853
 854     // Update state
 855     switch (state) {
 856       case OUTSIDE:
 857         state = ch == u_apos ? INSIDE_EMPTY : OUTSIDE;
 858         break;
 859       case INSIDE_EMPTY:
 860       case INSIDE_FULL:
 861         state = ch == u_apos ? OUTSIDE : INSIDE_FULL;
 862         break;
 863       default:
 864         break;
 865     }
 866   }
 867   s.truncate(dest);
 868 }
 869
 870 // Checks to make sure that an "other" variant is present in all
 871 // powers of 10.
 872 static void checkForOtherVariants(CDFLocaleStyleData* result,
 873     UErrorCode& status) {
 874   if (result == NULL || result->unitsByVariant == NULL) {
 875     return;
 876   }
 877
 878   const CDFUnit* otherByBase =
 879       (const CDFUnit*) uhash_get(result->unitsByVariant, gOther);
 880   if (otherByBase == NULL) {
 881     status = U_INTERNAL_PROGRAM_ERROR;
 882     return;
 883   }
 884
 885   // Check all other plural variants, and make sure that if
 886   // any of them are populated, then other is also populated
 887   int32_t pos = UHASH_FIRST;
 888   const UHashElement* element;
 889   while ((element = uhash_nextElement(result->unitsByVariant, &pos)) != NULL) {
 890     CDFUnit* variantsByBase = (CDFUnit*) element->value.pointer;
 891     if (variantsByBase == otherByBase) continue;
 892     for (int32_t log10Value = 0; log10Value < MAX_DIGITS; ++log10Value) {
 893       if (variantsByBase[log10Value].isSet()
 894           && !otherByBase[log10Value].isSet()) {
 895         status = U_INTERNAL_PROGRAM_ERROR;
 896         return;
 897       }
 898     }
 899   }
 900 }
 901
 902 // fillInMissing ensures that the data in result is complete.
 903 // result data is complete if for each variant in result, there exists
 904 // a prefix-suffix pair for each log10 value and there also exists
 905 // a divisor for each log10 value.
 906 //
 907 // First this function figures out for which log10 values, the other
 908 // variant already had data. These are the same log10 values defined
 909 // in CLDR.
 910 //
 911 // For each log10 value not defined in CLDR, it uses the divisor for
 912 // the last defined log10 value or 1.
 913 //
 914 // Then for each variant, it does the following. For each log10
 915 // value not defined in CLDR, copy the prefix-suffix pair from the
 916 // previous log10 value. If log10 value is defined in CLDR but is
 917 // missing from given variant, copy the prefix-suffix pair for that
 918 // log10 value from the 'other' variant.
 919 static void fillInMissing(CDFLocaleStyleData* result) {
 920   const CDFUnit* otherUnits =
 921       (const CDFUnit*) uhash_get(result->unitsByVariant, gOther);
 922   UBool definedInCLDR[MAX_DIGITS];
 923   double lastDivisor = 1.0;
 924   for (int32_t i = 0; i < MAX_DIGITS; ++i) {
 925     if (!otherUnits[i].isSet()) {
 926       result->divisors[i] = lastDivisor;
 927       definedInCLDR[i] = FALSE;
 928     } else {
 929       lastDivisor = result->divisors[i];
 930       definedInCLDR[i] = TRUE;
 931     }
 932   }
 933   // Iterate over each variant.
 934   int32_t pos = UHASH_FIRST;
 935   const UHashElement* element = uhash_nextElement(result->unitsByVariant, &pos);
 936   for (;element != NULL; element = uhash_nextElement(result->unitsByVariant, &pos)) {
 937     CDFUnit* units = (CDFUnit*) element->value.pointer;
 938     for (int32_t i = 0; i < MAX_DIGITS; ++i) {
 939       if (definedInCLDR[i]) {
 940         if (!units[i].isSet()) {
 941           units[i] = otherUnits[i];
 942         }
 943       } else {
 944         if (i == 0) {
 945           units[0].markAsSet();
 946         } else {
 947           units[i] = units[i - 1];
 948         }
 949       }
 950     }
 951   }
 952 }
 953
 954 // computeLog10 computes floor(log10(x)). If inRange is TRUE, the biggest
 955 // value computeLog10 will return MAX_DIGITS -1 even for
 956 // numbers > 10^MAX_DIGITS. If inRange is FALSE, computeLog10 will return
 957 // up to MAX_DIGITS.
 958 static int32_t computeLog10(double x, UBool inRange) {
 959   int32_t result = 0;
 960   int32_t max = inRange ? MAX_DIGITS - 1 : MAX_DIGITS;
 961   while (x >= 10.0) {
 962     x /= 10.0;
 963     ++result;
 964     if (result == max) {
 965       break;
 966     }
 967   }
 968   return result;
 969 }
 970
 971 // createCDFUnit returns a pointer to the prefix-suffix pair for a given
 972 // variant and log10 value within table. If no such prefix-suffix pair is
 973 // stored in table, one is created within table before returning pointer.
 974 static CDFUnit* createCDFUnit(const char* variant, int32_t log10Value, UHashtable* table, UErrorCode& status) {
 975   if (U_FAILURE(status)) {
 976     return NULL;
 977   }
 978   CDFUnit *cdfUnit = (CDFUnit*) uhash_get(table, variant);
 979   if (cdfUnit == NULL) {
 980     cdfUnit = new CDFUnit[MAX_DIGITS];
 981     if (cdfUnit == NULL) {
 982       status = U_MEMORY_ALLOCATION_ERROR;
 983       return NULL;
 984     }
 985     uhash_put(table, uprv_strdup(variant), cdfUnit, &status);
 986     if (U_FAILURE(status)) {
 987       return NULL;
 988     }
 989   }
 990   CDFUnit* result = &cdfUnit[log10Value];
 991   return result;
 992 }
 993
 994 // getCDFUnitFallback returns a pointer to the prefix-suffix pair for a given
 995 // variant and log10 value within table. If the given variant doesn't exist, it
 996 // falls back to the OTHER variant. Therefore, this method will always return
 997 // some non-NULL value.
 998 static const CDFUnit* getCDFUnitFallback(const UHashtable* table, const UnicodeString& variant, int32_t log10Value) {
 999   CharString cvariant;
1000   UErrorCode status = U_ZERO_ERROR;
1001   const CDFUnit *cdfUnit = NULL;
1002   cvariant.appendInvariantChars(variant, status);
1003   if (!U_FAILURE(status)) {
1004     cdfUnit = (const CDFUnit*) uhash_get(table, cvariant.data());
1005   }
1006   if (cdfUnit == NULL) {
1007     cdfUnit = (const CDFUnit*) uhash_get(table, gOther);
1008   }
1009   return &cdfUnit[log10Value];
1010 }
1011
1012 U_NAMESPACE_END
1013 #endif