]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/compactdecimalformat.cpp
ICU-62141.0.1.tar.gz
[apple/icu.git] / icuSources / i18n / compactdecimalformat.cpp
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
51004dcb
A
3/*
4*******************************************************************************
2ca993e8 5* Copyright (C) 1997-2015, International Business Machines Corporation and *
51004dcb
A
6* others. All Rights Reserved. *
7*******************************************************************************
8*
9* File COMPACTDECIMALFORMAT.CPP
10*
11********************************************************************************
12*/
13#include "unicode/utypes.h"
14
15#if !UCONFIG_NO_FORMATTING
16
17#include "charstr.h"
18#include "cstring.h"
19#include "digitlst.h"
20#include "mutex.h"
21#include "unicode/compactdecimalformat.h"
22#include "unicode/numsys.h"
23#include "unicode/plurrule.h"
24#include "unicode/ures.h"
25#include "ucln_in.h"
26#include "uhash.h"
27#include "umutex.h"
28#include "unicode/ures.h"
29#include "uresimp.h"
30
51004dcb
A
31// Maps locale name to CDFLocaleData struct.
32static UHashtable* gCompactDecimalData = NULL;
33static UMutex gCompactDecimalMetaLock = U_MUTEX_INITIALIZER;
34
35U_NAMESPACE_BEGIN
36
37static const int32_t MAX_DIGITS = 15;
38static const char gOther[] = "other";
39static const char gLatnTag[] = "latn";
40static const char gNumberElementsTag[] = "NumberElements";
41static const char gDecimalFormatTag[] = "decimalFormat";
42static const char gPatternsShort[] = "patternsShort";
43static const char gPatternsLong[] = "patternsLong";
f3c0d7a5 44static const char gLatnPath[] = "NumberElements/latn";
51004dcb
A
45
46static const UChar u_0 = 0x30;
47static const UChar u_apos = 0x27;
48
49static const UChar kZero[] = {u_0};
50
// State machine used by fixQuotes while unescaping single quotes.
enum QuoteState {
    OUTSIDE,       // not inside a quoted run
    INSIDE_EMPTY,  // just read the opening apostrophe; nothing quoted yet
    INSIDE_FULL    // inside a quoted run that already has content
};
57
// Fallback flags. NOTE(review): not referenced anywhere else in this file.
enum FallbackFlags {
    ANY = 0,
    MUST = 1,
    NOT_ROOT = 2
    // Presumably bit flags: the next one would be 4, then 8, etc.
};
64
65
66// CDFUnit represents a prefix-suffix pair for a particular variant
67// and log10 value.
68struct CDFUnit : public UMemory {
69 UnicodeString prefix;
70 UnicodeString suffix;
71 inline CDFUnit() : prefix(), suffix() {
72 prefix.setToBogus();
73 }
74 inline ~CDFUnit() {}
75 inline UBool isSet() const {
76 return !prefix.isBogus();
77 }
78 inline void markAsSet() {
79 prefix.remove();
80 }
81};
82
83// CDFLocaleStyleData contains formatting data for a particular locale
84// and style.
85class CDFLocaleStyleData : public UMemory {
86 public:
87 // What to divide by for each log10 value when formatting. These values
88 // will be powers of 10. For English, would be:
89 // 1, 1, 1, 1000, 1000, 1000, 1000000, 1000000, 1000000, 1000000000 ...
90 double divisors[MAX_DIGITS];
91 // Maps plural variants to CDFUnit[MAX_DIGITS] arrays.
92 // To format a number x,
93 // first compute log10(x). Compute displayNum = (x / divisors[log10(x)]).
94 // Compute the plural variant for displayNum
95 // (e.g zero, one, two, few, many, other).
96 // Compute cdfUnits = unitsByVariant[pluralVariant].
97 // Prefix and suffix to use at cdfUnits[log10(x)]
98 UHashtable* unitsByVariant;
f3c0d7a5
A
99 // A flag for whether or not this CDFLocaleStyleData was loaded from the
100 // Latin numbering system as a fallback from the locale numbering system.
101 // This value is meaningless if the object is bogus or empty.
102 UBool fromFallback;
103 inline CDFLocaleStyleData() : unitsByVariant(NULL), fromFallback(FALSE) {
104 uprv_memset(divisors, 0, sizeof(divisors));
105 }
51004dcb
A
106 ~CDFLocaleStyleData();
107 // Init initializes this object.
108 void Init(UErrorCode& status);
109 inline UBool isBogus() const {
110 return unitsByVariant == NULL;
111 }
112 void setToBogus();
f3c0d7a5
A
113 UBool isEmpty() {
114 return unitsByVariant == NULL || unitsByVariant->count == 0;
115 }
51004dcb
A
116 private:
117 CDFLocaleStyleData(const CDFLocaleStyleData&);
118 CDFLocaleStyleData& operator=(const CDFLocaleStyleData&);
119};
120
121// CDFLocaleData contains formatting data for a particular locale.
122struct CDFLocaleData : public UMemory {
123 CDFLocaleStyleData shortData;
124 CDFLocaleStyleData longData;
125 inline CDFLocaleData() : shortData(), longData() { }
126 inline ~CDFLocaleData() { }
127 // Init initializes this object.
128 void Init(UErrorCode& status);
129};
130
131U_NAMESPACE_END
132
133U_CDECL_BEGIN
134
135static UBool U_CALLCONV cdf_cleanup(void) {
136 if (gCompactDecimalData != NULL) {
137 uhash_close(gCompactDecimalData);
138 gCompactDecimalData = NULL;
139 }
140 return TRUE;
141}
142
143static void U_CALLCONV deleteCDFUnits(void* ptr) {
144 delete [] (icu::CDFUnit*) ptr;
145}
146
147static void U_CALLCONV deleteCDFLocaleData(void* ptr) {
148 delete (icu::CDFLocaleData*) ptr;
149}
150
151U_CDECL_END
152
153U_NAMESPACE_BEGIN
154
155static UBool divisors_equal(const double* lhs, const double* rhs);
156static const CDFLocaleStyleData* getCDFLocaleStyleData(const Locale& inLocale, UNumberCompactStyle style, UErrorCode& status);
157
158static const CDFLocaleStyleData* extractDataByStyleEnum(const CDFLocaleData& data, UNumberCompactStyle style, UErrorCode& status);
159static CDFLocaleData* loadCDFLocaleData(const Locale& inLocale, UErrorCode& status);
f3c0d7a5
A
160static void load(const Locale& inLocale, CDFLocaleData* result, UErrorCode& status);
161static int32_t populatePrefixSuffix(const char* variant, int32_t log10Value, const UnicodeString& formatStr, UHashtable* result, UBool overwrite, UErrorCode& status);
162static double calculateDivisor(double power10, int32_t numZeros);
51004dcb
A
163static UBool onlySpaces(UnicodeString u);
164static void fixQuotes(UnicodeString& s);
f3c0d7a5 165static void checkForOtherVariants(CDFLocaleStyleData* result, UErrorCode& status);
51004dcb
A
166static void fillInMissing(CDFLocaleStyleData* result);
167static int32_t computeLog10(double x, UBool inRange);
168static CDFUnit* createCDFUnit(const char* variant, int32_t log10Value, UHashtable* table, UErrorCode& status);
169static const CDFUnit* getCDFUnitFallback(const UHashtable* table, const UnicodeString& variant, int32_t log10Value);
170
171UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CompactDecimalFormat)
172
173CompactDecimalFormat::CompactDecimalFormat(
174 const DecimalFormat& decimalFormat,
175 const UHashtable* unitsByVariant,
176 const double* divisors,
177 PluralRules* pluralRules)
178 : DecimalFormat(decimalFormat), _unitsByVariant(unitsByVariant), _divisors(divisors), _pluralRules(pluralRules) {
179}
180
181CompactDecimalFormat::CompactDecimalFormat(const CompactDecimalFormat& source)
182 : DecimalFormat(source), _unitsByVariant(source._unitsByVariant), _divisors(source._divisors), _pluralRules(source._pluralRules->clone()) {
183}
184
185CompactDecimalFormat* U_EXPORT2
186CompactDecimalFormat::createInstance(
187 const Locale& inLocale, UNumberCompactStyle style, UErrorCode& status) {
188 LocalPointer<DecimalFormat> decfmt((DecimalFormat*) NumberFormat::makeInstance(inLocale, UNUM_DECIMAL, TRUE, status));
189 if (U_FAILURE(status)) {
190 return NULL;
191 }
192 LocalPointer<PluralRules> pluralRules(PluralRules::forLocale(inLocale, status));
193 if (U_FAILURE(status)) {
194 return NULL;
195 }
196 const CDFLocaleStyleData* data = getCDFLocaleStyleData(inLocale, style, status);
197 if (U_FAILURE(status)) {
198 return NULL;
199 }
200 CompactDecimalFormat* result =
201 new CompactDecimalFormat(*decfmt, data->unitsByVariant, data->divisors, pluralRules.getAlias());
202 if (result == NULL) {
203 status = U_MEMORY_ALLOCATION_ERROR;
204 return NULL;
205 }
206 pluralRules.orphan();
207 result->setMaximumSignificantDigits(3);
208 result->setSignificantDigitsUsed(TRUE);
209 result->setGroupingUsed(FALSE);
210 return result;
211}
212
213CompactDecimalFormat&
214CompactDecimalFormat::operator=(const CompactDecimalFormat& rhs) {
215 if (this != &rhs) {
216 DecimalFormat::operator=(rhs);
217 _unitsByVariant = rhs._unitsByVariant;
218 _divisors = rhs._divisors;
219 delete _pluralRules;
220 _pluralRules = rhs._pluralRules->clone();
221 }
222 return *this;
223}
224
225CompactDecimalFormat::~CompactDecimalFormat() {
226 delete _pluralRules;
227}
228
229
230Format*
231CompactDecimalFormat::clone(void) const {
232 return new CompactDecimalFormat(*this);
233}
234
235UBool
236CompactDecimalFormat::operator==(const Format& that) const {
237 if (this == &that) {
238 return TRUE;
239 }
240 return (DecimalFormat::operator==(that) && eqHelper((const CompactDecimalFormat&) that));
241}
242
243UBool
244CompactDecimalFormat::eqHelper(const CompactDecimalFormat& that) const {
245 return uhash_equals(_unitsByVariant, that._unitsByVariant) && divisors_equal(_divisors, that._divisors) && (*_pluralRules == *that._pluralRules);
246}
247
248UnicodeString&
249CompactDecimalFormat::format(
250 double number,
251 UnicodeString& appendTo,
252 FieldPosition& pos) const {
2ca993e8
A
253 UErrorCode status = U_ZERO_ERROR;
254 return format(number, appendTo, pos, status);
255}
256
257UnicodeString&
258CompactDecimalFormat::format(
259 double number,
260 UnicodeString& appendTo,
261 FieldPosition& pos,
262 UErrorCode &status) const {
263 if (U_FAILURE(status)) {
264 return appendTo;
265 }
51004dcb
A
266 DigitList orig, rounded;
267 orig.set(number);
268 UBool isNegative;
51004dcb
A
269 _round(orig, rounded, isNegative, status);
270 if (U_FAILURE(status)) {
271 return appendTo;
272 }
273 double roundedDouble = rounded.getDouble();
274 if (isNegative) {
275 roundedDouble = -roundedDouble;
276 }
277 int32_t baseIdx = computeLog10(roundedDouble, TRUE);
278 double numberToFormat = roundedDouble / _divisors[baseIdx];
279 UnicodeString variant = _pluralRules->select(numberToFormat);
280 if (isNegative) {
281 numberToFormat = -numberToFormat;
282 }
283 const CDFUnit* unit = getCDFUnitFallback(_unitsByVariant, variant, baseIdx);
284 appendTo += unit->prefix;
285 DecimalFormat::format(numberToFormat, appendTo, pos);
286 appendTo += unit->suffix;
287 return appendTo;
288}
289
290UnicodeString&
291CompactDecimalFormat::format(
292 double /* number */,
293 UnicodeString& appendTo,
294 FieldPositionIterator* /* posIter */,
295 UErrorCode& status) const {
296 status = U_UNSUPPORTED_ERROR;
297 return appendTo;
298}
299
2ca993e8
A
300UnicodeString&
301CompactDecimalFormat::format(
302 int32_t number,
303 UnicodeString& appendTo,
304 FieldPosition& pos) const {
305 return format((double) number, appendTo, pos);
306}
307
308UnicodeString&
309CompactDecimalFormat::format(
310 int32_t number,
311 UnicodeString& appendTo,
312 FieldPosition& pos,
313 UErrorCode &status) const {
314 return format((double) number, appendTo, pos, status);
315}
316
317UnicodeString&
318CompactDecimalFormat::format(
319 int32_t /* number */,
320 UnicodeString& appendTo,
321 FieldPositionIterator* /* posIter */,
322 UErrorCode& status) const {
323 status = U_UNSUPPORTED_ERROR;
324 return appendTo;
325}
326
51004dcb
A
327UnicodeString&
328CompactDecimalFormat::format(
329 int64_t number,
330 UnicodeString& appendTo,
331 FieldPosition& pos) const {
332 return format((double) number, appendTo, pos);
333}
334
2ca993e8
A
335UnicodeString&
336CompactDecimalFormat::format(
337 int64_t number,
338 UnicodeString& appendTo,
339 FieldPosition& pos,
340 UErrorCode &status) const {
341 return format((double) number, appendTo, pos, status);
342}
343
51004dcb
A
344UnicodeString&
345CompactDecimalFormat::format(
346 int64_t /* number */,
347 UnicodeString& appendTo,
348 FieldPositionIterator* /* posIter */,
349 UErrorCode& status) const {
350 status = U_UNSUPPORTED_ERROR;
351 return appendTo;
352}
353
354UnicodeString&
355CompactDecimalFormat::format(
f3c0d7a5 356 StringPiece /* number */,
51004dcb
A
357 UnicodeString& appendTo,
358 FieldPositionIterator* /* posIter */,
359 UErrorCode& status) const {
360 status = U_UNSUPPORTED_ERROR;
361 return appendTo;
362}
363
364UnicodeString&
365CompactDecimalFormat::format(
366 const DigitList& /* number */,
367 UnicodeString& appendTo,
368 FieldPositionIterator* /* posIter */,
369 UErrorCode& status) const {
370 status = U_UNSUPPORTED_ERROR;
371 return appendTo;
372}
373
374UnicodeString&
375CompactDecimalFormat::format(const DigitList& /* number */,
376 UnicodeString& appendTo,
377 FieldPosition& /* pos */,
378 UErrorCode& status) const {
379 status = U_UNSUPPORTED_ERROR;
380 return appendTo;
381}
382
383void
384CompactDecimalFormat::parse(
385 const UnicodeString& /* text */,
386 Formattable& /* result */,
387 ParsePosition& /* parsePosition */) const {
388}
389
390void
391CompactDecimalFormat::parse(
392 const UnicodeString& /* text */,
393 Formattable& /* result */,
394 UErrorCode& status) const {
395 status = U_UNSUPPORTED_ERROR;
396}
397
398CurrencyAmount*
399CompactDecimalFormat::parseCurrency(
400 const UnicodeString& /* text */,
401 ParsePosition& /* pos */) const {
402 return NULL;
403}
404
405void CDFLocaleStyleData::Init(UErrorCode& status) {
406 if (unitsByVariant != NULL) {
407 return;
408 }
409 unitsByVariant = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &status);
410 if (U_FAILURE(status)) {
411 return;
412 }
413 uhash_setKeyDeleter(unitsByVariant, uprv_free);
414 uhash_setValueDeleter(unitsByVariant, deleteCDFUnits);
415}
416
417CDFLocaleStyleData::~CDFLocaleStyleData() {
418 setToBogus();
419}
420
421void CDFLocaleStyleData::setToBogus() {
422 if (unitsByVariant != NULL) {
423 uhash_close(unitsByVariant);
424 unitsByVariant = NULL;
425 }
426}
427
428void CDFLocaleData::Init(UErrorCode& status) {
429 shortData.Init(status);
430 if (U_FAILURE(status)) {
431 return;
432 }
433 longData.Init(status);
434}
435
436// Helper method for operator=
437static UBool divisors_equal(const double* lhs, const double* rhs) {
438 for (int32_t i = 0; i < MAX_DIGITS; ++i) {
439 if (lhs[i] != rhs[i]) {
440 return FALSE;
441 }
442 }
443 return TRUE;
444}
445
446// getCDFLocaleStyleData returns pointer to formatting data for given locale and
447// style within the global cache. On cache miss, getCDFLocaleStyleData loads
448// the data from CLDR into the global cache before returning the pointer. If a
449// UNUM_LONG data is requested for a locale, and that locale does not have
450// UNUM_LONG data, getCDFLocaleStyleData will fall back to UNUM_SHORT data for
451// that locale.
452static const CDFLocaleStyleData* getCDFLocaleStyleData(const Locale& inLocale, UNumberCompactStyle style, UErrorCode& status) {
453 if (U_FAILURE(status)) {
454 return NULL;
455 }
456 CDFLocaleData* result = NULL;
457 const char* key = inLocale.getName();
458 {
459 Mutex lock(&gCompactDecimalMetaLock);
460 if (gCompactDecimalData == NULL) {
461 gCompactDecimalData = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &status);
462 if (U_FAILURE(status)) {
463 return NULL;
464 }
465 uhash_setKeyDeleter(gCompactDecimalData, uprv_free);
466 uhash_setValueDeleter(gCompactDecimalData, deleteCDFLocaleData);
467 ucln_i18n_registerCleanup(UCLN_I18N_CDFINFO, cdf_cleanup);
468 } else {
469 result = (CDFLocaleData*) uhash_get(gCompactDecimalData, key);
470 }
471 }
472 if (result != NULL) {
473 return extractDataByStyleEnum(*result, style, status);
474 }
475
476 result = loadCDFLocaleData(inLocale, status);
477 if (U_FAILURE(status)) {
478 return NULL;
479 }
480
481 {
482 Mutex lock(&gCompactDecimalMetaLock);
483 CDFLocaleData* temp = (CDFLocaleData*) uhash_get(gCompactDecimalData, key);
484 if (temp != NULL) {
485 delete result;
486 result = temp;
487 } else {
488 uhash_put(gCompactDecimalData, uprv_strdup(key), (void*) result, &status);
489 if (U_FAILURE(status)) {
490 return NULL;
491 }
492 }
493 }
494 return extractDataByStyleEnum(*result, style, status);
495}
496
497static const CDFLocaleStyleData* extractDataByStyleEnum(const CDFLocaleData& data, UNumberCompactStyle style, UErrorCode& status) {
498 switch (style) {
499 case UNUM_SHORT:
500 return &data.shortData;
501 case UNUM_LONG:
502 if (!data.longData.isBogus()) {
503 return &data.longData;
504 }
505 return &data.shortData;
506 default:
507 status = U_ILLEGAL_ARGUMENT_ERROR;
508 return NULL;
509 }
510}
511
512// loadCDFLocaleData loads formatting data from CLDR for a given locale. The
513// caller owns the returned pointer.
514static CDFLocaleData* loadCDFLocaleData(const Locale& inLocale, UErrorCode& status) {
515 if (U_FAILURE(status)) {
516 return NULL;
517 }
518 CDFLocaleData* result = new CDFLocaleData;
519 if (result == NULL) {
520 status = U_MEMORY_ALLOCATION_ERROR;
521 return NULL;
522 }
523 result->Init(status);
524 if (U_FAILURE(status)) {
525 delete result;
526 return NULL;
527 }
528
f3c0d7a5
A
529 load(inLocale, result, status);
530
51004dcb
A
531 if (U_FAILURE(status)) {
532 delete result;
533 return NULL;
534 }
535 return result;
536}
537
f3c0d7a5 538namespace {
51004dcb 539
f3c0d7a5
A
540struct CmptDecDataSink : public ResourceSink {
541
542 CDFLocaleData& dataBundle; // Where to save values when they are read
543 UBool isLatin; // Whether or not we are traversing the Latin tree
544 UBool isFallback; // Whether or not we are traversing the Latin tree as fallback
545
546 enum EPatternsTableKey { PATTERNS_SHORT, PATTERNS_LONG };
547 enum EFormatsTableKey { DECIMAL_FORMAT, CURRENCY_FORMAT };
548
549 /*
550 * NumberElements{ <-- top (numbering system table)
551 * latn{ <-- patternsTable (one per numbering system)
552 * patternsLong{ <-- formatsTable (one per pattern)
553 * decimalFormat{ <-- powersOfTenTable (one per format)
554 * 1000{ <-- pluralVariantsTable (one per power of ten)
555 * one{"0 thousand"} <-- plural variant and template
556 */
51004dcb 557
f3c0d7a5
A
558 CmptDecDataSink(CDFLocaleData& _dataBundle)
559 : dataBundle(_dataBundle), isLatin(FALSE), isFallback(FALSE) {}
560 virtual ~CmptDecDataSink();
561
562 virtual void put(const char *key, ResourceValue &value, UBool isRoot, UErrorCode &errorCode) {
563 // SPECIAL CASE: Don't consume root in the non-Latin numbering system
564 if (isRoot && !isLatin) { return; }
565
566 ResourceTable patternsTable = value.getTable(errorCode);
567 if (U_FAILURE(errorCode)) { return; }
568 for (int i1 = 0; patternsTable.getKeyAndValue(i1, key, value); ++i1) {
569
570 // Check for patternsShort or patternsLong
571 EPatternsTableKey patternsTableKey;
572 if (uprv_strcmp(key, gPatternsShort) == 0) {
573 patternsTableKey = PATTERNS_SHORT;
574 } else if (uprv_strcmp(key, gPatternsLong) == 0) {
575 patternsTableKey = PATTERNS_LONG;
576 } else {
577 continue;
51004dcb 578 }
51004dcb 579
f3c0d7a5
A
580 // Traverse into the formats table
581 ResourceTable formatsTable = value.getTable(errorCode);
582 if (U_FAILURE(errorCode)) { return; }
583 for (int i2 = 0; formatsTable.getKeyAndValue(i2, key, value); ++i2) {
584
585 // Check for decimalFormat or currencyFormat
586 EFormatsTableKey formatsTableKey;
587 if (uprv_strcmp(key, gDecimalFormatTag) == 0) {
588 formatsTableKey = DECIMAL_FORMAT;
589 // TODO: Enable this statement when currency support is added
590 // } else if (uprv_strcmp(key, gCurrencyFormat) == 0) {
591 // formatsTableKey = CURRENCY_FORMAT;
592 } else {
593 continue;
594 }
595
596 // Set the current style and destination based on the two keys
597 UNumberCompactStyle style;
598 CDFLocaleStyleData* destination = NULL;
599 if (patternsTableKey == PATTERNS_LONG
600 && formatsTableKey == DECIMAL_FORMAT) {
601 style = UNUM_LONG;
602 destination = &dataBundle.longData;
603 } else if (patternsTableKey == PATTERNS_SHORT
604 && formatsTableKey == DECIMAL_FORMAT) {
605 style = UNUM_SHORT;
606 destination = &dataBundle.shortData;
607 // TODO: Enable the following statements when currency support is added
608 // } else if (patternsTableKey == PATTERNS_SHORT
609 // && formatsTableKey == CURRENCY_FORMAT) {
610 // style = UNUM_SHORT_CURRENCY; // or whatever the enum gets named
611 // destination = &dataBundle.shortCurrencyData;
612 // } else {
613 // // Silently ignore this case
614 // continue;
615 }
616
617 // SPECIAL CASE: RULES FOR WHETHER OR NOT TO CONSUME THIS TABLE:
618 // 1) Don't consume longData if shortData was consumed from the non-Latin
619 // locale numbering system
620 // 2) Don't consume longData for the first time if this is the root bundle and
621 // shortData is already populated from a more specific locale. Note that if
622 // both longData and shortData are both only in root, longData will be
623 // consumed since it is alphabetically before shortData in the bundle.
624 if (isFallback
625 && style == UNUM_LONG
626 && !dataBundle.shortData.isEmpty()
627 && !dataBundle.shortData.fromFallback) {
628 continue;
629 }
630 if (isRoot
631 && style == UNUM_LONG
632 && dataBundle.longData.isEmpty()
633 && !dataBundle.shortData.isEmpty()) {
634 continue;
635 }
636
637 // Set the "fromFallback" flag on the data object
638 destination->fromFallback = isFallback;
639
640 // Traverse into the powers of ten table
641 ResourceTable powersOfTenTable = value.getTable(errorCode);
642 if (U_FAILURE(errorCode)) { return; }
643 for (int i3 = 0; powersOfTenTable.getKeyAndValue(i3, key, value); ++i3) {
644
645 // The key will always be some even power of 10. e.g 10000.
646 char* endPtr = NULL;
647 double power10 = uprv_strtod(key, &endPtr);
648 if (*endPtr != 0) {
649 errorCode = U_INTERNAL_PROGRAM_ERROR;
650 return;
651 }
652 int32_t log10Value = computeLog10(power10, FALSE);
653
654 // Silently ignore divisors that are too big.
655 if (log10Value >= MAX_DIGITS) continue;
656
657 // Iterate over the plural variants ("one", "other", etc)
658 ResourceTable pluralVariantsTable = value.getTable(errorCode);
659 if (U_FAILURE(errorCode)) { return; }
660 for (int i4 = 0; pluralVariantsTable.getKeyAndValue(i4, key, value); ++i4) {
661 const char* pluralVariant = key;
662 const UnicodeString formatStr = value.getUnicodeString(errorCode);
663
664 // Copy the data into the in-memory data bundle (do not overwrite
665 // existing values)
666 int32_t numZeros = populatePrefixSuffix(
667 pluralVariant, log10Value, formatStr,
668 destination->unitsByVariant, FALSE, errorCode);
669
670 // If populatePrefixSuffix returns -1, it means that this key has been
671 // encountered already.
672 if (numZeros < 0) {
673 continue;
674 }
675
676 // Set the divisor, which is based on the number of zeros in the template
677 // string. If the divisor from here is different from the one previously
678 // stored, it means that the number of zeros in different plural variants
679 // differs; throw an exception.
680 // TODO: How should I check for floating-point errors here?
681 // Is there a good reason why "divisor" is double and not long like Java?
682 double divisor = calculateDivisor(power10, numZeros);
683 if (destination->divisors[log10Value] != 0.0
684 && destination->divisors[log10Value] != divisor) {
685 errorCode = U_INTERNAL_PROGRAM_ERROR;
686 return;
687 }
688 destination->divisors[log10Value] = divisor;
51004dcb 689 }
51004dcb 690 }
51004dcb 691 }
f3c0d7a5 692 }
51004dcb 693 }
f3c0d7a5 694};
51004dcb 695
f3c0d7a5
A
696// Virtual destructors must be defined out of line.
697CmptDecDataSink::~CmptDecDataSink() {}
51004dcb 698
f3c0d7a5 699} // namespace
51004dcb 700
f3c0d7a5
A
701static void load(const Locale& inLocale, CDFLocaleData* result, UErrorCode& status) {
702 LocalPointer<NumberingSystem> ns(NumberingSystem::createInstance(inLocale, status));
51004dcb
A
703 if (U_FAILURE(status)) {
704 return;
705 }
f3c0d7a5 706 const char* nsName = ns->getName();
51004dcb 707
f3c0d7a5 708 LocalUResourceBundlePointer resource(ures_open(NULL, inLocale.getName(), &status));
51004dcb
A
709 if (U_FAILURE(status)) {
710 return;
711 }
f3c0d7a5
A
712 CmptDecDataSink sink(*result);
713 sink.isFallback = FALSE;
714
715 // First load the number elements data if nsName is not Latin.
716 if (uprv_strcmp(nsName, gLatnTag) != 0) {
717 sink.isLatin = FALSE;
718 CharString path;
719 path.append(gNumberElementsTag, status)
720 .append('/', status)
721 .append(nsName, status);
722 ures_getAllItemsWithFallback(resource.getAlias(), path.data(), sink, status);
723 if (status == U_MISSING_RESOURCE_ERROR) {
724 // Silently ignore and use Latin
725 status = U_ZERO_ERROR;
726 } else if (U_FAILURE(status)) {
51004dcb
A
727 return;
728 }
f3c0d7a5 729 sink.isFallback = TRUE;
51004dcb 730 }
f3c0d7a5
A
731
732 // Now load Latin.
733 sink.isLatin = TRUE;
734 ures_getAllItemsWithFallback(resource.getAlias(), gLatnPath, sink, status);
735 if (U_FAILURE(status)) return;
736
737 // If longData is empty, default it to be equal to shortData
738 if (result->longData.isEmpty()) {
739 result->longData.setToBogus();
51004dcb 740 }
f3c0d7a5
A
741
742 // Check for "other" variants in each of the three data classes, and resolve missing elements.
743
744 if (!result->longData.isBogus()) {
745 checkForOtherVariants(&result->longData, status);
746 if (U_FAILURE(status)) return;
747 fillInMissing(&result->longData);
51004dcb 748 }
f3c0d7a5
A
749
750 checkForOtherVariants(&result->shortData, status);
751 if (U_FAILURE(status)) return;
752 fillInMissing(&result->shortData);
753
754 // TODO: Enable this statement when currency support is added
755 // checkForOtherVariants(&result->shortCurrencyData, status);
756 // if (U_FAILURE(status)) return;
757 // fillInMissing(&result->shortCurrencyData);
51004dcb
A
758}
759
760// populatePrefixSuffix Adds a specific prefix-suffix pair to result for a
761// given variant and log10 value.
762// variant is 'zero', 'one', 'two', 'few', 'many', or 'other'.
763// formatStr is the format string from which the prefix and suffix are
764// extracted. It is usually of form 'Pefix 000 suffix'.
765// populatePrefixSuffix returns the number of 0's found in formatStr
766// before the decimal point.
767// In the special case that formatStr contains only spaces for prefix
768// and suffix, populatePrefixSuffix returns log10Value + 1.
769static int32_t populatePrefixSuffix(
f3c0d7a5 770 const char* variant, int32_t log10Value, const UnicodeString& formatStr, UHashtable* result, UBool overwrite, UErrorCode& status) {
51004dcb
A
771 if (U_FAILURE(status)) {
772 return 0;
773 }
f3c0d7a5
A
774
775 // ICU 59 HACK: Ignore negative part of format string, mimicking ICU 58 behavior.
776 // TODO(sffc): Make sure this is fixed during the overhaul port in ICU 60.
777 int32_t semiPos = formatStr.indexOf(';', 0);
778 if (semiPos == -1) {
779 semiPos = formatStr.length();
780 }
781 UnicodeString positivePart = formatStr.tempSubString(0, semiPos);
782
783 int32_t firstIdx = positivePart.indexOf(kZero, UPRV_LENGTHOF(kZero), 0);
51004dcb
A
784 // We must have 0's in format string.
785 if (firstIdx == -1) {
786 status = U_INTERNAL_PROGRAM_ERROR;
787 return 0;
788 }
f3c0d7a5 789 int32_t lastIdx = positivePart.lastIndexOf(kZero, UPRV_LENGTHOF(kZero), firstIdx);
51004dcb
A
790 CDFUnit* unit = createCDFUnit(variant, log10Value, result, status);
791 if (U_FAILURE(status)) {
792 return 0;
793 }
f3c0d7a5
A
794
795 // Return -1 if we are not overwriting an existing value
796 if (unit->isSet() && !overwrite) {
797 return -1;
798 }
799 unit->markAsSet();
800
51004dcb 801 // Everything up to first 0 is the prefix
f3c0d7a5 802 unit->prefix = positivePart.tempSubString(0, firstIdx);
51004dcb
A
803 fixQuotes(unit->prefix);
804 // Everything beyond the last 0 is the suffix
f3c0d7a5 805 unit->suffix = positivePart.tempSubString(lastIdx + 1);
51004dcb
A
806 fixQuotes(unit->suffix);
807
808 // If there is effectively no prefix or suffix, ignore the actual number of
809 // 0's and act as if the number of 0's matches the size of the number.
810 if (onlySpaces(unit->prefix) && onlySpaces(unit->suffix)) {
811 return log10Value + 1;
812 }
813
814 // Calculate number of zeros before decimal point
815 int32_t idx = firstIdx + 1;
f3c0d7a5 816 while (idx <= lastIdx && positivePart.charAt(idx) == u_0) {
51004dcb
A
817 ++idx;
818 }
819 return (idx - firstIdx);
820}
821
f3c0d7a5
A
// Calculate a divisor based on the magnitude and number of zeros in the
// template string: power10 divided by 10 once for every zero beyond the
// first. numZeros <= 1 returns power10 unchanged.
static double calculateDivisor(double power10, int32_t numZeros) {
    double scaled = power10;
    for (int32_t zerosLeft = numZeros; zerosLeft > 1; --zerosLeft) {
        scaled /= 10.0;
    }
    return scaled;
}
831
51004dcb
A
832static UBool onlySpaces(UnicodeString u) {
833 return u.trim().length() == 0;
834}
835
836// fixQuotes unescapes single quotes. Don''t -> Don't. Letter 'j' -> Letter j.
837// Modifies s in place.
838static void fixQuotes(UnicodeString& s) {
839 QuoteState state = OUTSIDE;
840 int32_t len = s.length();
841 int32_t dest = 0;
842 for (int32_t i = 0; i < len; ++i) {
843 UChar ch = s.charAt(i);
844 if (ch == u_apos) {
845 if (state == INSIDE_EMPTY) {
846 s.setCharAt(dest, ch);
847 ++dest;
848 }
849 } else {
850 s.setCharAt(dest, ch);
851 ++dest;
852 }
853
854 // Update state
855 switch (state) {
856 case OUTSIDE:
857 state = ch == u_apos ? INSIDE_EMPTY : OUTSIDE;
858 break;
859 case INSIDE_EMPTY:
860 case INSIDE_FULL:
861 state = ch == u_apos ? OUTSIDE : INSIDE_FULL;
862 break;
863 default:
864 break;
865 }
866 }
867 s.truncate(dest);
868}
869
f3c0d7a5
A
870// Checks to make sure that an "other" variant is present in all
871// powers of 10.
872static void checkForOtherVariants(CDFLocaleStyleData* result,
873 UErrorCode& status) {
874 if (result == NULL || result->unitsByVariant == NULL) {
875 return;
876 }
877
878 const CDFUnit* otherByBase =
879 (const CDFUnit*) uhash_get(result->unitsByVariant, gOther);
880 if (otherByBase == NULL) {
881 status = U_INTERNAL_PROGRAM_ERROR;
882 return;
883 }
884
885 // Check all other plural variants, and make sure that if
886 // any of them are populated, then other is also populated
887 int32_t pos = UHASH_FIRST;
888 const UHashElement* element;
889 while ((element = uhash_nextElement(result->unitsByVariant, &pos)) != NULL) {
890 CDFUnit* variantsByBase = (CDFUnit*) element->value.pointer;
891 if (variantsByBase == otherByBase) continue;
892 for (int32_t log10Value = 0; log10Value < MAX_DIGITS; ++log10Value) {
893 if (variantsByBase[log10Value].isSet()
894 && !otherByBase[log10Value].isSet()) {
895 status = U_INTERNAL_PROGRAM_ERROR;
896 return;
897 }
898 }
899 }
900}
901
51004dcb
A
902// fillInMissing ensures that the data in result is complete.
903// result data is complete if for each variant in result, there exists
904// a prefix-suffix pair for each log10 value and there also exists
905// a divisor for each log10 value.
906//
907// First this function figures out for which log10 values, the other
908// variant already had data. These are the same log10 values defined
909// in CLDR.
910//
911// For each log10 value not defined in CLDR, it uses the divisor for
912// the last defined log10 value or 1.
913//
914// Then for each variant, it does the following. For each log10
915// value not defined in CLDR, copy the prefix-suffix pair from the
916// previous log10 value. If log10 value is defined in CLDR but is
917// missing from given variant, copy the prefix-suffix pair for that
918// log10 value from the 'other' variant.
919static void fillInMissing(CDFLocaleStyleData* result) {
920 const CDFUnit* otherUnits =
921 (const CDFUnit*) uhash_get(result->unitsByVariant, gOther);
922 UBool definedInCLDR[MAX_DIGITS];
923 double lastDivisor = 1.0;
924 for (int32_t i = 0; i < MAX_DIGITS; ++i) {
925 if (!otherUnits[i].isSet()) {
926 result->divisors[i] = lastDivisor;
927 definedInCLDR[i] = FALSE;
928 } else {
929 lastDivisor = result->divisors[i];
930 definedInCLDR[i] = TRUE;
931 }
932 }
933 // Iterate over each variant.
b331163b 934 int32_t pos = UHASH_FIRST;
51004dcb
A
935 const UHashElement* element = uhash_nextElement(result->unitsByVariant, &pos);
936 for (;element != NULL; element = uhash_nextElement(result->unitsByVariant, &pos)) {
937 CDFUnit* units = (CDFUnit*) element->value.pointer;
938 for (int32_t i = 0; i < MAX_DIGITS; ++i) {
939 if (definedInCLDR[i]) {
940 if (!units[i].isSet()) {
941 units[i] = otherUnits[i];
942 }
943 } else {
944 if (i == 0) {
945 units[0].markAsSet();
946 } else {
947 units[i] = units[i - 1];
948 }
949 }
950 }
951 }
952}
953
954// computeLog10 computes floor(log10(x)). If inRange is TRUE, the biggest
955// value computeLog10 will return MAX_DIGITS -1 even for
956// numbers > 10^MAX_DIGITS. If inRange is FALSE, computeLog10 will return
957// up to MAX_DIGITS.
958static int32_t computeLog10(double x, UBool inRange) {
959 int32_t result = 0;
960 int32_t max = inRange ? MAX_DIGITS - 1 : MAX_DIGITS;
961 while (x >= 10.0) {
962 x /= 10.0;
963 ++result;
964 if (result == max) {
965 break;
966 }
967 }
968 return result;
969}
970
971// createCDFUnit returns a pointer to the prefix-suffix pair for a given
972// variant and log10 value within table. If no such prefix-suffix pair is
973// stored in table, one is created within table before returning pointer.
974static CDFUnit* createCDFUnit(const char* variant, int32_t log10Value, UHashtable* table, UErrorCode& status) {
975 if (U_FAILURE(status)) {
976 return NULL;
977 }
978 CDFUnit *cdfUnit = (CDFUnit*) uhash_get(table, variant);
979 if (cdfUnit == NULL) {
980 cdfUnit = new CDFUnit[MAX_DIGITS];
981 if (cdfUnit == NULL) {
982 status = U_MEMORY_ALLOCATION_ERROR;
983 return NULL;
984 }
985 uhash_put(table, uprv_strdup(variant), cdfUnit, &status);
986 if (U_FAILURE(status)) {
987 return NULL;
988 }
989 }
990 CDFUnit* result = &cdfUnit[log10Value];
51004dcb
A
991 return result;
992}
993
994// getCDFUnitFallback returns a pointer to the prefix-suffix pair for a given
995// variant and log10 value within table. If the given variant doesn't exist, it
996// falls back to the OTHER variant. Therefore, this method will always return
997// some non-NULL value.
998static const CDFUnit* getCDFUnitFallback(const UHashtable* table, const UnicodeString& variant, int32_t log10Value) {
999 CharString cvariant;
1000 UErrorCode status = U_ZERO_ERROR;
1001 const CDFUnit *cdfUnit = NULL;
1002 cvariant.appendInvariantChars(variant, status);
1003 if (!U_FAILURE(status)) {
1004 cdfUnit = (const CDFUnit*) uhash_get(table, cvariant.data());
1005 }
1006 if (cdfUnit == NULL) {
1007 cdfUnit = (const CDFUnit*) uhash_get(table, gOther);
1008 }
1009 return &cdfUnit[log10Value];
1010}
1011
1012U_NAMESPACE_END
1013#endif