1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 *******************************************************************************
5 * Copyright (C) 1997-2015, International Business Machines Corporation and *
6 * others. All Rights Reserved. *
7 *******************************************************************************
9 * File COMPACTDECIMALFORMAT.CPP
11 ********************************************************************************
13 #include "unicode/utypes.h"
15 #if !UCONFIG_NO_FORMATTING
21 #include "unicode/compactdecimalformat.h"
22 #include "unicode/numsys.h"
23 #include "unicode/plurrule.h"
24 #include "unicode/ures.h"
28 #include "unicode/ures.h"
31 // Maps locale name to CDFLocaleData struct.
32 static UHashtable
* gCompactDecimalData
= NULL
;
33 static UMutex gCompactDecimalMetaLock
= U_MUTEX_INITIALIZER
;
37 static const int32_t MAX_DIGITS
= 15;
38 static const char gOther
[] = "other";
39 static const char gLatnTag
[] = "latn";
40 static const char gNumberElementsTag
[] = "NumberElements";
41 static const char gDecimalFormatTag
[] = "decimalFormat";
42 static const char gPatternsShort
[] = "patternsShort";
43 static const char gPatternsLong
[] = "patternsLong";
44 static const char gLatnPath
[] = "NumberElements/latn";
46 static const UChar u_0
= 0x30;
47 static const UChar u_apos
= 0x27;
49 static const UChar kZero
[] = {u_0
};
51 // Used to unescape single quotes.
62 // Next one will be 4 then 6 etc.
66 // CDFUnit represents a prefix-suffix pair for a particular variant
68 struct CDFUnit
: public UMemory
{
71 inline CDFUnit() : prefix(), suffix() {
75 inline UBool
isSet() const {
76 return !prefix
.isBogus();
78 inline void markAsSet() {
83 // CDFLocaleStyleData contains formatting data for a particular locale
85 class CDFLocaleStyleData
: public UMemory
{
87 // What to divide by for each log10 value when formatting. These values
88 // will be powers of 10. For English, would be:
89 // 1, 1, 1, 1000, 1000, 1000, 1000000, 1000000, 1000000, 1000000000 ...
90 double divisors
[MAX_DIGITS
];
91 // Maps plural variants to CDFUnit[MAX_DIGITS] arrays.
92 // To format a number x,
93 // first compute log10(x). Compute displayNum = (x / divisors[log10(x)]).
94 // Compute the plural variant for displayNum
95 // (e.g zero, one, two, few, many, other).
96 // Compute cdfUnits = unitsByVariant[pluralVariant].
97 // Prefix and suffix to use at cdfUnits[log10(x)]
98 UHashtable
* unitsByVariant
;
99 // A flag for whether or not this CDFLocaleStyleData was loaded from the
100 // Latin numbering system as a fallback from the locale numbering system.
101 // This value is meaningless if the object is bogus or empty.
103 inline CDFLocaleStyleData() : unitsByVariant(NULL
), fromFallback(FALSE
) {
104 uprv_memset(divisors
, 0, sizeof(divisors
));
106 ~CDFLocaleStyleData();
107 // Init initializes this object.
108 void Init(UErrorCode
& status
);
109 inline UBool
isBogus() const {
110 return unitsByVariant
== NULL
;
114 return unitsByVariant
== NULL
|| unitsByVariant
->count
== 0;
117 CDFLocaleStyleData(const CDFLocaleStyleData
&);
118 CDFLocaleStyleData
& operator=(const CDFLocaleStyleData
&);
121 // CDFLocaleData contains formatting data for a particular locale.
122 struct CDFLocaleData
: public UMemory
{
123 CDFLocaleStyleData shortData
;
124 CDFLocaleStyleData longData
;
125 inline CDFLocaleData() : shortData(), longData() { }
126 inline ~CDFLocaleData() { }
127 // Init initializes this object.
128 void Init(UErrorCode
& status
);
135 static UBool U_CALLCONV
cdf_cleanup(void) {
136 if (gCompactDecimalData
!= NULL
) {
137 uhash_close(gCompactDecimalData
);
138 gCompactDecimalData
= NULL
;
143 static void U_CALLCONV
deleteCDFUnits(void* ptr
) {
144 delete [] (icu::CDFUnit
*) ptr
;
147 static void U_CALLCONV
deleteCDFLocaleData(void* ptr
) {
148 delete (icu::CDFLocaleData
*) ptr
;
155 static UBool
divisors_equal(const double* lhs
, const double* rhs
);
156 static const CDFLocaleStyleData
* getCDFLocaleStyleData(const Locale
& inLocale
, UNumberCompactStyle style
, UErrorCode
& status
);
158 static const CDFLocaleStyleData
* extractDataByStyleEnum(const CDFLocaleData
& data
, UNumberCompactStyle style
, UErrorCode
& status
);
159 static CDFLocaleData
* loadCDFLocaleData(const Locale
& inLocale
, UErrorCode
& status
);
160 static void load(const Locale
& inLocale
, CDFLocaleData
* result
, UErrorCode
& status
);
161 static int32_t populatePrefixSuffix(const char* variant
, int32_t log10Value
, const UnicodeString
& formatStr
, UHashtable
* result
, UBool overwrite
, UErrorCode
& status
);
162 static double calculateDivisor(double power10
, int32_t numZeros
);
163 static UBool
onlySpaces(UnicodeString u
);
164 static void fixQuotes(UnicodeString
& s
);
165 static void checkForOtherVariants(CDFLocaleStyleData
* result
, UErrorCode
& status
);
166 static void fillInMissing(CDFLocaleStyleData
* result
);
167 static int32_t computeLog10(double x
, UBool inRange
);
168 static CDFUnit
* createCDFUnit(const char* variant
, int32_t log10Value
, UHashtable
* table
, UErrorCode
& status
);
169 static const CDFUnit
* getCDFUnitFallback(const UHashtable
* table
, const UnicodeString
& variant
, int32_t log10Value
);
171 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CompactDecimalFormat
)
173 CompactDecimalFormat::CompactDecimalFormat(
174 const DecimalFormat
& decimalFormat
,
175 const UHashtable
* unitsByVariant
,
176 const double* divisors
,
177 PluralRules
* pluralRules
)
178 : DecimalFormat(decimalFormat
), _unitsByVariant(unitsByVariant
), _divisors(divisors
), _pluralRules(pluralRules
) {
181 CompactDecimalFormat::CompactDecimalFormat(const CompactDecimalFormat
& source
)
182 : DecimalFormat(source
), _unitsByVariant(source
._unitsByVariant
), _divisors(source
._divisors
), _pluralRules(source
._pluralRules
->clone()) {
185 CompactDecimalFormat
* U_EXPORT2
186 CompactDecimalFormat::createInstance(
187 const Locale
& inLocale
, UNumberCompactStyle style
, UErrorCode
& status
) {
188 LocalPointer
<DecimalFormat
> decfmt((DecimalFormat
*) NumberFormat::makeInstance(inLocale
, UNUM_DECIMAL
, TRUE
, status
));
189 if (U_FAILURE(status
)) {
192 LocalPointer
<PluralRules
> pluralRules(PluralRules::forLocale(inLocale
, status
));
193 if (U_FAILURE(status
)) {
196 const CDFLocaleStyleData
* data
= getCDFLocaleStyleData(inLocale
, style
, status
);
197 if (U_FAILURE(status
)) {
200 CompactDecimalFormat
* result
=
201 new CompactDecimalFormat(*decfmt
, data
->unitsByVariant
, data
->divisors
, pluralRules
.getAlias());
202 if (result
== NULL
) {
203 status
= U_MEMORY_ALLOCATION_ERROR
;
206 pluralRules
.orphan();
207 result
->setMaximumSignificantDigits(3);
208 result
->setSignificantDigitsUsed(TRUE
);
209 result
->setGroupingUsed(FALSE
);
213 CompactDecimalFormat
&
214 CompactDecimalFormat::operator=(const CompactDecimalFormat
& rhs
) {
216 DecimalFormat::operator=(rhs
);
217 _unitsByVariant
= rhs
._unitsByVariant
;
218 _divisors
= rhs
._divisors
;
220 _pluralRules
= rhs
._pluralRules
->clone();
225 CompactDecimalFormat::~CompactDecimalFormat() {
231 CompactDecimalFormat::clone(void) const {
232 return new CompactDecimalFormat(*this);
236 CompactDecimalFormat::operator==(const Format
& that
) const {
240 return (DecimalFormat::operator==(that
) && eqHelper((const CompactDecimalFormat
&) that
));
244 CompactDecimalFormat::eqHelper(const CompactDecimalFormat
& that
) const {
245 return uhash_equals(_unitsByVariant
, that
._unitsByVariant
) && divisors_equal(_divisors
, that
._divisors
) && (*_pluralRules
== *that
._pluralRules
);
249 CompactDecimalFormat::format(
251 UnicodeString
& appendTo
,
252 FieldPosition
& pos
) const {
253 UErrorCode status
= U_ZERO_ERROR
;
254 return format(number
, appendTo
, pos
, status
);
258 CompactDecimalFormat::format(
260 UnicodeString
& appendTo
,
262 UErrorCode
&status
) const {
263 if (U_FAILURE(status
)) {
266 DigitList orig
, rounded
;
269 _round(orig
, rounded
, isNegative
, status
);
270 if (U_FAILURE(status
)) {
273 double roundedDouble
= rounded
.getDouble();
275 roundedDouble
= -roundedDouble
;
277 int32_t baseIdx
= computeLog10(roundedDouble
, TRUE
);
278 double numberToFormat
= roundedDouble
/ _divisors
[baseIdx
];
279 UnicodeString variant
= _pluralRules
->select(numberToFormat
);
281 numberToFormat
= -numberToFormat
;
283 const CDFUnit
* unit
= getCDFUnitFallback(_unitsByVariant
, variant
, baseIdx
);
284 appendTo
+= unit
->prefix
;
285 DecimalFormat::format(numberToFormat
, appendTo
, pos
);
286 appendTo
+= unit
->suffix
;
291 CompactDecimalFormat::format(
293 UnicodeString
& appendTo
,
294 FieldPositionIterator
* /* posIter */,
295 UErrorCode
& status
) const {
296 status
= U_UNSUPPORTED_ERROR
;
301 CompactDecimalFormat::format(
303 UnicodeString
& appendTo
,
304 FieldPosition
& pos
) const {
305 return format((double) number
, appendTo
, pos
);
309 CompactDecimalFormat::format(
311 UnicodeString
& appendTo
,
313 UErrorCode
&status
) const {
314 return format((double) number
, appendTo
, pos
, status
);
318 CompactDecimalFormat::format(
319 int32_t /* number */,
320 UnicodeString
& appendTo
,
321 FieldPositionIterator
* /* posIter */,
322 UErrorCode
& status
) const {
323 status
= U_UNSUPPORTED_ERROR
;
328 CompactDecimalFormat::format(
330 UnicodeString
& appendTo
,
331 FieldPosition
& pos
) const {
332 return format((double) number
, appendTo
, pos
);
336 CompactDecimalFormat::format(
338 UnicodeString
& appendTo
,
340 UErrorCode
&status
) const {
341 return format((double) number
, appendTo
, pos
, status
);
345 CompactDecimalFormat::format(
346 int64_t /* number */,
347 UnicodeString
& appendTo
,
348 FieldPositionIterator
* /* posIter */,
349 UErrorCode
& status
) const {
350 status
= U_UNSUPPORTED_ERROR
;
355 CompactDecimalFormat::format(
356 StringPiece
/* number */,
357 UnicodeString
& appendTo
,
358 FieldPositionIterator
* /* posIter */,
359 UErrorCode
& status
) const {
360 status
= U_UNSUPPORTED_ERROR
;
365 CompactDecimalFormat::format(
366 const DigitList
& /* number */,
367 UnicodeString
& appendTo
,
368 FieldPositionIterator
* /* posIter */,
369 UErrorCode
& status
) const {
370 status
= U_UNSUPPORTED_ERROR
;
375 CompactDecimalFormat::format(const DigitList
& /* number */,
376 UnicodeString
& appendTo
,
377 FieldPosition
& /* pos */,
378 UErrorCode
& status
) const {
379 status
= U_UNSUPPORTED_ERROR
;
384 CompactDecimalFormat::parse(
385 const UnicodeString
& /* text */,
386 Formattable
& /* result */,
387 ParsePosition
& /* parsePosition */) const {
391 CompactDecimalFormat::parse(
392 const UnicodeString
& /* text */,
393 Formattable
& /* result */,
394 UErrorCode
& status
) const {
395 status
= U_UNSUPPORTED_ERROR
;
399 CompactDecimalFormat::parseCurrency(
400 const UnicodeString
& /* text */,
401 ParsePosition
& /* pos */) const {
405 void CDFLocaleStyleData::Init(UErrorCode
& status
) {
406 if (unitsByVariant
!= NULL
) {
409 unitsByVariant
= uhash_open(uhash_hashChars
, uhash_compareChars
, NULL
, &status
);
410 if (U_FAILURE(status
)) {
413 uhash_setKeyDeleter(unitsByVariant
, uprv_free
);
414 uhash_setValueDeleter(unitsByVariant
, deleteCDFUnits
);
417 CDFLocaleStyleData::~CDFLocaleStyleData() {
421 void CDFLocaleStyleData::setToBogus() {
422 if (unitsByVariant
!= NULL
) {
423 uhash_close(unitsByVariant
);
424 unitsByVariant
= NULL
;
428 void CDFLocaleData::Init(UErrorCode
& status
) {
429 shortData
.Init(status
);
430 if (U_FAILURE(status
)) {
433 longData
.Init(status
);
// Helper method for operator== (via eqHelper).
437 static UBool
divisors_equal(const double* lhs
, const double* rhs
) {
438 for (int32_t i
= 0; i
< MAX_DIGITS
; ++i
) {
439 if (lhs
[i
] != rhs
[i
]) {
446 // getCDFLocaleStyleData returns pointer to formatting data for given locale and
447 // style within the global cache. On cache miss, getCDFLocaleStyleData loads
448 // the data from CLDR into the global cache before returning the pointer. If a
// UNUM_LONG data is requested for a locale, and that locale does not have
// UNUM_LONG data, getCDFLocaleStyleData will fall back to the UNUM_SHORT data
// for that locale.
452 static const CDFLocaleStyleData
* getCDFLocaleStyleData(const Locale
& inLocale
, UNumberCompactStyle style
, UErrorCode
& status
) {
453 if (U_FAILURE(status
)) {
456 CDFLocaleData
* result
= NULL
;
457 const char* key
= inLocale
.getName();
459 Mutex
lock(&gCompactDecimalMetaLock
);
460 if (gCompactDecimalData
== NULL
) {
461 gCompactDecimalData
= uhash_open(uhash_hashChars
, uhash_compareChars
, NULL
, &status
);
462 if (U_FAILURE(status
)) {
465 uhash_setKeyDeleter(gCompactDecimalData
, uprv_free
);
466 uhash_setValueDeleter(gCompactDecimalData
, deleteCDFLocaleData
);
467 ucln_i18n_registerCleanup(UCLN_I18N_CDFINFO
, cdf_cleanup
);
469 result
= (CDFLocaleData
*) uhash_get(gCompactDecimalData
, key
);
472 if (result
!= NULL
) {
473 return extractDataByStyleEnum(*result
, style
, status
);
476 result
= loadCDFLocaleData(inLocale
, status
);
477 if (U_FAILURE(status
)) {
482 Mutex
lock(&gCompactDecimalMetaLock
);
483 CDFLocaleData
* temp
= (CDFLocaleData
*) uhash_get(gCompactDecimalData
, key
);
488 uhash_put(gCompactDecimalData
, uprv_strdup(key
), (void*) result
, &status
);
489 if (U_FAILURE(status
)) {
494 return extractDataByStyleEnum(*result
, style
, status
);
497 static const CDFLocaleStyleData
* extractDataByStyleEnum(const CDFLocaleData
& data
, UNumberCompactStyle style
, UErrorCode
& status
) {
500 return &data
.shortData
;
502 if (!data
.longData
.isBogus()) {
503 return &data
.longData
;
505 return &data
.shortData
;
507 status
= U_ILLEGAL_ARGUMENT_ERROR
;
512 // loadCDFLocaleData loads formatting data from CLDR for a given locale. The
513 // caller owns the returned pointer.
514 static CDFLocaleData
* loadCDFLocaleData(const Locale
& inLocale
, UErrorCode
& status
) {
515 if (U_FAILURE(status
)) {
518 CDFLocaleData
* result
= new CDFLocaleData
;
519 if (result
== NULL
) {
520 status
= U_MEMORY_ALLOCATION_ERROR
;
523 result
->Init(status
);
524 if (U_FAILURE(status
)) {
529 load(inLocale
, result
, status
);
531 if (U_FAILURE(status
)) {
540 struct CmptDecDataSink
: public ResourceSink
{
542 CDFLocaleData
& dataBundle
; // Where to save values when they are read
543 UBool isLatin
; // Whether or not we are traversing the Latin tree
544 UBool isFallback
; // Whether or not we are traversing the Latin tree as fallback
546 enum EPatternsTableKey
{ PATTERNS_SHORT
, PATTERNS_LONG
};
547 enum EFormatsTableKey
{ DECIMAL_FORMAT
, CURRENCY_FORMAT
};
550 * NumberElements{ <-- top (numbering system table)
551 * latn{ <-- patternsTable (one per numbering system)
552 * patternsLong{ <-- formatsTable (one per pattern)
553 * decimalFormat{ <-- powersOfTenTable (one per format)
554 * 1000{ <-- pluralVariantsTable (one per power of ten)
555 * one{"0 thousand"} <-- plural variant and template
558 CmptDecDataSink(CDFLocaleData
& _dataBundle
)
559 : dataBundle(_dataBundle
), isLatin(FALSE
), isFallback(FALSE
) {}
560 virtual ~CmptDecDataSink();
562 virtual void put(const char *key
, ResourceValue
&value
, UBool isRoot
, UErrorCode
&errorCode
) {
563 // SPECIAL CASE: Don't consume root in the non-Latin numbering system
564 if (isRoot
&& !isLatin
) { return; }
566 ResourceTable patternsTable
= value
.getTable(errorCode
);
567 if (U_FAILURE(errorCode
)) { return; }
568 for (int i1
= 0; patternsTable
.getKeyAndValue(i1
, key
, value
); ++i1
) {
570 // Check for patternsShort or patternsLong
571 EPatternsTableKey patternsTableKey
;
572 if (uprv_strcmp(key
, gPatternsShort
) == 0) {
573 patternsTableKey
= PATTERNS_SHORT
;
574 } else if (uprv_strcmp(key
, gPatternsLong
) == 0) {
575 patternsTableKey
= PATTERNS_LONG
;
580 // Traverse into the formats table
581 ResourceTable formatsTable
= value
.getTable(errorCode
);
582 if (U_FAILURE(errorCode
)) { return; }
583 for (int i2
= 0; formatsTable
.getKeyAndValue(i2
, key
, value
); ++i2
) {
585 // Check for decimalFormat or currencyFormat
586 EFormatsTableKey formatsTableKey
;
587 if (uprv_strcmp(key
, gDecimalFormatTag
) == 0) {
588 formatsTableKey
= DECIMAL_FORMAT
;
589 // TODO: Enable this statement when currency support is added
590 // } else if (uprv_strcmp(key, gCurrencyFormat) == 0) {
591 // formatsTableKey = CURRENCY_FORMAT;
596 // Set the current style and destination based on the two keys
597 UNumberCompactStyle style
;
598 CDFLocaleStyleData
* destination
= NULL
;
599 if (patternsTableKey
== PATTERNS_LONG
600 && formatsTableKey
== DECIMAL_FORMAT
) {
602 destination
= &dataBundle
.longData
;
603 } else if (patternsTableKey
== PATTERNS_SHORT
604 && formatsTableKey
== DECIMAL_FORMAT
) {
606 destination
= &dataBundle
.shortData
;
607 // TODO: Enable the following statements when currency support is added
608 // } else if (patternsTableKey == PATTERNS_SHORT
609 // && formatsTableKey == CURRENCY_FORMAT) {
610 // style = UNUM_SHORT_CURRENCY; // or whatever the enum gets named
611 // destination = &dataBundle.shortCurrencyData;
613 // // Silently ignore this case
617 // SPECIAL CASE: RULES FOR WHETHER OR NOT TO CONSUME THIS TABLE:
618 // 1) Don't consume longData if shortData was consumed from the non-Latin
619 // locale numbering system
620 // 2) Don't consume longData for the first time if this is the root bundle and
621 // shortData is already populated from a more specific locale. Note that if
622 // both longData and shortData are both only in root, longData will be
623 // consumed since it is alphabetically before shortData in the bundle.
625 && style
== UNUM_LONG
626 && !dataBundle
.shortData
.isEmpty()
627 && !dataBundle
.shortData
.fromFallback
) {
631 && style
== UNUM_LONG
632 && dataBundle
.longData
.isEmpty()
633 && !dataBundle
.shortData
.isEmpty()) {
637 // Set the "fromFallback" flag on the data object
638 destination
->fromFallback
= isFallback
;
640 // Traverse into the powers of ten table
641 ResourceTable powersOfTenTable
= value
.getTable(errorCode
);
642 if (U_FAILURE(errorCode
)) { return; }
643 for (int i3
= 0; powersOfTenTable
.getKeyAndValue(i3
, key
, value
); ++i3
) {
645 // The key will always be some even power of 10. e.g 10000.
647 double power10
= uprv_strtod(key
, &endPtr
);
649 errorCode
= U_INTERNAL_PROGRAM_ERROR
;
652 int32_t log10Value
= computeLog10(power10
, FALSE
);
654 // Silently ignore divisors that are too big.
655 if (log10Value
>= MAX_DIGITS
) continue;
657 // Iterate over the plural variants ("one", "other", etc)
658 ResourceTable pluralVariantsTable
= value
.getTable(errorCode
);
659 if (U_FAILURE(errorCode
)) { return; }
660 for (int i4
= 0; pluralVariantsTable
.getKeyAndValue(i4
, key
, value
); ++i4
) {
661 const char* pluralVariant
= key
;
662 const UnicodeString formatStr
= value
.getUnicodeString(errorCode
);
664 // Copy the data into the in-memory data bundle (do not overwrite
666 int32_t numZeros
= populatePrefixSuffix(
667 pluralVariant
, log10Value
, formatStr
,
668 destination
->unitsByVariant
, FALSE
, errorCode
);
670 // If populatePrefixSuffix returns -1, it means that this key has been
671 // encountered already.
676 // Set the divisor, which is based on the number of zeros in the template
677 // string. If the divisor from here is different from the one previously
678 // stored, it means that the number of zeros in different plural variants
679 // differs; throw an exception.
680 // TODO: How should I check for floating-point errors here?
681 // Is there a good reason why "divisor" is double and not long like Java?
682 double divisor
= calculateDivisor(power10
, numZeros
);
683 if (destination
->divisors
[log10Value
] != 0.0
684 && destination
->divisors
[log10Value
] != divisor
) {
685 errorCode
= U_INTERNAL_PROGRAM_ERROR
;
688 destination
->divisors
[log10Value
] = divisor
;
696 // Virtual destructors must be defined out of line.
697 CmptDecDataSink::~CmptDecDataSink() {}
701 static void load(const Locale
& inLocale
, CDFLocaleData
* result
, UErrorCode
& status
) {
702 LocalPointer
<NumberingSystem
> ns(NumberingSystem::createInstance(inLocale
, status
));
703 if (U_FAILURE(status
)) {
706 const char* nsName
= ns
->getName();
708 LocalUResourceBundlePointer
resource(ures_open(NULL
, inLocale
.getName(), &status
));
709 if (U_FAILURE(status
)) {
712 CmptDecDataSink
sink(*result
);
713 sink
.isFallback
= FALSE
;
715 // First load the number elements data if nsName is not Latin.
716 if (uprv_strcmp(nsName
, gLatnTag
) != 0) {
717 sink
.isLatin
= FALSE
;
719 path
.append(gNumberElementsTag
, status
)
721 .append(nsName
, status
);
722 ures_getAllItemsWithFallback(resource
.getAlias(), path
.data(), sink
, status
);
723 if (status
== U_MISSING_RESOURCE_ERROR
) {
724 // Silently ignore and use Latin
725 status
= U_ZERO_ERROR
;
726 } else if (U_FAILURE(status
)) {
729 sink
.isFallback
= TRUE
;
734 ures_getAllItemsWithFallback(resource
.getAlias(), gLatnPath
, sink
, status
);
735 if (U_FAILURE(status
)) return;
737 // If longData is empty, default it to be equal to shortData
738 if (result
->longData
.isEmpty()) {
739 result
->longData
.setToBogus();
  // Check for "other" variants in each data class (long and short; currency is
  // pending, see the TODO below), and resolve missing elements.
744 if (!result
->longData
.isBogus()) {
745 checkForOtherVariants(&result
->longData
, status
);
746 if (U_FAILURE(status
)) return;
747 fillInMissing(&result
->longData
);
750 checkForOtherVariants(&result
->shortData
, status
);
751 if (U_FAILURE(status
)) return;
752 fillInMissing(&result
->shortData
);
754 // TODO: Enable this statement when currency support is added
755 // checkForOtherVariants(&result->shortCurrencyData, status);
756 // if (U_FAILURE(status)) return;
757 // fillInMissing(&result->shortCurrencyData);
760 // populatePrefixSuffix Adds a specific prefix-suffix pair to result for a
761 // given variant and log10 value.
762 // variant is 'zero', 'one', 'two', 'few', 'many', or 'other'.
763 // formatStr is the format string from which the prefix and suffix are
// extracted. It is usually of form 'Prefix 000 suffix'.
765 // populatePrefixSuffix returns the number of 0's found in formatStr
766 // before the decimal point.
767 // In the special case that formatStr contains only spaces for prefix
768 // and suffix, populatePrefixSuffix returns log10Value + 1.
769 static int32_t populatePrefixSuffix(
770 const char* variant
, int32_t log10Value
, const UnicodeString
& formatStr
, UHashtable
* result
, UBool overwrite
, UErrorCode
& status
) {
771 if (U_FAILURE(status
)) {
775 // ICU 59 HACK: Ignore negative part of format string, mimicking ICU 58 behavior.
776 // TODO(sffc): Make sure this is fixed during the overhaul port in ICU 60.
777 int32_t semiPos
= formatStr
.indexOf(';', 0);
779 semiPos
= formatStr
.length();
781 UnicodeString positivePart
= formatStr
.tempSubString(0, semiPos
);
783 int32_t firstIdx
= positivePart
.indexOf(kZero
, UPRV_LENGTHOF(kZero
), 0);
784 // We must have 0's in format string.
785 if (firstIdx
== -1) {
786 status
= U_INTERNAL_PROGRAM_ERROR
;
789 int32_t lastIdx
= positivePart
.lastIndexOf(kZero
, UPRV_LENGTHOF(kZero
), firstIdx
);
790 CDFUnit
* unit
= createCDFUnit(variant
, log10Value
, result
, status
);
791 if (U_FAILURE(status
)) {
795 // Return -1 if we are not overwriting an existing value
796 if (unit
->isSet() && !overwrite
) {
801 // Everything up to first 0 is the prefix
802 unit
->prefix
= positivePart
.tempSubString(0, firstIdx
);
803 fixQuotes(unit
->prefix
);
804 // Everything beyond the last 0 is the suffix
805 unit
->suffix
= positivePart
.tempSubString(lastIdx
+ 1);
806 fixQuotes(unit
->suffix
);
808 // If there is effectively no prefix or suffix, ignore the actual number of
809 // 0's and act as if the number of 0's matches the size of the number.
810 if (onlySpaces(unit
->prefix
) && onlySpaces(unit
->suffix
)) {
811 return log10Value
+ 1;
814 // Calculate number of zeros before decimal point
815 int32_t idx
= firstIdx
+ 1;
816 while (idx
<= lastIdx
&& positivePart
.charAt(idx
) == u_0
) {
819 return (idx
- firstIdx
);
// Calculate a divisor based on the magnitude and number of zeros in the
// template string.
// power10 is the power-of-ten key of the pattern (e.g. 10000) and numZeros is
// the number of integer-digit zeros in the pattern. The divisor is power10
// reduced by one factor of ten for every zero beyond the first, so a pattern
// "00K" under key 10000 yields 1000. With numZeros <= 1 the result is power10
// itself.
static double calculateDivisor(double power10, int32_t numZeros) {
  double divisor = power10;
  for (int32_t i = 1; i < numZeros; ++i) {
    divisor /= 10.0;
  }
  return divisor;
}
832 static UBool
onlySpaces(UnicodeString u
) {
833 return u
.trim().length() == 0;
836 // fixQuotes unescapes single quotes. Don''t -> Don't. Letter 'j' -> Letter j.
837 // Modifies s in place.
838 static void fixQuotes(UnicodeString
& s
) {
839 QuoteState state
= OUTSIDE
;
840 int32_t len
= s
.length();
842 for (int32_t i
= 0; i
< len
; ++i
) {
843 UChar ch
= s
.charAt(i
);
845 if (state
== INSIDE_EMPTY
) {
846 s
.setCharAt(dest
, ch
);
850 s
.setCharAt(dest
, ch
);
857 state
= ch
== u_apos
? INSIDE_EMPTY
: OUTSIDE
;
861 state
= ch
== u_apos
? OUTSIDE
: INSIDE_FULL
;
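// Checks to make sure that an "other" variant is present, and that it is
// populated for every power of 10 that any other plural variant populates.
// Sets status to U_INTERNAL_PROGRAM_ERROR if not.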
872 static void checkForOtherVariants(CDFLocaleStyleData
* result
,
873 UErrorCode
& status
) {
874 if (result
== NULL
|| result
->unitsByVariant
== NULL
) {
878 const CDFUnit
* otherByBase
=
879 (const CDFUnit
*) uhash_get(result
->unitsByVariant
, gOther
);
880 if (otherByBase
== NULL
) {
881 status
= U_INTERNAL_PROGRAM_ERROR
;
885 // Check all other plural variants, and make sure that if
886 // any of them are populated, then other is also populated
887 int32_t pos
= UHASH_FIRST
;
888 const UHashElement
* element
;
889 while ((element
= uhash_nextElement(result
->unitsByVariant
, &pos
)) != NULL
) {
890 CDFUnit
* variantsByBase
= (CDFUnit
*) element
->value
.pointer
;
891 if (variantsByBase
== otherByBase
) continue;
892 for (int32_t log10Value
= 0; log10Value
< MAX_DIGITS
; ++log10Value
) {
893 if (variantsByBase
[log10Value
].isSet()
894 && !otherByBase
[log10Value
].isSet()) {
895 status
= U_INTERNAL_PROGRAM_ERROR
;
902 // fillInMissing ensures that the data in result is complete.
903 // result data is complete if for each variant in result, there exists
904 // a prefix-suffix pair for each log10 value and there also exists
905 // a divisor for each log10 value.
// First this function figures out for which log10 values the 'other'
// variant already has data. These are the log10 values defined in CLDR.
911 // For each log10 value not defined in CLDR, it uses the divisor for
912 // the last defined log10 value or 1.
914 // Then for each variant, it does the following. For each log10
915 // value not defined in CLDR, copy the prefix-suffix pair from the
916 // previous log10 value. If log10 value is defined in CLDR but is
917 // missing from given variant, copy the prefix-suffix pair for that
918 // log10 value from the 'other' variant.
919 static void fillInMissing(CDFLocaleStyleData
* result
) {
920 const CDFUnit
* otherUnits
=
921 (const CDFUnit
*) uhash_get(result
->unitsByVariant
, gOther
);
922 UBool definedInCLDR
[MAX_DIGITS
];
923 double lastDivisor
= 1.0;
924 for (int32_t i
= 0; i
< MAX_DIGITS
; ++i
) {
925 if (!otherUnits
[i
].isSet()) {
926 result
->divisors
[i
] = lastDivisor
;
927 definedInCLDR
[i
] = FALSE
;
929 lastDivisor
= result
->divisors
[i
];
930 definedInCLDR
[i
] = TRUE
;
933 // Iterate over each variant.
934 int32_t pos
= UHASH_FIRST
;
935 const UHashElement
* element
= uhash_nextElement(result
->unitsByVariant
, &pos
);
936 for (;element
!= NULL
; element
= uhash_nextElement(result
->unitsByVariant
, &pos
)) {
937 CDFUnit
* units
= (CDFUnit
*) element
->value
.pointer
;
938 for (int32_t i
= 0; i
< MAX_DIGITS
; ++i
) {
939 if (definedInCLDR
[i
]) {
940 if (!units
[i
].isSet()) {
941 units
[i
] = otherUnits
[i
];
945 units
[0].markAsSet();
947 units
[i
] = units
[i
- 1];
// computeLog10 computes floor(log10(x)), capped at an upper bound. If inRange
// is TRUE, the largest value computeLog10 will return is MAX_DIGITS - 1, even
// for numbers > 10^MAX_DIGITS. If inRange is FALSE, the largest value it will
// return is MAX_DIGITS.
958 static int32_t computeLog10(double x
, UBool inRange
) {
960 int32_t max
= inRange
? MAX_DIGITS
- 1 : MAX_DIGITS
;
971 // createCDFUnit returns a pointer to the prefix-suffix pair for a given
972 // variant and log10 value within table. If no such prefix-suffix pair is
973 // stored in table, one is created within table before returning pointer.
974 static CDFUnit
* createCDFUnit(const char* variant
, int32_t log10Value
, UHashtable
* table
, UErrorCode
& status
) {
975 if (U_FAILURE(status
)) {
978 CDFUnit
*cdfUnit
= (CDFUnit
*) uhash_get(table
, variant
);
979 if (cdfUnit
== NULL
) {
980 cdfUnit
= new CDFUnit
[MAX_DIGITS
];
981 if (cdfUnit
== NULL
) {
982 status
= U_MEMORY_ALLOCATION_ERROR
;
985 uhash_put(table
, uprv_strdup(variant
), cdfUnit
, &status
);
986 if (U_FAILURE(status
)) {
990 CDFUnit
* result
= &cdfUnit
[log10Value
];
994 // getCDFUnitFallback returns a pointer to the prefix-suffix pair for a given
995 // variant and log10 value within table. If the given variant doesn't exist, it
996 // falls back to the OTHER variant. Therefore, this method will always return
997 // some non-NULL value.
998 static const CDFUnit
* getCDFUnitFallback(const UHashtable
* table
, const UnicodeString
& variant
, int32_t log10Value
) {
1000 UErrorCode status
= U_ZERO_ERROR
;
1001 const CDFUnit
*cdfUnit
= NULL
;
1002 cvariant
.appendInvariantChars(variant
, status
);
1003 if (!U_FAILURE(status
)) {
1004 cdfUnit
= (const CDFUnit
*) uhash_get(table
, cvariant
.data());
1006 if (cdfUnit
== NULL
) {
1007 cdfUnit
= (const CDFUnit
*) uhash_get(table
, gOther
);
1009 return &cdfUnit
[log10Value
];