]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
51004dcb A |
3 | /* |
4 | ******************************************************************************* | |
2ca993e8 | 5 | * Copyright (C) 1997-2015, International Business Machines Corporation and * |
51004dcb A |
6 | * others. All Rights Reserved. * |
7 | ******************************************************************************* | |
8 | * | |
9 | * File COMPACTDECIMALFORMAT.CPP | |
10 | * | |
11 | ******************************************************************************** | |
12 | */ | |
13 | #include "unicode/utypes.h" | |
14 | ||
15 | #if !UCONFIG_NO_FORMATTING | |
16 | ||
17 | #include "charstr.h" | |
18 | #include "cstring.h" | |
19 | #include "digitlst.h" | |
20 | #include "mutex.h" | |
21 | #include "unicode/compactdecimalformat.h" | |
22 | #include "unicode/numsys.h" | |
23 | #include "unicode/plurrule.h" | |
24 | #include "unicode/ures.h" | |
25 | #include "ucln_in.h" | |
26 | #include "uhash.h" | |
27 | #include "umutex.h" | |
28 | #include "unicode/ures.h" | |
29 | #include "uresimp.h" | |
30 | ||
51004dcb A |
31 | // Maps locale name to CDFLocaleData struct. |
32 | static UHashtable* gCompactDecimalData = NULL; | |
33 | static UMutex gCompactDecimalMetaLock = U_MUTEX_INITIALIZER; | |
34 | ||
35 | U_NAMESPACE_BEGIN | |
36 | ||
37 | static const int32_t MAX_DIGITS = 15; | |
38 | static const char gOther[] = "other"; | |
39 | static const char gLatnTag[] = "latn"; | |
40 | static const char gNumberElementsTag[] = "NumberElements"; | |
41 | static const char gDecimalFormatTag[] = "decimalFormat"; | |
42 | static const char gPatternsShort[] = "patternsShort"; | |
43 | static const char gPatternsLong[] = "patternsLong"; | |
f3c0d7a5 | 44 | static const char gLatnPath[] = "NumberElements/latn"; |
51004dcb A |
45 | |
46 | static const UChar u_0 = 0x30; | |
47 | static const UChar u_apos = 0x27; | |
48 | ||
49 | static const UChar kZero[] = {u_0}; | |
50 | ||
// State of the single-quote unescaping state machine.
enum QuoteState {
    OUTSIDE = 0,       // not inside a quoted section
    INSIDE_EMPTY = 1,  // just after an opening quote, nothing quoted yet
    INSIDE_FULL = 2    // inside a quoted section that already has content
};
57 | ||
// Bit flags; each new flag must be the next power of two.
enum FallbackFlags {
    ANY = 0,
    MUST = 1 << 0,
    NOT_ROOT = 1 << 1
    // Next one will be 4 then 8 etc.
};
64 | ||
65 | ||
66 | // CDFUnit represents a prefix-suffix pair for a particular variant | |
67 | // and log10 value. | |
68 | struct CDFUnit : public UMemory { | |
69 | UnicodeString prefix; | |
70 | UnicodeString suffix; | |
71 | inline CDFUnit() : prefix(), suffix() { | |
72 | prefix.setToBogus(); | |
73 | } | |
74 | inline ~CDFUnit() {} | |
75 | inline UBool isSet() const { | |
76 | return !prefix.isBogus(); | |
77 | } | |
78 | inline void markAsSet() { | |
79 | prefix.remove(); | |
80 | } | |
81 | }; | |
82 | ||
83 | // CDFLocaleStyleData contains formatting data for a particular locale | |
84 | // and style. | |
85 | class CDFLocaleStyleData : public UMemory { | |
86 | public: | |
87 | // What to divide by for each log10 value when formatting. These values | |
88 | // will be powers of 10. For English, would be: | |
89 | // 1, 1, 1, 1000, 1000, 1000, 1000000, 1000000, 1000000, 1000000000 ... | |
90 | double divisors[MAX_DIGITS]; | |
91 | // Maps plural variants to CDFUnit[MAX_DIGITS] arrays. | |
92 | // To format a number x, | |
93 | // first compute log10(x). Compute displayNum = (x / divisors[log10(x)]). | |
94 | // Compute the plural variant for displayNum | |
95 | // (e.g zero, one, two, few, many, other). | |
96 | // Compute cdfUnits = unitsByVariant[pluralVariant]. | |
97 | // Prefix and suffix to use at cdfUnits[log10(x)] | |
98 | UHashtable* unitsByVariant; | |
f3c0d7a5 A |
99 | // A flag for whether or not this CDFLocaleStyleData was loaded from the |
100 | // Latin numbering system as a fallback from the locale numbering system. | |
101 | // This value is meaningless if the object is bogus or empty. | |
102 | UBool fromFallback; | |
103 | inline CDFLocaleStyleData() : unitsByVariant(NULL), fromFallback(FALSE) { | |
104 | uprv_memset(divisors, 0, sizeof(divisors)); | |
105 | } | |
51004dcb A |
106 | ~CDFLocaleStyleData(); |
107 | // Init initializes this object. | |
108 | void Init(UErrorCode& status); | |
109 | inline UBool isBogus() const { | |
110 | return unitsByVariant == NULL; | |
111 | } | |
112 | void setToBogus(); | |
f3c0d7a5 A |
113 | UBool isEmpty() { |
114 | return unitsByVariant == NULL || unitsByVariant->count == 0; | |
115 | } | |
51004dcb A |
116 | private: |
117 | CDFLocaleStyleData(const CDFLocaleStyleData&); | |
118 | CDFLocaleStyleData& operator=(const CDFLocaleStyleData&); | |
119 | }; | |
120 | ||
121 | // CDFLocaleData contains formatting data for a particular locale. | |
122 | struct CDFLocaleData : public UMemory { | |
123 | CDFLocaleStyleData shortData; | |
124 | CDFLocaleStyleData longData; | |
125 | inline CDFLocaleData() : shortData(), longData() { } | |
126 | inline ~CDFLocaleData() { } | |
127 | // Init initializes this object. | |
128 | void Init(UErrorCode& status); | |
129 | }; | |
130 | ||
131 | U_NAMESPACE_END | |
132 | ||
133 | U_CDECL_BEGIN | |
134 | ||
135 | static UBool U_CALLCONV cdf_cleanup(void) { | |
136 | if (gCompactDecimalData != NULL) { | |
137 | uhash_close(gCompactDecimalData); | |
138 | gCompactDecimalData = NULL; | |
139 | } | |
140 | return TRUE; | |
141 | } | |
142 | ||
143 | static void U_CALLCONV deleteCDFUnits(void* ptr) { | |
144 | delete [] (icu::CDFUnit*) ptr; | |
145 | } | |
146 | ||
147 | static void U_CALLCONV deleteCDFLocaleData(void* ptr) { | |
148 | delete (icu::CDFLocaleData*) ptr; | |
149 | } | |
150 | ||
151 | U_CDECL_END | |
152 | ||
153 | U_NAMESPACE_BEGIN | |
154 | ||
155 | static UBool divisors_equal(const double* lhs, const double* rhs); | |
156 | static const CDFLocaleStyleData* getCDFLocaleStyleData(const Locale& inLocale, UNumberCompactStyle style, UErrorCode& status); | |
157 | ||
158 | static const CDFLocaleStyleData* extractDataByStyleEnum(const CDFLocaleData& data, UNumberCompactStyle style, UErrorCode& status); | |
159 | static CDFLocaleData* loadCDFLocaleData(const Locale& inLocale, UErrorCode& status); | |
f3c0d7a5 A |
160 | static void load(const Locale& inLocale, CDFLocaleData* result, UErrorCode& status); |
161 | static int32_t populatePrefixSuffix(const char* variant, int32_t log10Value, const UnicodeString& formatStr, UHashtable* result, UBool overwrite, UErrorCode& status); | |
162 | static double calculateDivisor(double power10, int32_t numZeros); | |
51004dcb A |
163 | static UBool onlySpaces(UnicodeString u); |
164 | static void fixQuotes(UnicodeString& s); | |
f3c0d7a5 | 165 | static void checkForOtherVariants(CDFLocaleStyleData* result, UErrorCode& status); |
51004dcb A |
166 | static void fillInMissing(CDFLocaleStyleData* result); |
167 | static int32_t computeLog10(double x, UBool inRange); | |
168 | static CDFUnit* createCDFUnit(const char* variant, int32_t log10Value, UHashtable* table, UErrorCode& status); | |
169 | static const CDFUnit* getCDFUnitFallback(const UHashtable* table, const UnicodeString& variant, int32_t log10Value); | |
170 | ||
171 | UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CompactDecimalFormat) | |
172 | ||
173 | CompactDecimalFormat::CompactDecimalFormat( | |
174 | const DecimalFormat& decimalFormat, | |
175 | const UHashtable* unitsByVariant, | |
176 | const double* divisors, | |
177 | PluralRules* pluralRules) | |
178 | : DecimalFormat(decimalFormat), _unitsByVariant(unitsByVariant), _divisors(divisors), _pluralRules(pluralRules) { | |
179 | } | |
180 | ||
181 | CompactDecimalFormat::CompactDecimalFormat(const CompactDecimalFormat& source) | |
182 | : DecimalFormat(source), _unitsByVariant(source._unitsByVariant), _divisors(source._divisors), _pluralRules(source._pluralRules->clone()) { | |
183 | } | |
184 | ||
185 | CompactDecimalFormat* U_EXPORT2 | |
186 | CompactDecimalFormat::createInstance( | |
187 | const Locale& inLocale, UNumberCompactStyle style, UErrorCode& status) { | |
188 | LocalPointer<DecimalFormat> decfmt((DecimalFormat*) NumberFormat::makeInstance(inLocale, UNUM_DECIMAL, TRUE, status)); | |
189 | if (U_FAILURE(status)) { | |
190 | return NULL; | |
191 | } | |
192 | LocalPointer<PluralRules> pluralRules(PluralRules::forLocale(inLocale, status)); | |
193 | if (U_FAILURE(status)) { | |
194 | return NULL; | |
195 | } | |
196 | const CDFLocaleStyleData* data = getCDFLocaleStyleData(inLocale, style, status); | |
197 | if (U_FAILURE(status)) { | |
198 | return NULL; | |
199 | } | |
200 | CompactDecimalFormat* result = | |
201 | new CompactDecimalFormat(*decfmt, data->unitsByVariant, data->divisors, pluralRules.getAlias()); | |
202 | if (result == NULL) { | |
203 | status = U_MEMORY_ALLOCATION_ERROR; | |
204 | return NULL; | |
205 | } | |
206 | pluralRules.orphan(); | |
207 | result->setMaximumSignificantDigits(3); | |
208 | result->setSignificantDigitsUsed(TRUE); | |
209 | result->setGroupingUsed(FALSE); | |
210 | return result; | |
211 | } | |
212 | ||
213 | CompactDecimalFormat& | |
214 | CompactDecimalFormat::operator=(const CompactDecimalFormat& rhs) { | |
215 | if (this != &rhs) { | |
216 | DecimalFormat::operator=(rhs); | |
217 | _unitsByVariant = rhs._unitsByVariant; | |
218 | _divisors = rhs._divisors; | |
219 | delete _pluralRules; | |
220 | _pluralRules = rhs._pluralRules->clone(); | |
221 | } | |
222 | return *this; | |
223 | } | |
224 | ||
225 | CompactDecimalFormat::~CompactDecimalFormat() { | |
226 | delete _pluralRules; | |
227 | } | |
228 | ||
229 | ||
230 | Format* | |
231 | CompactDecimalFormat::clone(void) const { | |
232 | return new CompactDecimalFormat(*this); | |
233 | } | |
234 | ||
235 | UBool | |
236 | CompactDecimalFormat::operator==(const Format& that) const { | |
237 | if (this == &that) { | |
238 | return TRUE; | |
239 | } | |
240 | return (DecimalFormat::operator==(that) && eqHelper((const CompactDecimalFormat&) that)); | |
241 | } | |
242 | ||
243 | UBool | |
244 | CompactDecimalFormat::eqHelper(const CompactDecimalFormat& that) const { | |
245 | return uhash_equals(_unitsByVariant, that._unitsByVariant) && divisors_equal(_divisors, that._divisors) && (*_pluralRules == *that._pluralRules); | |
246 | } | |
247 | ||
248 | UnicodeString& | |
249 | CompactDecimalFormat::format( | |
250 | double number, | |
251 | UnicodeString& appendTo, | |
252 | FieldPosition& pos) const { | |
2ca993e8 A |
253 | UErrorCode status = U_ZERO_ERROR; |
254 | return format(number, appendTo, pos, status); | |
255 | } | |
256 | ||
257 | UnicodeString& | |
258 | CompactDecimalFormat::format( | |
259 | double number, | |
260 | UnicodeString& appendTo, | |
261 | FieldPosition& pos, | |
262 | UErrorCode &status) const { | |
263 | if (U_FAILURE(status)) { | |
264 | return appendTo; | |
265 | } | |
51004dcb A |
266 | DigitList orig, rounded; |
267 | orig.set(number); | |
268 | UBool isNegative; | |
51004dcb A |
269 | _round(orig, rounded, isNegative, status); |
270 | if (U_FAILURE(status)) { | |
271 | return appendTo; | |
272 | } | |
273 | double roundedDouble = rounded.getDouble(); | |
274 | if (isNegative) { | |
275 | roundedDouble = -roundedDouble; | |
276 | } | |
277 | int32_t baseIdx = computeLog10(roundedDouble, TRUE); | |
278 | double numberToFormat = roundedDouble / _divisors[baseIdx]; | |
279 | UnicodeString variant = _pluralRules->select(numberToFormat); | |
280 | if (isNegative) { | |
281 | numberToFormat = -numberToFormat; | |
282 | } | |
283 | const CDFUnit* unit = getCDFUnitFallback(_unitsByVariant, variant, baseIdx); | |
284 | appendTo += unit->prefix; | |
285 | DecimalFormat::format(numberToFormat, appendTo, pos); | |
286 | appendTo += unit->suffix; | |
287 | return appendTo; | |
288 | } | |
289 | ||
290 | UnicodeString& | |
291 | CompactDecimalFormat::format( | |
292 | double /* number */, | |
293 | UnicodeString& appendTo, | |
294 | FieldPositionIterator* /* posIter */, | |
295 | UErrorCode& status) const { | |
296 | status = U_UNSUPPORTED_ERROR; | |
297 | return appendTo; | |
298 | } | |
299 | ||
2ca993e8 A |
300 | UnicodeString& |
301 | CompactDecimalFormat::format( | |
302 | int32_t number, | |
303 | UnicodeString& appendTo, | |
304 | FieldPosition& pos) const { | |
305 | return format((double) number, appendTo, pos); | |
306 | } | |
307 | ||
308 | UnicodeString& | |
309 | CompactDecimalFormat::format( | |
310 | int32_t number, | |
311 | UnicodeString& appendTo, | |
312 | FieldPosition& pos, | |
313 | UErrorCode &status) const { | |
314 | return format((double) number, appendTo, pos, status); | |
315 | } | |
316 | ||
317 | UnicodeString& | |
318 | CompactDecimalFormat::format( | |
319 | int32_t /* number */, | |
320 | UnicodeString& appendTo, | |
321 | FieldPositionIterator* /* posIter */, | |
322 | UErrorCode& status) const { | |
323 | status = U_UNSUPPORTED_ERROR; | |
324 | return appendTo; | |
325 | } | |
326 | ||
51004dcb A |
327 | UnicodeString& |
328 | CompactDecimalFormat::format( | |
329 | int64_t number, | |
330 | UnicodeString& appendTo, | |
331 | FieldPosition& pos) const { | |
332 | return format((double) number, appendTo, pos); | |
333 | } | |
334 | ||
2ca993e8 A |
335 | UnicodeString& |
336 | CompactDecimalFormat::format( | |
337 | int64_t number, | |
338 | UnicodeString& appendTo, | |
339 | FieldPosition& pos, | |
340 | UErrorCode &status) const { | |
341 | return format((double) number, appendTo, pos, status); | |
342 | } | |
343 | ||
51004dcb A |
344 | UnicodeString& |
345 | CompactDecimalFormat::format( | |
346 | int64_t /* number */, | |
347 | UnicodeString& appendTo, | |
348 | FieldPositionIterator* /* posIter */, | |
349 | UErrorCode& status) const { | |
350 | status = U_UNSUPPORTED_ERROR; | |
351 | return appendTo; | |
352 | } | |
353 | ||
354 | UnicodeString& | |
355 | CompactDecimalFormat::format( | |
f3c0d7a5 | 356 | StringPiece /* number */, |
51004dcb A |
357 | UnicodeString& appendTo, |
358 | FieldPositionIterator* /* posIter */, | |
359 | UErrorCode& status) const { | |
360 | status = U_UNSUPPORTED_ERROR; | |
361 | return appendTo; | |
362 | } | |
363 | ||
364 | UnicodeString& | |
365 | CompactDecimalFormat::format( | |
366 | const DigitList& /* number */, | |
367 | UnicodeString& appendTo, | |
368 | FieldPositionIterator* /* posIter */, | |
369 | UErrorCode& status) const { | |
370 | status = U_UNSUPPORTED_ERROR; | |
371 | return appendTo; | |
372 | } | |
373 | ||
374 | UnicodeString& | |
375 | CompactDecimalFormat::format(const DigitList& /* number */, | |
376 | UnicodeString& appendTo, | |
377 | FieldPosition& /* pos */, | |
378 | UErrorCode& status) const { | |
379 | status = U_UNSUPPORTED_ERROR; | |
380 | return appendTo; | |
381 | } | |
382 | ||
383 | void | |
384 | CompactDecimalFormat::parse( | |
385 | const UnicodeString& /* text */, | |
386 | Formattable& /* result */, | |
387 | ParsePosition& /* parsePosition */) const { | |
388 | } | |
389 | ||
390 | void | |
391 | CompactDecimalFormat::parse( | |
392 | const UnicodeString& /* text */, | |
393 | Formattable& /* result */, | |
394 | UErrorCode& status) const { | |
395 | status = U_UNSUPPORTED_ERROR; | |
396 | } | |
397 | ||
398 | CurrencyAmount* | |
399 | CompactDecimalFormat::parseCurrency( | |
400 | const UnicodeString& /* text */, | |
401 | ParsePosition& /* pos */) const { | |
402 | return NULL; | |
403 | } | |
404 | ||
405 | void CDFLocaleStyleData::Init(UErrorCode& status) { | |
406 | if (unitsByVariant != NULL) { | |
407 | return; | |
408 | } | |
409 | unitsByVariant = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &status); | |
410 | if (U_FAILURE(status)) { | |
411 | return; | |
412 | } | |
413 | uhash_setKeyDeleter(unitsByVariant, uprv_free); | |
414 | uhash_setValueDeleter(unitsByVariant, deleteCDFUnits); | |
415 | } | |
416 | ||
417 | CDFLocaleStyleData::~CDFLocaleStyleData() { | |
418 | setToBogus(); | |
419 | } | |
420 | ||
421 | void CDFLocaleStyleData::setToBogus() { | |
422 | if (unitsByVariant != NULL) { | |
423 | uhash_close(unitsByVariant); | |
424 | unitsByVariant = NULL; | |
425 | } | |
426 | } | |
427 | ||
428 | void CDFLocaleData::Init(UErrorCode& status) { | |
429 | shortData.Init(status); | |
430 | if (U_FAILURE(status)) { | |
431 | return; | |
432 | } | |
433 | longData.Init(status); | |
434 | } | |
435 | ||
436 | // Helper method for operator= | |
437 | static UBool divisors_equal(const double* lhs, const double* rhs) { | |
438 | for (int32_t i = 0; i < MAX_DIGITS; ++i) { | |
439 | if (lhs[i] != rhs[i]) { | |
440 | return FALSE; | |
441 | } | |
442 | } | |
443 | return TRUE; | |
444 | } | |
445 | ||
446 | // getCDFLocaleStyleData returns pointer to formatting data for given locale and | |
447 | // style within the global cache. On cache miss, getCDFLocaleStyleData loads | |
448 | // the data from CLDR into the global cache before returning the pointer. If a | |
449 | // UNUM_LONG data is requested for a locale, and that locale does not have | |
450 | // UNUM_LONG data, getCDFLocaleStyleData will fall back to UNUM_SHORT data for | |
451 | // that locale. | |
452 | static const CDFLocaleStyleData* getCDFLocaleStyleData(const Locale& inLocale, UNumberCompactStyle style, UErrorCode& status) { | |
453 | if (U_FAILURE(status)) { | |
454 | return NULL; | |
455 | } | |
456 | CDFLocaleData* result = NULL; | |
457 | const char* key = inLocale.getName(); | |
458 | { | |
459 | Mutex lock(&gCompactDecimalMetaLock); | |
460 | if (gCompactDecimalData == NULL) { | |
461 | gCompactDecimalData = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &status); | |
462 | if (U_FAILURE(status)) { | |
463 | return NULL; | |
464 | } | |
465 | uhash_setKeyDeleter(gCompactDecimalData, uprv_free); | |
466 | uhash_setValueDeleter(gCompactDecimalData, deleteCDFLocaleData); | |
467 | ucln_i18n_registerCleanup(UCLN_I18N_CDFINFO, cdf_cleanup); | |
468 | } else { | |
469 | result = (CDFLocaleData*) uhash_get(gCompactDecimalData, key); | |
470 | } | |
471 | } | |
472 | if (result != NULL) { | |
473 | return extractDataByStyleEnum(*result, style, status); | |
474 | } | |
475 | ||
476 | result = loadCDFLocaleData(inLocale, status); | |
477 | if (U_FAILURE(status)) { | |
478 | return NULL; | |
479 | } | |
480 | ||
481 | { | |
482 | Mutex lock(&gCompactDecimalMetaLock); | |
483 | CDFLocaleData* temp = (CDFLocaleData*) uhash_get(gCompactDecimalData, key); | |
484 | if (temp != NULL) { | |
485 | delete result; | |
486 | result = temp; | |
487 | } else { | |
488 | uhash_put(gCompactDecimalData, uprv_strdup(key), (void*) result, &status); | |
489 | if (U_FAILURE(status)) { | |
490 | return NULL; | |
491 | } | |
492 | } | |
493 | } | |
494 | return extractDataByStyleEnum(*result, style, status); | |
495 | } | |
496 | ||
497 | static const CDFLocaleStyleData* extractDataByStyleEnum(const CDFLocaleData& data, UNumberCompactStyle style, UErrorCode& status) { | |
498 | switch (style) { | |
499 | case UNUM_SHORT: | |
500 | return &data.shortData; | |
501 | case UNUM_LONG: | |
502 | if (!data.longData.isBogus()) { | |
503 | return &data.longData; | |
504 | } | |
505 | return &data.shortData; | |
506 | default: | |
507 | status = U_ILLEGAL_ARGUMENT_ERROR; | |
508 | return NULL; | |
509 | } | |
510 | } | |
511 | ||
512 | // loadCDFLocaleData loads formatting data from CLDR for a given locale. The | |
513 | // caller owns the returned pointer. | |
514 | static CDFLocaleData* loadCDFLocaleData(const Locale& inLocale, UErrorCode& status) { | |
515 | if (U_FAILURE(status)) { | |
516 | return NULL; | |
517 | } | |
518 | CDFLocaleData* result = new CDFLocaleData; | |
519 | if (result == NULL) { | |
520 | status = U_MEMORY_ALLOCATION_ERROR; | |
521 | return NULL; | |
522 | } | |
523 | result->Init(status); | |
524 | if (U_FAILURE(status)) { | |
525 | delete result; | |
526 | return NULL; | |
527 | } | |
528 | ||
f3c0d7a5 A |
529 | load(inLocale, result, status); |
530 | ||
51004dcb A |
531 | if (U_FAILURE(status)) { |
532 | delete result; | |
533 | return NULL; | |
534 | } | |
535 | return result; | |
536 | } | |
537 | ||
f3c0d7a5 | 538 | namespace { |
51004dcb | 539 | |
f3c0d7a5 A |
540 | struct CmptDecDataSink : public ResourceSink { |
541 | ||
542 | CDFLocaleData& dataBundle; // Where to save values when they are read | |
543 | UBool isLatin; // Whether or not we are traversing the Latin tree | |
544 | UBool isFallback; // Whether or not we are traversing the Latin tree as fallback | |
545 | ||
546 | enum EPatternsTableKey { PATTERNS_SHORT, PATTERNS_LONG }; | |
547 | enum EFormatsTableKey { DECIMAL_FORMAT, CURRENCY_FORMAT }; | |
548 | ||
549 | /* | |
550 | * NumberElements{ <-- top (numbering system table) | |
551 | * latn{ <-- patternsTable (one per numbering system) | |
552 | * patternsLong{ <-- formatsTable (one per pattern) | |
553 | * decimalFormat{ <-- powersOfTenTable (one per format) | |
554 | * 1000{ <-- pluralVariantsTable (one per power of ten) | |
555 | * one{"0 thousand"} <-- plural variant and template | |
556 | */ | |
51004dcb | 557 | |
f3c0d7a5 A |
558 | CmptDecDataSink(CDFLocaleData& _dataBundle) |
559 | : dataBundle(_dataBundle), isLatin(FALSE), isFallback(FALSE) {} | |
560 | virtual ~CmptDecDataSink(); | |
561 | ||
562 | virtual void put(const char *key, ResourceValue &value, UBool isRoot, UErrorCode &errorCode) { | |
563 | // SPECIAL CASE: Don't consume root in the non-Latin numbering system | |
564 | if (isRoot && !isLatin) { return; } | |
565 | ||
566 | ResourceTable patternsTable = value.getTable(errorCode); | |
567 | if (U_FAILURE(errorCode)) { return; } | |
568 | for (int i1 = 0; patternsTable.getKeyAndValue(i1, key, value); ++i1) { | |
569 | ||
570 | // Check for patternsShort or patternsLong | |
571 | EPatternsTableKey patternsTableKey; | |
572 | if (uprv_strcmp(key, gPatternsShort) == 0) { | |
573 | patternsTableKey = PATTERNS_SHORT; | |
574 | } else if (uprv_strcmp(key, gPatternsLong) == 0) { | |
575 | patternsTableKey = PATTERNS_LONG; | |
576 | } else { | |
577 | continue; | |
51004dcb | 578 | } |
51004dcb | 579 | |
f3c0d7a5 A |
580 | // Traverse into the formats table |
581 | ResourceTable formatsTable = value.getTable(errorCode); | |
582 | if (U_FAILURE(errorCode)) { return; } | |
583 | for (int i2 = 0; formatsTable.getKeyAndValue(i2, key, value); ++i2) { | |
584 | ||
585 | // Check for decimalFormat or currencyFormat | |
586 | EFormatsTableKey formatsTableKey; | |
587 | if (uprv_strcmp(key, gDecimalFormatTag) == 0) { | |
588 | formatsTableKey = DECIMAL_FORMAT; | |
589 | // TODO: Enable this statement when currency support is added | |
590 | // } else if (uprv_strcmp(key, gCurrencyFormat) == 0) { | |
591 | // formatsTableKey = CURRENCY_FORMAT; | |
592 | } else { | |
593 | continue; | |
594 | } | |
595 | ||
596 | // Set the current style and destination based on the two keys | |
597 | UNumberCompactStyle style; | |
598 | CDFLocaleStyleData* destination = NULL; | |
599 | if (patternsTableKey == PATTERNS_LONG | |
600 | && formatsTableKey == DECIMAL_FORMAT) { | |
601 | style = UNUM_LONG; | |
602 | destination = &dataBundle.longData; | |
603 | } else if (patternsTableKey == PATTERNS_SHORT | |
604 | && formatsTableKey == DECIMAL_FORMAT) { | |
605 | style = UNUM_SHORT; | |
606 | destination = &dataBundle.shortData; | |
607 | // TODO: Enable the following statements when currency support is added | |
608 | // } else if (patternsTableKey == PATTERNS_SHORT | |
609 | // && formatsTableKey == CURRENCY_FORMAT) { | |
610 | // style = UNUM_SHORT_CURRENCY; // or whatever the enum gets named | |
611 | // destination = &dataBundle.shortCurrencyData; | |
612 | // } else { | |
613 | // // Silently ignore this case | |
614 | // continue; | |
615 | } | |
616 | ||
617 | // SPECIAL CASE: RULES FOR WHETHER OR NOT TO CONSUME THIS TABLE: | |
618 | // 1) Don't consume longData if shortData was consumed from the non-Latin | |
619 | // locale numbering system | |
620 | // 2) Don't consume longData for the first time if this is the root bundle and | |
621 | // shortData is already populated from a more specific locale. Note that if | |
622 | // both longData and shortData are both only in root, longData will be | |
623 | // consumed since it is alphabetically before shortData in the bundle. | |
624 | if (isFallback | |
625 | && style == UNUM_LONG | |
626 | && !dataBundle.shortData.isEmpty() | |
627 | && !dataBundle.shortData.fromFallback) { | |
628 | continue; | |
629 | } | |
630 | if (isRoot | |
631 | && style == UNUM_LONG | |
632 | && dataBundle.longData.isEmpty() | |
633 | && !dataBundle.shortData.isEmpty()) { | |
634 | continue; | |
635 | } | |
636 | ||
637 | // Set the "fromFallback" flag on the data object | |
638 | destination->fromFallback = isFallback; | |
639 | ||
640 | // Traverse into the powers of ten table | |
641 | ResourceTable powersOfTenTable = value.getTable(errorCode); | |
642 | if (U_FAILURE(errorCode)) { return; } | |
643 | for (int i3 = 0; powersOfTenTable.getKeyAndValue(i3, key, value); ++i3) { | |
644 | ||
645 | // The key will always be some even power of 10. e.g 10000. | |
646 | char* endPtr = NULL; | |
647 | double power10 = uprv_strtod(key, &endPtr); | |
648 | if (*endPtr != 0) { | |
649 | errorCode = U_INTERNAL_PROGRAM_ERROR; | |
650 | return; | |
651 | } | |
652 | int32_t log10Value = computeLog10(power10, FALSE); | |
653 | ||
654 | // Silently ignore divisors that are too big. | |
655 | if (log10Value >= MAX_DIGITS) continue; | |
656 | ||
657 | // Iterate over the plural variants ("one", "other", etc) | |
658 | ResourceTable pluralVariantsTable = value.getTable(errorCode); | |
659 | if (U_FAILURE(errorCode)) { return; } | |
660 | for (int i4 = 0; pluralVariantsTable.getKeyAndValue(i4, key, value); ++i4) { | |
661 | const char* pluralVariant = key; | |
662 | const UnicodeString formatStr = value.getUnicodeString(errorCode); | |
663 | ||
664 | // Copy the data into the in-memory data bundle (do not overwrite | |
665 | // existing values) | |
666 | int32_t numZeros = populatePrefixSuffix( | |
667 | pluralVariant, log10Value, formatStr, | |
668 | destination->unitsByVariant, FALSE, errorCode); | |
669 | ||
670 | // If populatePrefixSuffix returns -1, it means that this key has been | |
671 | // encountered already. | |
672 | if (numZeros < 0) { | |
673 | continue; | |
674 | } | |
675 | ||
676 | // Set the divisor, which is based on the number of zeros in the template | |
677 | // string. If the divisor from here is different from the one previously | |
678 | // stored, it means that the number of zeros in different plural variants | |
679 | // differs; throw an exception. | |
680 | // TODO: How should I check for floating-point errors here? | |
681 | // Is there a good reason why "divisor" is double and not long like Java? | |
682 | double divisor = calculateDivisor(power10, numZeros); | |
683 | if (destination->divisors[log10Value] != 0.0 | |
684 | && destination->divisors[log10Value] != divisor) { | |
685 | errorCode = U_INTERNAL_PROGRAM_ERROR; | |
686 | return; | |
687 | } | |
688 | destination->divisors[log10Value] = divisor; | |
51004dcb | 689 | } |
51004dcb | 690 | } |
51004dcb | 691 | } |
f3c0d7a5 | 692 | } |
51004dcb | 693 | } |
f3c0d7a5 | 694 | }; |
51004dcb | 695 | |
f3c0d7a5 A |
696 | // Virtual destructors must be defined out of line. |
697 | CmptDecDataSink::~CmptDecDataSink() {} | |
51004dcb | 698 | |
f3c0d7a5 | 699 | } // namespace |
51004dcb | 700 | |
f3c0d7a5 A |
701 | static void load(const Locale& inLocale, CDFLocaleData* result, UErrorCode& status) { |
702 | LocalPointer<NumberingSystem> ns(NumberingSystem::createInstance(inLocale, status)); | |
51004dcb A |
703 | if (U_FAILURE(status)) { |
704 | return; | |
705 | } | |
f3c0d7a5 | 706 | const char* nsName = ns->getName(); |
51004dcb | 707 | |
f3c0d7a5 | 708 | LocalUResourceBundlePointer resource(ures_open(NULL, inLocale.getName(), &status)); |
51004dcb A |
709 | if (U_FAILURE(status)) { |
710 | return; | |
711 | } | |
f3c0d7a5 A |
712 | CmptDecDataSink sink(*result); |
713 | sink.isFallback = FALSE; | |
714 | ||
715 | // First load the number elements data if nsName is not Latin. | |
716 | if (uprv_strcmp(nsName, gLatnTag) != 0) { | |
717 | sink.isLatin = FALSE; | |
718 | CharString path; | |
719 | path.append(gNumberElementsTag, status) | |
720 | .append('/', status) | |
721 | .append(nsName, status); | |
722 | ures_getAllItemsWithFallback(resource.getAlias(), path.data(), sink, status); | |
723 | if (status == U_MISSING_RESOURCE_ERROR) { | |
724 | // Silently ignore and use Latin | |
725 | status = U_ZERO_ERROR; | |
726 | } else if (U_FAILURE(status)) { | |
51004dcb A |
727 | return; |
728 | } | |
f3c0d7a5 | 729 | sink.isFallback = TRUE; |
51004dcb | 730 | } |
f3c0d7a5 A |
731 | |
732 | // Now load Latin. | |
733 | sink.isLatin = TRUE; | |
734 | ures_getAllItemsWithFallback(resource.getAlias(), gLatnPath, sink, status); | |
735 | if (U_FAILURE(status)) return; | |
736 | ||
737 | // If longData is empty, default it to be equal to shortData | |
738 | if (result->longData.isEmpty()) { | |
739 | result->longData.setToBogus(); | |
51004dcb | 740 | } |
f3c0d7a5 A |
741 | |
742 | // Check for "other" variants in each of the three data classes, and resolve missing elements. | |
743 | ||
744 | if (!result->longData.isBogus()) { | |
745 | checkForOtherVariants(&result->longData, status); | |
746 | if (U_FAILURE(status)) return; | |
747 | fillInMissing(&result->longData); | |
51004dcb | 748 | } |
f3c0d7a5 A |
749 | |
750 | checkForOtherVariants(&result->shortData, status); | |
751 | if (U_FAILURE(status)) return; | |
752 | fillInMissing(&result->shortData); | |
753 | ||
754 | // TODO: Enable this statement when currency support is added | |
755 | // checkForOtherVariants(&result->shortCurrencyData, status); | |
756 | // if (U_FAILURE(status)) return; | |
757 | // fillInMissing(&result->shortCurrencyData); | |
51004dcb A |
758 | } |
759 | ||
760 | // populatePrefixSuffix Adds a specific prefix-suffix pair to result for a | |
761 | // given variant and log10 value. | |
762 | // variant is 'zero', 'one', 'two', 'few', 'many', or 'other'. | |
763 | // formatStr is the format string from which the prefix and suffix are | |
764 | // extracted. It is usually of form 'Pefix 000 suffix'. | |
765 | // populatePrefixSuffix returns the number of 0's found in formatStr | |
766 | // before the decimal point. | |
767 | // In the special case that formatStr contains only spaces for prefix | |
768 | // and suffix, populatePrefixSuffix returns log10Value + 1. | |
769 | static int32_t populatePrefixSuffix( | |
f3c0d7a5 | 770 | const char* variant, int32_t log10Value, const UnicodeString& formatStr, UHashtable* result, UBool overwrite, UErrorCode& status) { |
51004dcb A |
771 | if (U_FAILURE(status)) { |
772 | return 0; | |
773 | } | |
f3c0d7a5 A |
774 | |
775 | // ICU 59 HACK: Ignore negative part of format string, mimicking ICU 58 behavior. | |
776 | // TODO(sffc): Make sure this is fixed during the overhaul port in ICU 60. | |
777 | int32_t semiPos = formatStr.indexOf(';', 0); | |
778 | if (semiPos == -1) { | |
779 | semiPos = formatStr.length(); | |
780 | } | |
781 | UnicodeString positivePart = formatStr.tempSubString(0, semiPos); | |
782 | ||
783 | int32_t firstIdx = positivePart.indexOf(kZero, UPRV_LENGTHOF(kZero), 0); | |
51004dcb A |
784 | // We must have 0's in format string. |
785 | if (firstIdx == -1) { | |
786 | status = U_INTERNAL_PROGRAM_ERROR; | |
787 | return 0; | |
788 | } | |
f3c0d7a5 | 789 | int32_t lastIdx = positivePart.lastIndexOf(kZero, UPRV_LENGTHOF(kZero), firstIdx); |
51004dcb A |
790 | CDFUnit* unit = createCDFUnit(variant, log10Value, result, status); |
791 | if (U_FAILURE(status)) { | |
792 | return 0; | |
793 | } | |
f3c0d7a5 A |
794 | |
795 | // Return -1 if we are not overwriting an existing value | |
796 | if (unit->isSet() && !overwrite) { | |
797 | return -1; | |
798 | } | |
799 | unit->markAsSet(); | |
800 | ||
51004dcb | 801 | // Everything up to first 0 is the prefix |
f3c0d7a5 | 802 | unit->prefix = positivePart.tempSubString(0, firstIdx); |
51004dcb A |
803 | fixQuotes(unit->prefix); |
804 | // Everything beyond the last 0 is the suffix | |
f3c0d7a5 | 805 | unit->suffix = positivePart.tempSubString(lastIdx + 1); |
51004dcb A |
806 | fixQuotes(unit->suffix); |
807 | ||
808 | // If there is effectively no prefix or suffix, ignore the actual number of | |
809 | // 0's and act as if the number of 0's matches the size of the number. | |
810 | if (onlySpaces(unit->prefix) && onlySpaces(unit->suffix)) { | |
811 | return log10Value + 1; | |
812 | } | |
813 | ||
814 | // Calculate number of zeros before decimal point | |
815 | int32_t idx = firstIdx + 1; | |
f3c0d7a5 | 816 | while (idx <= lastIdx && positivePart.charAt(idx) == u_0) { |
51004dcb A |
817 | ++idx; |
818 | } | |
819 | return (idx - firstIdx); | |
820 | } | |
821 | ||
f3c0d7a5 A |
822 | // Calculate a divisor based on the magnitude and number of zeros in the |
823 | // template string. | |
824 | static double calculateDivisor(double power10, int32_t numZeros) { | |
825 | double divisor = power10; | |
826 | for (int32_t i = 1; i < numZeros; ++i) { | |
827 | divisor /= 10.0; | |
828 | } | |
829 | return divisor; | |
830 | } | |
831 | ||
51004dcb A |
832 | static UBool onlySpaces(UnicodeString u) { |
833 | return u.trim().length() == 0; | |
834 | } | |
835 | ||
836 | // fixQuotes unescapes single quotes. Don''t -> Don't. Letter 'j' -> Letter j. | |
837 | // Modifies s in place. | |
838 | static void fixQuotes(UnicodeString& s) { | |
839 | QuoteState state = OUTSIDE; | |
840 | int32_t len = s.length(); | |
841 | int32_t dest = 0; | |
842 | for (int32_t i = 0; i < len; ++i) { | |
843 | UChar ch = s.charAt(i); | |
844 | if (ch == u_apos) { | |
845 | if (state == INSIDE_EMPTY) { | |
846 | s.setCharAt(dest, ch); | |
847 | ++dest; | |
848 | } | |
849 | } else { | |
850 | s.setCharAt(dest, ch); | |
851 | ++dest; | |
852 | } | |
853 | ||
854 | // Update state | |
855 | switch (state) { | |
856 | case OUTSIDE: | |
857 | state = ch == u_apos ? INSIDE_EMPTY : OUTSIDE; | |
858 | break; | |
859 | case INSIDE_EMPTY: | |
860 | case INSIDE_FULL: | |
861 | state = ch == u_apos ? OUTSIDE : INSIDE_FULL; | |
862 | break; | |
863 | default: | |
864 | break; | |
865 | } | |
866 | } | |
867 | s.truncate(dest); | |
868 | } | |
869 | ||
f3c0d7a5 A |
870 | // Checks to make sure that an "other" variant is present in all |
871 | // powers of 10. | |
872 | static void checkForOtherVariants(CDFLocaleStyleData* result, | |
873 | UErrorCode& status) { | |
874 | if (result == NULL || result->unitsByVariant == NULL) { | |
875 | return; | |
876 | } | |
877 | ||
878 | const CDFUnit* otherByBase = | |
879 | (const CDFUnit*) uhash_get(result->unitsByVariant, gOther); | |
880 | if (otherByBase == NULL) { | |
881 | status = U_INTERNAL_PROGRAM_ERROR; | |
882 | return; | |
883 | } | |
884 | ||
885 | // Check all other plural variants, and make sure that if | |
886 | // any of them are populated, then other is also populated | |
887 | int32_t pos = UHASH_FIRST; | |
888 | const UHashElement* element; | |
889 | while ((element = uhash_nextElement(result->unitsByVariant, &pos)) != NULL) { | |
890 | CDFUnit* variantsByBase = (CDFUnit*) element->value.pointer; | |
891 | if (variantsByBase == otherByBase) continue; | |
892 | for (int32_t log10Value = 0; log10Value < MAX_DIGITS; ++log10Value) { | |
893 | if (variantsByBase[log10Value].isSet() | |
894 | && !otherByBase[log10Value].isSet()) { | |
895 | status = U_INTERNAL_PROGRAM_ERROR; | |
896 | return; | |
897 | } | |
898 | } | |
899 | } | |
900 | } | |
901 | ||
51004dcb A |
902 | // fillInMissing ensures that the data in result is complete. |
903 | // result data is complete if for each variant in result, there exists | |
904 | // a prefix-suffix pair for each log10 value and there also exists | |
905 | // a divisor for each log10 value. | |
906 | // | |
907 | // First this function figures out for which log10 values, the other | |
908 | // variant already had data. These are the same log10 values defined | |
909 | // in CLDR. | |
910 | // | |
911 | // For each log10 value not defined in CLDR, it uses the divisor for | |
912 | // the last defined log10 value or 1. | |
913 | // | |
914 | // Then for each variant, it does the following. For each log10 | |
915 | // value not defined in CLDR, copy the prefix-suffix pair from the | |
916 | // previous log10 value. If log10 value is defined in CLDR but is | |
917 | // missing from given variant, copy the prefix-suffix pair for that | |
918 | // log10 value from the 'other' variant. | |
919 | static void fillInMissing(CDFLocaleStyleData* result) { | |
920 | const CDFUnit* otherUnits = | |
921 | (const CDFUnit*) uhash_get(result->unitsByVariant, gOther); | |
922 | UBool definedInCLDR[MAX_DIGITS]; | |
923 | double lastDivisor = 1.0; | |
924 | for (int32_t i = 0; i < MAX_DIGITS; ++i) { | |
925 | if (!otherUnits[i].isSet()) { | |
926 | result->divisors[i] = lastDivisor; | |
927 | definedInCLDR[i] = FALSE; | |
928 | } else { | |
929 | lastDivisor = result->divisors[i]; | |
930 | definedInCLDR[i] = TRUE; | |
931 | } | |
932 | } | |
933 | // Iterate over each variant. | |
b331163b | 934 | int32_t pos = UHASH_FIRST; |
51004dcb A |
935 | const UHashElement* element = uhash_nextElement(result->unitsByVariant, &pos); |
936 | for (;element != NULL; element = uhash_nextElement(result->unitsByVariant, &pos)) { | |
937 | CDFUnit* units = (CDFUnit*) element->value.pointer; | |
938 | for (int32_t i = 0; i < MAX_DIGITS; ++i) { | |
939 | if (definedInCLDR[i]) { | |
940 | if (!units[i].isSet()) { | |
941 | units[i] = otherUnits[i]; | |
942 | } | |
943 | } else { | |
944 | if (i == 0) { | |
945 | units[0].markAsSet(); | |
946 | } else { | |
947 | units[i] = units[i - 1]; | |
948 | } | |
949 | } | |
950 | } | |
951 | } | |
952 | } | |
953 | ||
954 | // computeLog10 computes floor(log10(x)). If inRange is TRUE, the biggest | |
955 | // value computeLog10 will return MAX_DIGITS -1 even for | |
956 | // numbers > 10^MAX_DIGITS. If inRange is FALSE, computeLog10 will return | |
957 | // up to MAX_DIGITS. | |
958 | static int32_t computeLog10(double x, UBool inRange) { | |
959 | int32_t result = 0; | |
960 | int32_t max = inRange ? MAX_DIGITS - 1 : MAX_DIGITS; | |
961 | while (x >= 10.0) { | |
962 | x /= 10.0; | |
963 | ++result; | |
964 | if (result == max) { | |
965 | break; | |
966 | } | |
967 | } | |
968 | return result; | |
969 | } | |
970 | ||
971 | // createCDFUnit returns a pointer to the prefix-suffix pair for a given | |
972 | // variant and log10 value within table. If no such prefix-suffix pair is | |
973 | // stored in table, one is created within table before returning pointer. | |
974 | static CDFUnit* createCDFUnit(const char* variant, int32_t log10Value, UHashtable* table, UErrorCode& status) { | |
975 | if (U_FAILURE(status)) { | |
976 | return NULL; | |
977 | } | |
978 | CDFUnit *cdfUnit = (CDFUnit*) uhash_get(table, variant); | |
979 | if (cdfUnit == NULL) { | |
980 | cdfUnit = new CDFUnit[MAX_DIGITS]; | |
981 | if (cdfUnit == NULL) { | |
982 | status = U_MEMORY_ALLOCATION_ERROR; | |
983 | return NULL; | |
984 | } | |
985 | uhash_put(table, uprv_strdup(variant), cdfUnit, &status); | |
986 | if (U_FAILURE(status)) { | |
987 | return NULL; | |
988 | } | |
989 | } | |
990 | CDFUnit* result = &cdfUnit[log10Value]; | |
51004dcb A |
991 | return result; |
992 | } | |
993 | ||
994 | // getCDFUnitFallback returns a pointer to the prefix-suffix pair for a given | |
995 | // variant and log10 value within table. If the given variant doesn't exist, it | |
996 | // falls back to the OTHER variant. Therefore, this method will always return | |
997 | // some non-NULL value. | |
998 | static const CDFUnit* getCDFUnitFallback(const UHashtable* table, const UnicodeString& variant, int32_t log10Value) { | |
999 | CharString cvariant; | |
1000 | UErrorCode status = U_ZERO_ERROR; | |
1001 | const CDFUnit *cdfUnit = NULL; | |
1002 | cvariant.appendInvariantChars(variant, status); | |
1003 | if (!U_FAILURE(status)) { | |
1004 | cdfUnit = (const CDFUnit*) uhash_get(table, cvariant.data()); | |
1005 | } | |
1006 | if (cdfUnit == NULL) { | |
1007 | cdfUnit = (const CDFUnit*) uhash_get(table, gOther); | |
1008 | } | |
1009 | return &cdfUnit[log10Value]; | |
1010 | } | |
1011 | ||
1012 | U_NAMESPACE_END | |
1013 | #endif |