1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 #include "unicode/utypes.h"
6 #if !UCONFIG_NO_FORMATTING
8 #include "unicode/ustring.h"
9 #include "unicode/ures.h"
13 #include "number_compact.h"
14 #include "number_microprops.h"
18 using namespace icu::number
;
19 using namespace icu::number::impl
;
23 // A dummy object used when a "0" compact decimal entry is encountered. This is necessary
24 // in order to prevent falling back to root. Object equality ("==") is intended.
25 const UChar
*USE_FALLBACK
= u
"<USE FALLBACK>";
27 /** Produces a string like "NumberElements/latn/patternsShort/decimalFormat". */
28 void getResourceBundleKey(const char *nsName
, CompactStyle compactStyle
, CompactType compactType
,
29 CharString
&sb
, UErrorCode
&status
) {
31 sb
.append("NumberElements/", status
);
32 sb
.append(nsName
, status
);
33 sb
.append(compactStyle
== CompactStyle::UNUM_SHORT
? "/patternsShort" : "/patternsLong", status
);
34 sb
.append(compactType
== CompactType::TYPE_DECIMAL
? "/decimalFormat" : "/currencyFormat", status
);
37 int32_t getIndex(int32_t magnitude
, StandardPlural::Form plural
) {
38 return magnitude
* StandardPlural::COUNT
+ plural
;
41 int32_t countZeros(const UChar
*patternString
, int32_t patternLength
) {
42 // NOTE: This strategy for computing the number of zeros is a hack for efficiency.
43 // It could break if there are any 0s that aren't part of the main pattern.
45 for (int32_t i
= 0; i
< patternLength
; i
++) {
46 if (patternString
[i
] == u
'0') {
48 } else if (numZeros
> 0) {
49 break; // zeros should always be contiguous
57 // NOTE: patterns and multipliers both get zero-initialized.
58 CompactData::CompactData() : patterns(), multipliers(), largestMagnitude(0), isEmpty(TRUE
) {
61 void CompactData::populate(const Locale
&locale
, const char *nsName
, CompactStyle compactStyle
,
62 CompactType compactType
, UErrorCode
&status
) {
63 CompactDataSink
sink(*this);
64 LocalUResourceBundlePointer
rb(ures_open(nullptr, locale
.getName(), &status
));
65 if (U_FAILURE(status
)) { return; }
67 bool nsIsLatn
= strcmp(nsName
, "latn") == 0;
68 bool compactIsShort
= compactStyle
== CompactStyle::UNUM_SHORT
;
70 // Fall back to latn numbering system and/or short compact style.
71 CharString resourceKey
;
72 getResourceBundleKey(nsName
, compactStyle
, compactType
, resourceKey
, status
);
73 UErrorCode localStatus
= U_ZERO_ERROR
;
74 ures_getAllItemsWithFallback(rb
.getAlias(), resourceKey
.data(), sink
, localStatus
);
75 if (isEmpty
&& !nsIsLatn
) {
76 getResourceBundleKey("latn", compactStyle
, compactType
, resourceKey
, status
);
77 localStatus
= U_ZERO_ERROR
;
78 ures_getAllItemsWithFallback(rb
.getAlias(), resourceKey
.data(), sink
, localStatus
);
80 if (isEmpty
&& !compactIsShort
) {
81 getResourceBundleKey(nsName
, CompactStyle::UNUM_SHORT
, compactType
, resourceKey
, status
);
82 localStatus
= U_ZERO_ERROR
;
83 ures_getAllItemsWithFallback(rb
.getAlias(), resourceKey
.data(), sink
, localStatus
);
85 if (isEmpty
&& !nsIsLatn
&& !compactIsShort
) {
86 getResourceBundleKey("latn", CompactStyle::UNUM_SHORT
, compactType
, resourceKey
, status
);
87 localStatus
= U_ZERO_ERROR
;
88 ures_getAllItemsWithFallback(rb
.getAlias(), resourceKey
.data(), sink
, localStatus
);
91 // The last fallback should be guaranteed to return data.
93 status
= U_INTERNAL_PROGRAM_ERROR
;
97 int32_t CompactData::getMultiplier(int32_t magnitude
) const {
101 if (magnitude
> largestMagnitude
) {
102 magnitude
= largestMagnitude
;
104 return multipliers
[magnitude
];
107 const UChar
*CompactData::getPattern(int32_t magnitude
, StandardPlural::Form plural
) const {
111 if (magnitude
> largestMagnitude
) {
112 magnitude
= largestMagnitude
;
114 const UChar
*patternString
= patterns
[getIndex(magnitude
, plural
)];
115 if (patternString
== nullptr && plural
!= StandardPlural::OTHER
) {
116 // Fall back to "other" plural variant
117 patternString
= patterns
[getIndex(magnitude
, StandardPlural::OTHER
)];
119 if (patternString
== USE_FALLBACK
) { // == is intended
120 // Return null if USE_FALLBACK is present
121 patternString
= nullptr;
123 return patternString
;
126 void CompactData::getUniquePatterns(UVector
&output
, UErrorCode
&status
) const {
127 U_ASSERT(output
.isEmpty());
128 // NOTE: In C++, this is done more manually with a UVector.
129 // In Java, we can take advantage of JDK HashSet.
130 for (auto pattern
: patterns
) {
131 if (pattern
== nullptr || pattern
== USE_FALLBACK
) {
135 // Insert pattern into the UVector if the UVector does not already contain the pattern.
136 // Search the UVector from the end since identical patterns are likely to be adjacent.
137 for (int32_t i
= output
.size() - 1; i
>= 0; i
--) {
138 if (u_strcmp(pattern
, static_cast<const UChar
*>(output
[i
])) == 0) {
143 // The string was not found; add it to the UVector.
144 // ANDY: This requires a const_cast. Why?
145 output
.addElement(const_cast<UChar
*>(pattern
), status
);
152 void CompactData::CompactDataSink::put(const char *key
, ResourceValue
&value
, UBool
/*noFallback*/,
153 UErrorCode
&status
) {
154 // traverse into the table of powers of ten
155 ResourceTable powersOfTenTable
= value
.getTable(status
);
156 if (U_FAILURE(status
)) { return; }
157 for (int i3
= 0; powersOfTenTable
.getKeyAndValue(i3
, key
, value
); ++i3
) {
159 // Assumes that the keys are always of the form "10000" where the magnitude is the
160 // length of the key minus one. We expect magnitudes to be less than MAX_DIGITS.
161 auto magnitude
= static_cast<int8_t> (strlen(key
) - 1);
162 int8_t multiplier
= data
.multipliers
[magnitude
];
163 U_ASSERT(magnitude
< COMPACT_MAX_DIGITS
);
165 // Iterate over the plural variants ("one", "other", etc)
166 ResourceTable pluralVariantsTable
= value
.getTable(status
);
167 if (U_FAILURE(status
)) { return; }
168 for (int i4
= 0; pluralVariantsTable
.getKeyAndValue(i4
, key
, value
); ++i4
) {
170 // Skip this magnitude/plural if we already have it from a child locale.
171 // Note: This also skips USE_FALLBACK entries.
172 StandardPlural::Form plural
= StandardPlural::fromString(key
, status
);
173 if (U_FAILURE(status
)) { return; }
174 if (data
.patterns
[getIndex(magnitude
, plural
)] != nullptr) {
178 // The value "0" means that we need to use the default pattern and not fall back
179 // to parent locales. Example locale where this is relevant: 'it'.
180 int32_t patternLength
;
181 const UChar
*patternString
= value
.getString(patternLength
, status
);
182 if (U_FAILURE(status
)) { return; }
183 if (u_strcmp(patternString
, u
"0") == 0) {
184 patternString
= USE_FALLBACK
;
188 // Save the pattern string. We will parse it lazily.
189 data
.patterns
[getIndex(magnitude
, plural
)] = patternString
;
191 // If necessary, compute the multiplier: the difference between the magnitude
192 // and the number of zeros in the pattern.
193 if (multiplier
== 0) {
194 int32_t numZeros
= countZeros(patternString
, patternLength
);
195 if (numZeros
> 0) { // numZeros==0 in certain cases, like Somali "Kun"
196 multiplier
= static_cast<int8_t> (numZeros
- magnitude
- 1);
201 // Save the multiplier.
202 if (data
.multipliers
[magnitude
] == 0) {
203 data
.multipliers
[magnitude
] = multiplier
;
204 if (magnitude
> data
.largestMagnitude
) {
205 data
.largestMagnitude
= magnitude
;
207 data
.isEmpty
= false;
209 U_ASSERT(data
.multipliers
[magnitude
] == multiplier
);
214 ///////////////////////////////////////////////////////////
215 /// END OF CompactData.java; BEGIN CompactNotation.java ///
216 ///////////////////////////////////////////////////////////
218 CompactHandler::CompactHandler(CompactStyle compactStyle
, const Locale
&locale
, const char *nsName
,
219 CompactType compactType
, const PluralRules
*rules
,
220 MutablePatternModifier
*buildReference
, const MicroPropsGenerator
*parent
,
222 : rules(rules
), parent(parent
) {
223 data
.populate(locale
, nsName
, compactStyle
, compactType
, status
);
224 if (buildReference
!= nullptr) {
226 precomputeAllModifiers(*buildReference
, status
);
234 CompactHandler::~CompactHandler() {
235 for (int32_t i
= 0; i
< precomputedModsLength
; i
++) {
236 delete precomputedMods
[i
].mod
;
240 void CompactHandler::precomputeAllModifiers(MutablePatternModifier
&buildReference
, UErrorCode
&status
) {
241 if (U_FAILURE(status
)) { return; }
243 // Initial capacity of 12 for 0K, 00K, 000K, ...M, ...B, and ...T
244 UVector
allPatterns(12, status
);
245 if (U_FAILURE(status
)) { return; }
246 data
.getUniquePatterns(allPatterns
, status
);
247 if (U_FAILURE(status
)) { return; }
249 // C++ only: ensure that precomputedMods has room.
250 precomputedModsLength
= allPatterns
.size();
251 if (precomputedMods
.getCapacity() < precomputedModsLength
) {
252 precomputedMods
.resize(allPatterns
.size(), status
);
253 if (U_FAILURE(status
)) { return; }
256 for (int32_t i
= 0; i
< precomputedModsLength
; i
++) {
257 auto patternString
= static_cast<const UChar
*>(allPatterns
[i
]);
258 UnicodeString
hello(patternString
);
259 CompactModInfo
&info
= precomputedMods
[i
];
260 ParsedPatternInfo patternInfo
;
261 PatternParser::parseToPatternInfo(UnicodeString(patternString
), patternInfo
, status
);
262 if (U_FAILURE(status
)) { return; }
263 buildReference
.setPatternInfo(&patternInfo
);
264 info
.mod
= buildReference
.createImmutable(status
);
265 if (U_FAILURE(status
)) { return; }
266 info
.patternString
= patternString
;
270 void CompactHandler::processQuantity(DecimalQuantity
&quantity
, MicroProps
µs
,
271 UErrorCode
&status
) const {
272 parent
->processQuantity(quantity
, micros
, status
);
273 if (U_FAILURE(status
)) { return; }
275 // Treat zero as if it had magnitude 0
277 if (quantity
.isZero()) {
279 micros
.rounder
.apply(quantity
, status
);
281 // TODO: Revisit chooseMultiplierAndApply
282 int multiplier
= micros
.rounder
.chooseMultiplierAndApply(quantity
, data
, status
);
283 magnitude
= quantity
.isZero() ? 0 : quantity
.getMagnitude();
284 magnitude
-= multiplier
;
287 StandardPlural::Form plural
= utils::getStandardPlural(rules
, quantity
);
288 const UChar
*patternString
= data
.getPattern(magnitude
, plural
);
289 if (patternString
== nullptr) {
290 // Use the default (non-compact) modifier.
291 // No need to take any action.
294 // Java uses a hash set here for O(1) lookup. C++ uses a linear search.
295 // TODO: Benchmark this and maybe change to a binary search or hash table.
297 for (; i
< precomputedModsLength
; i
++) {
298 const CompactModInfo
&info
= precomputedMods
[i
];
299 if (u_strcmp(patternString
, info
.patternString
) == 0) {
300 info
.mod
->applyToMicros(micros
, quantity
);
304 // It should be guaranteed that we found the entry.
305 U_ASSERT(i
< precomputedModsLength
);
308 // Overwrite the PatternInfo in the existing modMiddle.
309 // C++ Note: Use unsafePatternInfo for proper lifecycle.
310 ParsedPatternInfo
&patternInfo
= const_cast<CompactHandler
*>(this)->unsafePatternInfo
;
311 PatternParser::parseToPatternInfo(UnicodeString(patternString
), patternInfo
, status
);
312 static_cast<MutablePatternModifier
*>(const_cast<Modifier
*>(micros
.modMiddle
))
313 ->setPatternInfo(&patternInfo
);
316 // We already performed rounding. Do not perform it again.
317 micros
.rounder
= RoundingImpl::passThrough();
320 #endif /* #if !UCONFIG_NO_FORMATTING */