]> git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/number_modifiers.cpp
ICU-66108.tar.gz
[apple/icu.git] / icuSources / i18n / number_modifiers.cpp
1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3
4 #include "unicode/utypes.h"
5
6 #if !UCONFIG_NO_FORMATTING
7
8 #include "umutex.h"
9 #include "ucln_cmn.h"
10 #include "ucln_in.h"
11 #include "number_modifiers.h"
12
13 using namespace icu;
14 using namespace icu::number;
15 using namespace icu::number::impl;
16
17 namespace {
18
19 // TODO: This is copied from simpleformatter.cpp
20 const int32_t ARG_NUM_LIMIT = 0x100;
21
22 // These are the default currency spacing UnicodeSets in CLDR.
23 // Pre-compute them for performance.
24 // The Java unit test testCurrencySpacingPatternStability() will start failing if these change in CLDR.
25 icu::UInitOnce gDefaultCurrencySpacingInitOnce = U_INITONCE_INITIALIZER;
26
27 UnicodeSet *UNISET_DIGIT = nullptr;
28 UnicodeSet *UNISET_NOTS = nullptr;
29
30 UBool U_CALLCONV cleanupDefaultCurrencySpacing() {
31 delete UNISET_DIGIT;
32 UNISET_DIGIT = nullptr;
33 delete UNISET_NOTS;
34 UNISET_NOTS = nullptr;
35 gDefaultCurrencySpacingInitOnce.reset();
36 return TRUE;
37 }
38
39 void U_CALLCONV initDefaultCurrencySpacing(UErrorCode &status) {
40 ucln_i18n_registerCleanup(UCLN_I18N_CURRENCY_SPACING, cleanupDefaultCurrencySpacing);
41 UNISET_DIGIT = new UnicodeSet(UnicodeString(u"[:digit:]"), status);
42 UNISET_NOTS = new UnicodeSet(UnicodeString(u"[:^S:]"), status);
43 if (UNISET_DIGIT == nullptr || UNISET_NOTS == nullptr) {
44 status = U_MEMORY_ALLOCATION_ERROR;
45 return;
46 }
47 UNISET_DIGIT->freeze();
48 UNISET_NOTS->freeze();
49 }
50
51 } // namespace
52
53
54 Modifier::~Modifier() = default;
55
56 Modifier::Parameters::Parameters()
57 : obj(nullptr) {}
58
59 Modifier::Parameters::Parameters(
60 const ModifierStore* _obj, Signum _signum, StandardPlural::Form _plural)
61 : obj(_obj), signum(_signum), plural(_plural) {}
62
63 ModifierStore::~ModifierStore() = default;
64
65 AdoptingModifierStore::~AdoptingModifierStore() {
66 for (const Modifier *mod : mods) {
67 delete mod;
68 }
69 }
70
71
72 int32_t ConstantAffixModifier::apply(FormattedStringBuilder &output, int leftIndex, int rightIndex,
73 UErrorCode &status) const {
74 // Insert the suffix first since inserting the prefix will change the rightIndex
75 int length = output.insert(rightIndex, fSuffix, fField, status);
76 length += output.insert(leftIndex, fPrefix, fField, status);
77 return length;
78 }
79
80 int32_t ConstantAffixModifier::getPrefixLength() const {
81 return fPrefix.length();
82 }
83
84 int32_t ConstantAffixModifier::getCodePointCount() const {
85 return fPrefix.countChar32() + fSuffix.countChar32();
86 }
87
88 bool ConstantAffixModifier::isStrong() const {
89 return fStrong;
90 }
91
92 bool ConstantAffixModifier::containsField(UNumberFormatFields field) const {
93 (void)field;
94 // This method is not currently used.
95 UPRV_UNREACHABLE;
96 }
97
98 void ConstantAffixModifier::getParameters(Parameters& output) const {
99 (void)output;
100 // This method is not currently used.
101 UPRV_UNREACHABLE;
102 }
103
104 bool ConstantAffixModifier::semanticallyEquivalent(const Modifier& other) const {
105 auto* _other = dynamic_cast<const ConstantAffixModifier*>(&other);
106 if (_other == nullptr) {
107 return false;
108 }
109 return fPrefix == _other->fPrefix
110 && fSuffix == _other->fSuffix
111 && fField == _other->fField
112 && fStrong == _other->fStrong;
113 }
114
115
116 SimpleModifier::SimpleModifier(const SimpleFormatter &simpleFormatter, Field field, bool strong)
117 : SimpleModifier(simpleFormatter, field, strong, {}) {}
118
119 SimpleModifier::SimpleModifier(const SimpleFormatter &simpleFormatter, Field field, bool strong,
120 const Modifier::Parameters parameters)
121 : fCompiledPattern(simpleFormatter.compiledPattern), fField(field), fStrong(strong),
122 fParameters(parameters) {
123 int32_t argLimit = SimpleFormatter::getArgumentLimit(
124 fCompiledPattern.getBuffer(), fCompiledPattern.length());
125 if (argLimit == 0) {
126 // No arguments in compiled pattern
127 fPrefixLength = fCompiledPattern.charAt(1) - ARG_NUM_LIMIT;
128 U_ASSERT(2 + fPrefixLength == fCompiledPattern.length());
129 // Set suffixOffset = -1 to indicate no arguments in compiled pattern.
130 fSuffixOffset = -1;
131 fSuffixLength = 0;
132 } else {
133 U_ASSERT(argLimit == 1);
134 if (fCompiledPattern.charAt(1) != 0) {
135 // Found prefix
136 fPrefixLength = fCompiledPattern.charAt(1) - ARG_NUM_LIMIT;
137 fSuffixOffset = 3 + fPrefixLength;
138 } else {
139 // No prefix
140 fPrefixLength = 0;
141 fSuffixOffset = 2;
142 }
143 if (3 + fPrefixLength < fCompiledPattern.length()) {
144 // Found suffix
145 fSuffixLength = fCompiledPattern.charAt(fSuffixOffset) - ARG_NUM_LIMIT;
146 } else {
147 // No suffix
148 fSuffixLength = 0;
149 }
150 }
151 }
152
153 SimpleModifier::SimpleModifier()
154 : fField(UNUM_FIELD_COUNT), fStrong(false), fPrefixLength(0), fSuffixLength(0) {
155 }
156
157 int32_t SimpleModifier::apply(FormattedStringBuilder &output, int leftIndex, int rightIndex,
158 UErrorCode &status) const {
159 return formatAsPrefixSuffix(output, leftIndex, rightIndex, status);
160 }
161
162 int32_t SimpleModifier::getPrefixLength() const {
163 return fPrefixLength;
164 }
165
166 int32_t SimpleModifier::getCodePointCount() const {
167 int32_t count = 0;
168 if (fPrefixLength > 0) {
169 count += fCompiledPattern.countChar32(2, fPrefixLength);
170 }
171 if (fSuffixLength > 0) {
172 count += fCompiledPattern.countChar32(1 + fSuffixOffset, fSuffixLength);
173 }
174 return count;
175 }
176
177 bool SimpleModifier::isStrong() const {
178 return fStrong;
179 }
180
181 bool SimpleModifier::containsField(UNumberFormatFields field) const {
182 (void)field;
183 // This method is not currently used.
184 UPRV_UNREACHABLE;
185 }
186
187 void SimpleModifier::getParameters(Parameters& output) const {
188 output = fParameters;
189 }
190
191 bool SimpleModifier::semanticallyEquivalent(const Modifier& other) const {
192 auto* _other = dynamic_cast<const SimpleModifier*>(&other);
193 if (_other == nullptr) {
194 return false;
195 }
196 if (fParameters.obj != nullptr) {
197 return fParameters.obj == _other->fParameters.obj;
198 }
199 return fCompiledPattern == _other->fCompiledPattern
200 && fField == _other->fField
201 && fStrong == _other->fStrong;
202 }
203
204
205 int32_t
206 SimpleModifier::formatAsPrefixSuffix(FormattedStringBuilder &result, int32_t startIndex, int32_t endIndex,
207 UErrorCode &status) const {
208 if (fSuffixOffset == -1 && fPrefixLength + fSuffixLength > 0) {
209 // There is no argument for the inner number; overwrite the entire segment with our string.
210 return result.splice(startIndex, endIndex, fCompiledPattern, 2, 2 + fPrefixLength, fField, status);
211 } else {
212 if (fPrefixLength > 0) {
213 result.insert(startIndex, fCompiledPattern, 2, 2 + fPrefixLength, fField, status);
214 }
215 if (fSuffixLength > 0) {
216 result.insert(
217 endIndex + fPrefixLength,
218 fCompiledPattern,
219 1 + fSuffixOffset,
220 1 + fSuffixOffset + fSuffixLength,
221 fField,
222 status);
223 }
224 return fPrefixLength + fSuffixLength;
225 }
226 }
227
228
229 int32_t
230 SimpleModifier::formatTwoArgPattern(const SimpleFormatter& compiled, FormattedStringBuilder& result,
231 int32_t index, int32_t* outPrefixLength, int32_t* outSuffixLength,
232 Field field, UErrorCode& status) {
233 const UnicodeString& compiledPattern = compiled.compiledPattern;
234 int32_t argLimit = SimpleFormatter::getArgumentLimit(
235 compiledPattern.getBuffer(), compiledPattern.length());
236 if (argLimit != 2) {
237 status = U_INTERNAL_PROGRAM_ERROR;
238 return 0;
239 }
240 int32_t offset = 1; // offset into compiledPattern
241 int32_t length = 0; // chars added to result
242
243 int32_t prefixLength = compiledPattern.charAt(offset);
244 offset++;
245 if (prefixLength < ARG_NUM_LIMIT) {
246 // No prefix
247 prefixLength = 0;
248 } else {
249 prefixLength -= ARG_NUM_LIMIT;
250 result.insert(index + length, compiledPattern, offset, offset + prefixLength, field, status);
251 offset += prefixLength;
252 length += prefixLength;
253 offset++;
254 }
255
256 int32_t infixLength = compiledPattern.charAt(offset);
257 offset++;
258 if (infixLength < ARG_NUM_LIMIT) {
259 // No infix
260 infixLength = 0;
261 } else {
262 infixLength -= ARG_NUM_LIMIT;
263 result.insert(index + length, compiledPattern, offset, offset + infixLength, field, status);
264 offset += infixLength;
265 length += infixLength;
266 offset++;
267 }
268
269 int32_t suffixLength;
270 if (offset == compiledPattern.length()) {
271 // No suffix
272 suffixLength = 0;
273 } else {
274 suffixLength = compiledPattern.charAt(offset) - ARG_NUM_LIMIT;
275 offset++;
276 result.insert(index + length, compiledPattern, offset, offset + suffixLength, field, status);
277 length += suffixLength;
278 }
279
280 *outPrefixLength = prefixLength;
281 *outSuffixLength = suffixLength;
282
283 return length;
284 }
285
286
287 int32_t ConstantMultiFieldModifier::apply(FormattedStringBuilder &output, int leftIndex, int rightIndex,
288 UErrorCode &status) const {
289 int32_t length = output.insert(leftIndex, fPrefix, status);
290 if (fOverwrite) {
291 length += output.splice(
292 leftIndex + length,
293 rightIndex + length,
294 UnicodeString(), 0, 0,
295 UNUM_FIELD_COUNT, status);
296 }
297 length += output.insert(rightIndex + length, fSuffix, status);
298 return length;
299 }
300
301 int32_t ConstantMultiFieldModifier::getPrefixLength() const {
302 return fPrefix.length();
303 }
304
305 int32_t ConstantMultiFieldModifier::getCodePointCount() const {
306 return fPrefix.codePointCount() + fSuffix.codePointCount();
307 }
308
309 bool ConstantMultiFieldModifier::isStrong() const {
310 return fStrong;
311 }
312
313 bool ConstantMultiFieldModifier::containsField(UNumberFormatFields field) const {
314 return fPrefix.containsField(field) || fSuffix.containsField(field);
315 }
316
317 void ConstantMultiFieldModifier::getParameters(Parameters& output) const {
318 output = fParameters;
319 }
320
321 bool ConstantMultiFieldModifier::semanticallyEquivalent(const Modifier& other) const {
322 auto* _other = dynamic_cast<const ConstantMultiFieldModifier*>(&other);
323 if (_other == nullptr) {
324 return false;
325 }
326 if (fParameters.obj != nullptr) {
327 return fParameters.obj == _other->fParameters.obj;
328 }
329 return fPrefix.contentEquals(_other->fPrefix)
330 && fSuffix.contentEquals(_other->fSuffix)
331 && fOverwrite == _other->fOverwrite
332 && fStrong == _other->fStrong;
333 }
334
335
336 CurrencySpacingEnabledModifier::CurrencySpacingEnabledModifier(const FormattedStringBuilder &prefix,
337 const FormattedStringBuilder &suffix,
338 bool overwrite,
339 bool strong,
340 const DecimalFormatSymbols &symbols,
341 UErrorCode &status)
342 : ConstantMultiFieldModifier(prefix, suffix, overwrite, strong) {
343 // Check for currency spacing. Do not build the UnicodeSets unless there is
344 // a currency code point at a boundary.
345 if (prefix.length() > 0 && prefix.fieldAt(prefix.length() - 1) == UNUM_CURRENCY_FIELD) {
346 int prefixCp = prefix.getLastCodePoint();
347 UnicodeSet prefixUnicodeSet = getUnicodeSet(symbols, IN_CURRENCY, PREFIX, status);
348 if (prefixUnicodeSet.contains(prefixCp)) {
349 fAfterPrefixUnicodeSet = getUnicodeSet(symbols, IN_NUMBER, PREFIX, status);
350 fAfterPrefixUnicodeSet.freeze();
351 fAfterPrefixInsert = getInsertString(symbols, PREFIX, status);
352 } else {
353 fAfterPrefixUnicodeSet.setToBogus();
354 fAfterPrefixInsert.setToBogus();
355 }
356 } else {
357 fAfterPrefixUnicodeSet.setToBogus();
358 fAfterPrefixInsert.setToBogus();
359 }
360 if (suffix.length() > 0 && suffix.fieldAt(0) == UNUM_CURRENCY_FIELD) {
361 int suffixCp = suffix.getLastCodePoint();
362 UnicodeSet suffixUnicodeSet = getUnicodeSet(symbols, IN_CURRENCY, SUFFIX, status);
363 if (suffixUnicodeSet.contains(suffixCp)) {
364 fBeforeSuffixUnicodeSet = getUnicodeSet(symbols, IN_NUMBER, SUFFIX, status);
365 fBeforeSuffixUnicodeSet.freeze();
366 fBeforeSuffixInsert = getInsertString(symbols, SUFFIX, status);
367 } else {
368 fBeforeSuffixUnicodeSet.setToBogus();
369 fBeforeSuffixInsert.setToBogus();
370 }
371 } else {
372 fBeforeSuffixUnicodeSet.setToBogus();
373 fBeforeSuffixInsert.setToBogus();
374 }
375 }
376
377 int32_t CurrencySpacingEnabledModifier::apply(FormattedStringBuilder &output, int leftIndex, int rightIndex,
378 UErrorCode &status) const {
379 // Currency spacing logic
380 int length = 0;
381 if (rightIndex - leftIndex > 0 && !fAfterPrefixUnicodeSet.isBogus() &&
382 fAfterPrefixUnicodeSet.contains(output.codePointAt(leftIndex))) {
383 // TODO: Should we use the CURRENCY field here?
384 length += output.insert(leftIndex, fAfterPrefixInsert, UNUM_FIELD_COUNT, status);
385 }
386 if (rightIndex - leftIndex > 0 && !fBeforeSuffixUnicodeSet.isBogus() &&
387 fBeforeSuffixUnicodeSet.contains(output.codePointBefore(rightIndex))) {
388 // TODO: Should we use the CURRENCY field here?
389 length += output.insert(rightIndex + length, fBeforeSuffixInsert, UNUM_FIELD_COUNT, status);
390 }
391
392 // Call super for the remaining logic
393 length += ConstantMultiFieldModifier::apply(output, leftIndex, rightIndex + length, status);
394 return length;
395 }
396
397 int32_t
398 CurrencySpacingEnabledModifier::applyCurrencySpacing(FormattedStringBuilder &output, int32_t prefixStart,
399 int32_t prefixLen, int32_t suffixStart,
400 int32_t suffixLen,
401 const DecimalFormatSymbols &symbols,
402 UErrorCode &status) {
403 int length = 0;
404 bool hasPrefix = (prefixLen > 0);
405 bool hasSuffix = (suffixLen > 0);
406 bool hasNumber = (suffixStart - prefixStart - prefixLen > 0); // could be empty string
407 if (hasPrefix && hasNumber) {
408 length += applyCurrencySpacingAffix(output, prefixStart + prefixLen, PREFIX, symbols, status);
409 }
410 if (hasSuffix && hasNumber) {
411 length += applyCurrencySpacingAffix(output, suffixStart + length, SUFFIX, symbols, status);
412 }
413 return length;
414 }
415
416 int32_t
417 CurrencySpacingEnabledModifier::applyCurrencySpacingAffix(FormattedStringBuilder &output, int32_t index,
418 EAffix affix,
419 const DecimalFormatSymbols &symbols,
420 UErrorCode &status) {
421 // NOTE: For prefix, output.fieldAt(index-1) gets the last field type in the prefix.
422 // This works even if the last code point in the prefix is 2 code units because the
423 // field value gets populated to both indices in the field array.
424 Field affixField = (affix == PREFIX) ? output.fieldAt(index - 1) : output.fieldAt(index);
425 if (affixField != UNUM_CURRENCY_FIELD) {
426 return 0;
427 }
428 int affixCp = (affix == PREFIX) ? output.codePointBefore(index) : output.codePointAt(index);
429 UnicodeSet affixUniset = getUnicodeSet(symbols, IN_CURRENCY, affix, status);
430 if (!affixUniset.contains(affixCp)) {
431 return 0;
432 }
433 int numberCp = (affix == PREFIX) ? output.codePointAt(index) : output.codePointBefore(index);
434 UnicodeSet numberUniset = getUnicodeSet(symbols, IN_NUMBER, affix, status);
435 if (!numberUniset.contains(numberCp)) {
436 return 0;
437 }
438 UnicodeString spacingString = getInsertString(symbols, affix, status);
439
440 // NOTE: This next line *inserts* the spacing string, triggering an arraycopy.
441 // It would be more efficient if this could be done before affixes were attached,
442 // so that it could be prepended/appended instead of inserted.
443 // However, the build code path is more efficient, and this is the most natural
444 // place to put currency spacing in the non-build code path.
445 // TODO: Should we use the CURRENCY field here?
446 return output.insert(index, spacingString, UNUM_FIELD_COUNT, status);
447 }
448
449 UnicodeSet
450 CurrencySpacingEnabledModifier::getUnicodeSet(const DecimalFormatSymbols &symbols, EPosition position,
451 EAffix affix, UErrorCode &status) {
452 // Ensure the static defaults are initialized:
453 umtx_initOnce(gDefaultCurrencySpacingInitOnce, &initDefaultCurrencySpacing, status);
454 if (U_FAILURE(status)) {
455 return UnicodeSet();
456 }
457
458 const UnicodeString& pattern = symbols.getPatternForCurrencySpacing(
459 position == IN_CURRENCY ? UNUM_CURRENCY_MATCH : UNUM_CURRENCY_SURROUNDING_MATCH,
460 affix == SUFFIX,
461 status);
462 if (pattern.compare(u"[:digit:]", -1) == 0) {
463 return *UNISET_DIGIT;
464 } else if (pattern.compare(u"[:^S:]", -1) == 0) {
465 return *UNISET_NOTS;
466 } else {
467 return UnicodeSet(pattern, status);
468 }
469 }
470
471 UnicodeString
472 CurrencySpacingEnabledModifier::getInsertString(const DecimalFormatSymbols &symbols, EAffix affix,
473 UErrorCode &status) {
474 return symbols.getPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, affix == SUFFIX, status);
475 }
476
477 #endif /* #if !UCONFIG_NO_FORMATTING */