1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 #include "unicode/utypes.h"
6 #if !UCONFIG_NO_FORMATTING
11 #include "number_modifiers.h"
14 using namespace icu::number
;
15 using namespace icu::number::impl
;
// Segment-length code units in a compiled SimpleFormatter pattern are stored with this
// value added; the code in this file subtracts it again to recover the length.
// TODO: This is copied from simpleformatter.cpp
constexpr int32_t ARG_NUM_LIMIT = 0x100;
22 // These are the default currency spacing UnicodeSets in CLDR.
23 // Pre-compute them for performance.
24 // The Java unit test testCurrencySpacingPatternStability() will start failing if these change in CLDR.
25 icu::UInitOnce gDefaultCurrencySpacingInitOnce
= U_INITONCE_INITIALIZER
;
27 UnicodeSet
*UNISET_DIGIT
= nullptr;
28 UnicodeSet
*UNISET_NOTS
= nullptr;
30 UBool U_CALLCONV
cleanupDefaultCurrencySpacing() {
32 UNISET_DIGIT
= nullptr;
34 UNISET_NOTS
= nullptr;
35 gDefaultCurrencySpacingInitOnce
.reset();
39 void U_CALLCONV
initDefaultCurrencySpacing(UErrorCode
&status
) {
40 ucln_i18n_registerCleanup(UCLN_I18N_CURRENCY_SPACING
, cleanupDefaultCurrencySpacing
);
41 UNISET_DIGIT
= new UnicodeSet(UnicodeString(u
"[:digit:]"), status
);
42 UNISET_NOTS
= new UnicodeSet(UnicodeString(u
"[:^S:]"), status
);
43 if (UNISET_DIGIT
== nullptr || UNISET_NOTS
== nullptr) {
44 status
= U_MEMORY_ALLOCATION_ERROR
;
47 UNISET_DIGIT
->freeze();
48 UNISET_NOTS
->freeze();
54 Modifier::~Modifier() = default;
56 Modifier::Parameters::Parameters()
59 Modifier::Parameters::Parameters(
60 const ModifierStore
* _obj
, Signum _signum
, StandardPlural::Form _plural
)
61 : obj(_obj
), signum(_signum
), plural(_plural
) {}
63 ModifierStore::~ModifierStore() = default;
65 AdoptingModifierStore::~AdoptingModifierStore() {
66 for (const Modifier
*mod
: mods
) {
72 int32_t ConstantAffixModifier::apply(FormattedStringBuilder
&output
, int leftIndex
, int rightIndex
,
73 UErrorCode
&status
) const {
74 // Insert the suffix first since inserting the prefix will change the rightIndex
75 int length
= output
.insert(rightIndex
, fSuffix
, fField
, status
);
76 length
+= output
.insert(leftIndex
, fPrefix
, fField
, status
);
80 int32_t ConstantAffixModifier::getPrefixLength() const {
81 return fPrefix
.length();
84 int32_t ConstantAffixModifier::getCodePointCount() const {
85 return fPrefix
.countChar32() + fSuffix
.countChar32();
88 bool ConstantAffixModifier::isStrong() const {
92 bool ConstantAffixModifier::containsField(UNumberFormatFields field
) const {
94 // This method is not currently used.
98 void ConstantAffixModifier::getParameters(Parameters
& output
) const {
100 // This method is not currently used.
104 bool ConstantAffixModifier::semanticallyEquivalent(const Modifier
& other
) const {
105 auto* _other
= dynamic_cast<const ConstantAffixModifier
*>(&other
);
106 if (_other
== nullptr) {
109 return fPrefix
== _other
->fPrefix
110 && fSuffix
== _other
->fSuffix
111 && fField
== _other
->fField
112 && fStrong
== _other
->fStrong
;
116 SimpleModifier::SimpleModifier(const SimpleFormatter
&simpleFormatter
, Field field
, bool strong
)
117 : SimpleModifier(simpleFormatter
, field
, strong
, {}) {}
119 SimpleModifier::SimpleModifier(const SimpleFormatter
&simpleFormatter
, Field field
, bool strong
,
120 const Modifier::Parameters parameters
)
121 : fCompiledPattern(simpleFormatter
.compiledPattern
), fField(field
), fStrong(strong
),
122 fParameters(parameters
) {
123 int32_t argLimit
= SimpleFormatter::getArgumentLimit(
124 fCompiledPattern
.getBuffer(), fCompiledPattern
.length());
126 // No arguments in compiled pattern
127 fPrefixLength
= fCompiledPattern
.charAt(1) - ARG_NUM_LIMIT
;
128 U_ASSERT(2 + fPrefixLength
== fCompiledPattern
.length());
129 // Set suffixOffset = -1 to indicate no arguments in compiled pattern.
133 U_ASSERT(argLimit
== 1);
134 if (fCompiledPattern
.charAt(1) != 0) {
136 fPrefixLength
= fCompiledPattern
.charAt(1) - ARG_NUM_LIMIT
;
137 fSuffixOffset
= 3 + fPrefixLength
;
143 if (3 + fPrefixLength
< fCompiledPattern
.length()) {
145 fSuffixLength
= fCompiledPattern
.charAt(fSuffixOffset
) - ARG_NUM_LIMIT
;
153 SimpleModifier::SimpleModifier()
154 : fField(UNUM_FIELD_COUNT
), fStrong(false), fPrefixLength(0), fSuffixLength(0) {
157 int32_t SimpleModifier::apply(FormattedStringBuilder
&output
, int leftIndex
, int rightIndex
,
158 UErrorCode
&status
) const {
159 return formatAsPrefixSuffix(output
, leftIndex
, rightIndex
, status
);
162 int32_t SimpleModifier::getPrefixLength() const {
163 return fPrefixLength
;
166 int32_t SimpleModifier::getCodePointCount() const {
168 if (fPrefixLength
> 0) {
169 count
+= fCompiledPattern
.countChar32(2, fPrefixLength
);
171 if (fSuffixLength
> 0) {
172 count
+= fCompiledPattern
.countChar32(1 + fSuffixOffset
, fSuffixLength
);
177 bool SimpleModifier::isStrong() const {
181 bool SimpleModifier::containsField(UNumberFormatFields field
) const {
183 // This method is not currently used.
187 void SimpleModifier::getParameters(Parameters
& output
) const {
188 output
= fParameters
;
191 bool SimpleModifier::semanticallyEquivalent(const Modifier
& other
) const {
192 auto* _other
= dynamic_cast<const SimpleModifier
*>(&other
);
193 if (_other
== nullptr) {
196 if (fParameters
.obj
!= nullptr) {
197 return fParameters
.obj
== _other
->fParameters
.obj
;
199 return fCompiledPattern
== _other
->fCompiledPattern
200 && fField
== _other
->fField
201 && fStrong
== _other
->fStrong
;
206 SimpleModifier::formatAsPrefixSuffix(FormattedStringBuilder
&result
, int32_t startIndex
, int32_t endIndex
,
207 UErrorCode
&status
) const {
208 if (fSuffixOffset
== -1 && fPrefixLength
+ fSuffixLength
> 0) {
209 // There is no argument for the inner number; overwrite the entire segment with our string.
210 return result
.splice(startIndex
, endIndex
, fCompiledPattern
, 2, 2 + fPrefixLength
, fField
, status
);
212 if (fPrefixLength
> 0) {
213 result
.insert(startIndex
, fCompiledPattern
, 2, 2 + fPrefixLength
, fField
, status
);
215 if (fSuffixLength
> 0) {
217 endIndex
+ fPrefixLength
,
220 1 + fSuffixOffset
+ fSuffixLength
,
224 return fPrefixLength
+ fSuffixLength
;
230 SimpleModifier::formatTwoArgPattern(const SimpleFormatter
& compiled
, FormattedStringBuilder
& result
,
231 int32_t index
, int32_t* outPrefixLength
, int32_t* outSuffixLength
,
232 Field field
, UErrorCode
& status
) {
233 const UnicodeString
& compiledPattern
= compiled
.compiledPattern
;
234 int32_t argLimit
= SimpleFormatter::getArgumentLimit(
235 compiledPattern
.getBuffer(), compiledPattern
.length());
237 status
= U_INTERNAL_PROGRAM_ERROR
;
240 int32_t offset
= 1; // offset into compiledPattern
241 int32_t length
= 0; // chars added to result
243 int32_t prefixLength
= compiledPattern
.charAt(offset
);
245 if (prefixLength
< ARG_NUM_LIMIT
) {
249 prefixLength
-= ARG_NUM_LIMIT
;
250 result
.insert(index
+ length
, compiledPattern
, offset
, offset
+ prefixLength
, field
, status
);
251 offset
+= prefixLength
;
252 length
+= prefixLength
;
256 int32_t infixLength
= compiledPattern
.charAt(offset
);
258 if (infixLength
< ARG_NUM_LIMIT
) {
262 infixLength
-= ARG_NUM_LIMIT
;
263 result
.insert(index
+ length
, compiledPattern
, offset
, offset
+ infixLength
, field
, status
);
264 offset
+= infixLength
;
265 length
+= infixLength
;
269 int32_t suffixLength
;
270 if (offset
== compiledPattern
.length()) {
274 suffixLength
= compiledPattern
.charAt(offset
) - ARG_NUM_LIMIT
;
276 result
.insert(index
+ length
, compiledPattern
, offset
, offset
+ suffixLength
, field
, status
);
277 length
+= suffixLength
;
280 *outPrefixLength
= prefixLength
;
281 *outSuffixLength
= suffixLength
;
287 int32_t ConstantMultiFieldModifier::apply(FormattedStringBuilder
&output
, int leftIndex
, int rightIndex
,
288 UErrorCode
&status
) const {
289 int32_t length
= output
.insert(leftIndex
, fPrefix
, status
);
291 length
+= output
.splice(
294 UnicodeString(), 0, 0,
295 UNUM_FIELD_COUNT
, status
);
297 length
+= output
.insert(rightIndex
+ length
, fSuffix
, status
);
301 int32_t ConstantMultiFieldModifier::getPrefixLength() const {
302 return fPrefix
.length();
305 int32_t ConstantMultiFieldModifier::getCodePointCount() const {
306 return fPrefix
.codePointCount() + fSuffix
.codePointCount();
309 bool ConstantMultiFieldModifier::isStrong() const {
313 bool ConstantMultiFieldModifier::containsField(UNumberFormatFields field
) const {
314 return fPrefix
.containsField(field
) || fSuffix
.containsField(field
);
317 void ConstantMultiFieldModifier::getParameters(Parameters
& output
) const {
318 output
= fParameters
;
321 bool ConstantMultiFieldModifier::semanticallyEquivalent(const Modifier
& other
) const {
322 auto* _other
= dynamic_cast<const ConstantMultiFieldModifier
*>(&other
);
323 if (_other
== nullptr) {
326 if (fParameters
.obj
!= nullptr) {
327 return fParameters
.obj
== _other
->fParameters
.obj
;
329 return fPrefix
.contentEquals(_other
->fPrefix
)
330 && fSuffix
.contentEquals(_other
->fSuffix
)
331 && fOverwrite
== _other
->fOverwrite
332 && fStrong
== _other
->fStrong
;
336 CurrencySpacingEnabledModifier::CurrencySpacingEnabledModifier(const FormattedStringBuilder
&prefix
,
337 const FormattedStringBuilder
&suffix
,
340 const DecimalFormatSymbols
&symbols
,
342 : ConstantMultiFieldModifier(prefix
, suffix
, overwrite
, strong
) {
343 // Check for currency spacing. Do not build the UnicodeSets unless there is
344 // a currency code point at a boundary.
345 if (prefix
.length() > 0 && prefix
.fieldAt(prefix
.length() - 1) == UNUM_CURRENCY_FIELD
) {
346 int prefixCp
= prefix
.getLastCodePoint();
347 UnicodeSet prefixUnicodeSet
= getUnicodeSet(symbols
, IN_CURRENCY
, PREFIX
, status
);
348 if (prefixUnicodeSet
.contains(prefixCp
)) {
349 fAfterPrefixUnicodeSet
= getUnicodeSet(symbols
, IN_NUMBER
, PREFIX
, status
);
350 fAfterPrefixUnicodeSet
.freeze();
351 fAfterPrefixInsert
= getInsertString(symbols
, PREFIX
, status
);
353 fAfterPrefixUnicodeSet
.setToBogus();
354 fAfterPrefixInsert
.setToBogus();
357 fAfterPrefixUnicodeSet
.setToBogus();
358 fAfterPrefixInsert
.setToBogus();
360 if (suffix
.length() > 0 && suffix
.fieldAt(0) == UNUM_CURRENCY_FIELD
) {
361 int suffixCp
= suffix
.getLastCodePoint();
362 UnicodeSet suffixUnicodeSet
= getUnicodeSet(symbols
, IN_CURRENCY
, SUFFIX
, status
);
363 if (suffixUnicodeSet
.contains(suffixCp
)) {
364 fBeforeSuffixUnicodeSet
= getUnicodeSet(symbols
, IN_NUMBER
, SUFFIX
, status
);
365 fBeforeSuffixUnicodeSet
.freeze();
366 fBeforeSuffixInsert
= getInsertString(symbols
, SUFFIX
, status
);
368 fBeforeSuffixUnicodeSet
.setToBogus();
369 fBeforeSuffixInsert
.setToBogus();
372 fBeforeSuffixUnicodeSet
.setToBogus();
373 fBeforeSuffixInsert
.setToBogus();
377 int32_t CurrencySpacingEnabledModifier::apply(FormattedStringBuilder
&output
, int leftIndex
, int rightIndex
,
378 UErrorCode
&status
) const {
379 // Currency spacing logic
381 if (rightIndex
- leftIndex
> 0 && !fAfterPrefixUnicodeSet
.isBogus() &&
382 fAfterPrefixUnicodeSet
.contains(output
.codePointAt(leftIndex
))) {
383 // TODO: Should we use the CURRENCY field here?
384 length
+= output
.insert(leftIndex
, fAfterPrefixInsert
, UNUM_FIELD_COUNT
, status
);
386 if (rightIndex
- leftIndex
> 0 && !fBeforeSuffixUnicodeSet
.isBogus() &&
387 fBeforeSuffixUnicodeSet
.contains(output
.codePointBefore(rightIndex
))) {
388 // TODO: Should we use the CURRENCY field here?
389 length
+= output
.insert(rightIndex
+ length
, fBeforeSuffixInsert
, UNUM_FIELD_COUNT
, status
);
392 // Call super for the remaining logic
393 length
+= ConstantMultiFieldModifier::apply(output
, leftIndex
, rightIndex
+ length
, status
);
398 CurrencySpacingEnabledModifier::applyCurrencySpacing(FormattedStringBuilder
&output
, int32_t prefixStart
,
399 int32_t prefixLen
, int32_t suffixStart
,
401 const DecimalFormatSymbols
&symbols
,
402 UErrorCode
&status
) {
404 bool hasPrefix
= (prefixLen
> 0);
405 bool hasSuffix
= (suffixLen
> 0);
406 bool hasNumber
= (suffixStart
- prefixStart
- prefixLen
> 0); // could be empty string
407 if (hasPrefix
&& hasNumber
) {
408 length
+= applyCurrencySpacingAffix(output
, prefixStart
+ prefixLen
, PREFIX
, symbols
, status
);
410 if (hasSuffix
&& hasNumber
) {
411 length
+= applyCurrencySpacingAffix(output
, suffixStart
+ length
, SUFFIX
, symbols
, status
);
417 CurrencySpacingEnabledModifier::applyCurrencySpacingAffix(FormattedStringBuilder
&output
, int32_t index
,
419 const DecimalFormatSymbols
&symbols
,
420 UErrorCode
&status
) {
421 // NOTE: For prefix, output.fieldAt(index-1) gets the last field type in the prefix.
422 // This works even if the last code point in the prefix is 2 code units because the
423 // field value gets populated to both indices in the field array.
424 Field affixField
= (affix
== PREFIX
) ? output
.fieldAt(index
- 1) : output
.fieldAt(index
);
425 if (affixField
!= UNUM_CURRENCY_FIELD
) {
428 int affixCp
= (affix
== PREFIX
) ? output
.codePointBefore(index
) : output
.codePointAt(index
);
429 UnicodeSet affixUniset
= getUnicodeSet(symbols
, IN_CURRENCY
, affix
, status
);
430 if (!affixUniset
.contains(affixCp
)) {
433 int numberCp
= (affix
== PREFIX
) ? output
.codePointAt(index
) : output
.codePointBefore(index
);
434 UnicodeSet numberUniset
= getUnicodeSet(symbols
, IN_NUMBER
, affix
, status
);
435 if (!numberUniset
.contains(numberCp
)) {
438 UnicodeString spacingString
= getInsertString(symbols
, affix
, status
);
440 // NOTE: This next line *inserts* the spacing string, triggering an arraycopy.
441 // It would be more efficient if this could be done before affixes were attached,
442 // so that it could be prepended/appended instead of inserted.
443 // However, the build code path is more efficient, and this is the most natural
444 // place to put currency spacing in the non-build code path.
445 // TODO: Should we use the CURRENCY field here?
446 return output
.insert(index
, spacingString
, UNUM_FIELD_COUNT
, status
);
450 CurrencySpacingEnabledModifier::getUnicodeSet(const DecimalFormatSymbols
&symbols
, EPosition position
,
451 EAffix affix
, UErrorCode
&status
) {
452 // Ensure the static defaults are initialized:
453 umtx_initOnce(gDefaultCurrencySpacingInitOnce
, &initDefaultCurrencySpacing
, status
);
454 if (U_FAILURE(status
)) {
458 const UnicodeString
& pattern
= symbols
.getPatternForCurrencySpacing(
459 position
== IN_CURRENCY
? UNUM_CURRENCY_MATCH
: UNUM_CURRENCY_SURROUNDING_MATCH
,
462 if (pattern
.compare(u
"[:digit:]", -1) == 0) {
463 return *UNISET_DIGIT
;
464 } else if (pattern
.compare(u
"[:^S:]", -1) == 0) {
467 return UnicodeSet(pattern
, status
);
472 CurrencySpacingEnabledModifier::getInsertString(const DecimalFormatSymbols
&symbols
, EAffix affix
,
473 UErrorCode
&status
) {
474 return symbols
.getPatternForCurrencySpacing(UNUM_CURRENCY_INSERT
, affix
== SUFFIX
, status
);
477 #endif /* #if !UCONFIG_NO_FORMATTING */