]> git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/number_modifiers.cpp
ICU-62107.0.1.tar.gz
[apple/icu.git] / icuSources / i18n / number_modifiers.cpp
1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3
4 #include "unicode/utypes.h"
5
6 #if !UCONFIG_NO_FORMATTING
7
8 #include "umutex.h"
9 #include "ucln_cmn.h"
10 #include "ucln_in.h"
11 #include "number_modifiers.h"
12
13 using namespace icu;
14 using namespace icu::number;
15 using namespace icu::number::impl;
16
17 namespace {
18
19 // TODO: This is copied from simpleformatter.cpp
20 const int32_t ARG_NUM_LIMIT = 0x100;
21
22 // These are the default currency spacing UnicodeSets in CLDR.
23 // Pre-compute them for performance.
24 // The Java unit test testCurrencySpacingPatternStability() will start failing if these change in CLDR.
25 icu::UInitOnce gDefaultCurrencySpacingInitOnce = U_INITONCE_INITIALIZER;
26
27 UnicodeSet *UNISET_DIGIT = nullptr;
28 UnicodeSet *UNISET_NOTS = nullptr;
29
30 UBool U_CALLCONV cleanupDefaultCurrencySpacing() {
31 delete UNISET_DIGIT;
32 UNISET_DIGIT = nullptr;
33 delete UNISET_NOTS;
34 UNISET_NOTS = nullptr;
35 gDefaultCurrencySpacingInitOnce.reset();
36 return TRUE;
37 }
38
39 void U_CALLCONV initDefaultCurrencySpacing(UErrorCode &status) {
40 ucln_i18n_registerCleanup(UCLN_I18N_CURRENCY_SPACING, cleanupDefaultCurrencySpacing);
41 UNISET_DIGIT = new UnicodeSet(UnicodeString(u"[:digit:]"), status);
42 UNISET_NOTS = new UnicodeSet(UnicodeString(u"[:^S:]"), status);
43 if (UNISET_DIGIT == nullptr || UNISET_NOTS == nullptr) {
44 status = U_MEMORY_ALLOCATION_ERROR;
45 return;
46 }
47 UNISET_DIGIT->freeze();
48 UNISET_NOTS->freeze();
49 }
50
51 } // namespace
52
53
54 Modifier::~Modifier() = default;
55
56
57 int32_t ConstantAffixModifier::apply(NumberStringBuilder &output, int leftIndex, int rightIndex,
58 UErrorCode &status) const {
59 // Insert the suffix first since inserting the prefix will change the rightIndex
60 int length = output.insert(rightIndex, fSuffix, fField, status);
61 length += output.insert(leftIndex, fPrefix, fField, status);
62 return length;
63 }
64
65 int32_t ConstantAffixModifier::getPrefixLength(UErrorCode &status) const {
66 (void)status;
67 return fPrefix.length();
68 }
69
70 int32_t ConstantAffixModifier::getCodePointCount(UErrorCode &status) const {
71 (void)status;
72 return fPrefix.countChar32() + fSuffix.countChar32();
73 }
74
75 bool ConstantAffixModifier::isStrong() const {
76 return fStrong;
77 }
78
79 SimpleModifier::SimpleModifier(const SimpleFormatter &simpleFormatter, Field field, bool strong)
80 : fCompiledPattern(simpleFormatter.compiledPattern), fField(field), fStrong(strong) {
81 int32_t argLimit = SimpleFormatter::getArgumentLimit(
82 fCompiledPattern.getBuffer(), fCompiledPattern.length());
83 if (argLimit == 0) {
84 // No arguments in compiled pattern
85 fPrefixLength = fCompiledPattern.charAt(1) - ARG_NUM_LIMIT;
86 U_ASSERT(2 + fPrefixLength == fCompiledPattern.length());
87 // Set suffixOffset = -1 to indicate no arguments in compiled pattern.
88 fSuffixOffset = -1;
89 fSuffixLength = 0;
90 } else {
91 U_ASSERT(argLimit == 1);
92 if (fCompiledPattern.charAt(1) != 0) {
93 fPrefixLength = fCompiledPattern.charAt(1) - ARG_NUM_LIMIT;
94 fSuffixOffset = 3 + fPrefixLength;
95 } else {
96 fPrefixLength = 0;
97 fSuffixOffset = 2;
98 }
99 if (3 + fPrefixLength < fCompiledPattern.length()) {
100 fSuffixLength = fCompiledPattern.charAt(fSuffixOffset) - ARG_NUM_LIMIT;
101 } else {
102 fSuffixLength = 0;
103 }
104 }
105 }
106
107 SimpleModifier::SimpleModifier()
108 : fField(UNUM_FIELD_COUNT), fStrong(false), fPrefixLength(0), fSuffixLength(0) {
109 }
110
111 int32_t SimpleModifier::apply(NumberStringBuilder &output, int leftIndex, int rightIndex,
112 UErrorCode &status) const {
113 return formatAsPrefixSuffix(output, leftIndex, rightIndex, fField, status);
114 }
115
116 int32_t SimpleModifier::getPrefixLength(UErrorCode &status) const {
117 (void)status;
118 return fPrefixLength;
119 }
120
121 int32_t SimpleModifier::getCodePointCount(UErrorCode &status) const {
122 (void)status;
123 int32_t count = 0;
124 if (fPrefixLength > 0) {
125 count += fCompiledPattern.countChar32(2, fPrefixLength);
126 }
127 if (fSuffixLength > 0) {
128 count += fCompiledPattern.countChar32(1 + fSuffixOffset, fSuffixLength);
129 }
130 return count;
131 }
132
133 bool SimpleModifier::isStrong() const {
134 return fStrong;
135 }
136
137 int32_t
138 SimpleModifier::formatAsPrefixSuffix(NumberStringBuilder &result, int32_t startIndex, int32_t endIndex,
139 Field field, UErrorCode &status) const {
140 if (fSuffixOffset == -1) {
141 // There is no argument for the inner number; overwrite the entire segment with our string.
142 return result.splice(startIndex, endIndex, fCompiledPattern, 2, 2 + fPrefixLength, field, status);
143 } else {
144 if (fPrefixLength > 0) {
145 result.insert(startIndex, fCompiledPattern, 2, 2 + fPrefixLength, field, status);
146 }
147 if (fSuffixLength > 0) {
148 result.insert(
149 endIndex + fPrefixLength,
150 fCompiledPattern,
151 1 + fSuffixOffset,
152 1 + fSuffixOffset + fSuffixLength,
153 field,
154 status);
155 }
156 return fPrefixLength + fSuffixLength;
157 }
158 }
159
160 int32_t ConstantMultiFieldModifier::apply(NumberStringBuilder &output, int leftIndex, int rightIndex,
161 UErrorCode &status) const {
162 int32_t length = output.insert(leftIndex, fPrefix, status);
163 if (fOverwrite) {
164 length += output.splice(
165 leftIndex + length,
166 rightIndex + length,
167 UnicodeString(), 0, 0,
168 UNUM_FIELD_COUNT, status);
169 }
170 length += output.insert(rightIndex + length, fSuffix, status);
171 return length;
172 }
173
174 int32_t ConstantMultiFieldModifier::getPrefixLength(UErrorCode &status) const {
175 (void)status;
176 return fPrefix.length();
177 }
178
179 int32_t ConstantMultiFieldModifier::getCodePointCount(UErrorCode &status) const {
180 (void)status;
181 return fPrefix.codePointCount() + fSuffix.codePointCount();
182 }
183
184 bool ConstantMultiFieldModifier::isStrong() const {
185 return fStrong;
186 }
187
188 CurrencySpacingEnabledModifier::CurrencySpacingEnabledModifier(const NumberStringBuilder &prefix,
189 const NumberStringBuilder &suffix,
190 bool overwrite,
191 bool strong,
192 const DecimalFormatSymbols &symbols,
193 UErrorCode &status)
194 : ConstantMultiFieldModifier(prefix, suffix, overwrite, strong) {
195 // Check for currency spacing. Do not build the UnicodeSets unless there is
196 // a currency code point at a boundary.
197 if (prefix.length() > 0 && prefix.fieldAt(prefix.length() - 1) == UNUM_CURRENCY_FIELD) {
198 int prefixCp = prefix.getLastCodePoint();
199 UnicodeSet prefixUnicodeSet = getUnicodeSet(symbols, IN_CURRENCY, PREFIX, status);
200 if (prefixUnicodeSet.contains(prefixCp)) {
201 fAfterPrefixUnicodeSet = getUnicodeSet(symbols, IN_NUMBER, PREFIX, status);
202 fAfterPrefixUnicodeSet.freeze();
203 fAfterPrefixInsert = getInsertString(symbols, PREFIX, status);
204 } else {
205 fAfterPrefixUnicodeSet.setToBogus();
206 fAfterPrefixInsert.setToBogus();
207 }
208 } else {
209 fAfterPrefixUnicodeSet.setToBogus();
210 fAfterPrefixInsert.setToBogus();
211 }
212 if (suffix.length() > 0 && suffix.fieldAt(0) == UNUM_CURRENCY_FIELD) {
213 int suffixCp = suffix.getLastCodePoint();
214 UnicodeSet suffixUnicodeSet = getUnicodeSet(symbols, IN_CURRENCY, SUFFIX, status);
215 if (suffixUnicodeSet.contains(suffixCp)) {
216 fBeforeSuffixUnicodeSet = getUnicodeSet(symbols, IN_NUMBER, SUFFIX, status);
217 fBeforeSuffixUnicodeSet.freeze();
218 fBeforeSuffixInsert = getInsertString(symbols, SUFFIX, status);
219 } else {
220 fBeforeSuffixUnicodeSet.setToBogus();
221 fBeforeSuffixInsert.setToBogus();
222 }
223 } else {
224 fBeforeSuffixUnicodeSet.setToBogus();
225 fBeforeSuffixInsert.setToBogus();
226 }
227 }
228
229 int32_t CurrencySpacingEnabledModifier::apply(NumberStringBuilder &output, int leftIndex, int rightIndex,
230 UErrorCode &status) const {
231 // Currency spacing logic
232 int length = 0;
233 if (rightIndex - leftIndex > 0 && !fAfterPrefixUnicodeSet.isBogus() &&
234 fAfterPrefixUnicodeSet.contains(output.codePointAt(leftIndex))) {
235 // TODO: Should we use the CURRENCY field here?
236 length += output.insert(leftIndex, fAfterPrefixInsert, UNUM_FIELD_COUNT, status);
237 }
238 if (rightIndex - leftIndex > 0 && !fBeforeSuffixUnicodeSet.isBogus() &&
239 fBeforeSuffixUnicodeSet.contains(output.codePointBefore(rightIndex))) {
240 // TODO: Should we use the CURRENCY field here?
241 length += output.insert(rightIndex + length, fBeforeSuffixInsert, UNUM_FIELD_COUNT, status);
242 }
243
244 // Call super for the remaining logic
245 length += ConstantMultiFieldModifier::apply(output, leftIndex, rightIndex + length, status);
246 return length;
247 }
248
249 int32_t
250 CurrencySpacingEnabledModifier::applyCurrencySpacing(NumberStringBuilder &output, int32_t prefixStart,
251 int32_t prefixLen, int32_t suffixStart,
252 int32_t suffixLen,
253 const DecimalFormatSymbols &symbols,
254 UErrorCode &status) {
255 int length = 0;
256 bool hasPrefix = (prefixLen > 0);
257 bool hasSuffix = (suffixLen > 0);
258 bool hasNumber = (suffixStart - prefixStart - prefixLen > 0); // could be empty string
259 if (hasPrefix && hasNumber) {
260 length += applyCurrencySpacingAffix(output, prefixStart + prefixLen, PREFIX, symbols, status);
261 }
262 if (hasSuffix && hasNumber) {
263 length += applyCurrencySpacingAffix(output, suffixStart + length, SUFFIX, symbols, status);
264 }
265 return length;
266 }
267
268 int32_t
269 CurrencySpacingEnabledModifier::applyCurrencySpacingAffix(NumberStringBuilder &output, int32_t index,
270 EAffix affix,
271 const DecimalFormatSymbols &symbols,
272 UErrorCode &status) {
273 // NOTE: For prefix, output.fieldAt(index-1) gets the last field type in the prefix.
274 // This works even if the last code point in the prefix is 2 code units because the
275 // field value gets populated to both indices in the field array.
276 Field affixField = (affix == PREFIX) ? output.fieldAt(index - 1) : output.fieldAt(index);
277 if (affixField != UNUM_CURRENCY_FIELD) {
278 return 0;
279 }
280 int affixCp = (affix == PREFIX) ? output.codePointBefore(index) : output.codePointAt(index);
281 UnicodeSet affixUniset = getUnicodeSet(symbols, IN_CURRENCY, affix, status);
282 if (!affixUniset.contains(affixCp)) {
283 return 0;
284 }
285 int numberCp = (affix == PREFIX) ? output.codePointAt(index) : output.codePointBefore(index);
286 UnicodeSet numberUniset = getUnicodeSet(symbols, IN_NUMBER, affix, status);
287 if (!numberUniset.contains(numberCp)) {
288 return 0;
289 }
290 UnicodeString spacingString = getInsertString(symbols, affix, status);
291
292 // NOTE: This next line *inserts* the spacing string, triggering an arraycopy.
293 // It would be more efficient if this could be done before affixes were attached,
294 // so that it could be prepended/appended instead of inserted.
295 // However, the build code path is more efficient, and this is the most natural
296 // place to put currency spacing in the non-build code path.
297 // TODO: Should we use the CURRENCY field here?
298 return output.insert(index, spacingString, UNUM_FIELD_COUNT, status);
299 }
300
301 UnicodeSet
302 CurrencySpacingEnabledModifier::getUnicodeSet(const DecimalFormatSymbols &symbols, EPosition position,
303 EAffix affix, UErrorCode &status) {
304 // Ensure the static defaults are initialized:
305 umtx_initOnce(gDefaultCurrencySpacingInitOnce, &initDefaultCurrencySpacing, status);
306 if (U_FAILURE(status)) {
307 return UnicodeSet();
308 }
309
310 const UnicodeString& pattern = symbols.getPatternForCurrencySpacing(
311 position == IN_CURRENCY ? UNUM_CURRENCY_MATCH : UNUM_CURRENCY_SURROUNDING_MATCH,
312 affix == SUFFIX,
313 status);
314 if (pattern.compare(u"[:digit:]", -1) == 0) {
315 return *UNISET_DIGIT;
316 } else if (pattern.compare(u"[:^S:]", -1) == 0) {
317 return *UNISET_NOTS;
318 } else {
319 return UnicodeSet(pattern, status);
320 }
321 }
322
323 UnicodeString
324 CurrencySpacingEnabledModifier::getInsertString(const DecimalFormatSymbols &symbols, EAffix affix,
325 UErrorCode &status) {
326 return symbols.getPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, affix == SUFFIX, status);
327 }
328
329 #endif /* #if !UCONFIG_NO_FORMATTING */