]>
Commit | Line | Data |
---|---|---|
0f5d89e8 A |
1 | // © 2017 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
3 | ||
4 | #include "unicode/utypes.h" | |
5 | ||
6 | #if !UCONFIG_NO_FORMATTING | |
7 | ||
8 | #include "umutex.h" | |
9 | #include "ucln_cmn.h" | |
10 | #include "ucln_in.h" | |
11 | #include "number_modifiers.h" | |
12 | ||
13 | using namespace icu; | |
14 | using namespace icu::number; | |
15 | using namespace icu::number::impl; | |
16 | ||
17 | namespace { | |
18 | ||
19 | // TODO: This is copied from simpleformatter.cpp | |
20 | const int32_t ARG_NUM_LIMIT = 0x100; | |
21 | ||
22 | // These are the default currency spacing UnicodeSets in CLDR. | |
23 | // Pre-compute them for performance. | |
24 | // The Java unit test testCurrencySpacingPatternStability() will start failing if these change in CLDR. | |
25 | icu::UInitOnce gDefaultCurrencySpacingInitOnce = U_INITONCE_INITIALIZER; | |
26 | ||
27 | UnicodeSet *UNISET_DIGIT = nullptr; | |
28 | UnicodeSet *UNISET_NOTS = nullptr; | |
29 | ||
30 | UBool U_CALLCONV cleanupDefaultCurrencySpacing() { | |
31 | delete UNISET_DIGIT; | |
32 | UNISET_DIGIT = nullptr; | |
33 | delete UNISET_NOTS; | |
34 | UNISET_NOTS = nullptr; | |
35 | gDefaultCurrencySpacingInitOnce.reset(); | |
36 | return TRUE; | |
37 | } | |
38 | ||
39 | void U_CALLCONV initDefaultCurrencySpacing(UErrorCode &status) { | |
40 | ucln_i18n_registerCleanup(UCLN_I18N_CURRENCY_SPACING, cleanupDefaultCurrencySpacing); | |
41 | UNISET_DIGIT = new UnicodeSet(UnicodeString(u"[:digit:]"), status); | |
42 | UNISET_NOTS = new UnicodeSet(UnicodeString(u"[:^S:]"), status); | |
43 | if (UNISET_DIGIT == nullptr || UNISET_NOTS == nullptr) { | |
44 | status = U_MEMORY_ALLOCATION_ERROR; | |
45 | return; | |
46 | } | |
47 | UNISET_DIGIT->freeze(); | |
48 | UNISET_NOTS->freeze(); | |
49 | } | |
50 | ||
51 | } // namespace | |
52 | ||
53 | ||
54 | Modifier::~Modifier() = default; | |
55 | ||
56 | ||
57 | int32_t ConstantAffixModifier::apply(NumberStringBuilder &output, int leftIndex, int rightIndex, | |
58 | UErrorCode &status) const { | |
59 | // Insert the suffix first since inserting the prefix will change the rightIndex | |
60 | int length = output.insert(rightIndex, fSuffix, fField, status); | |
61 | length += output.insert(leftIndex, fPrefix, fField, status); | |
62 | return length; | |
63 | } | |
64 | ||
65 | int32_t ConstantAffixModifier::getPrefixLength(UErrorCode &status) const { | |
66 | (void)status; | |
67 | return fPrefix.length(); | |
68 | } | |
69 | ||
70 | int32_t ConstantAffixModifier::getCodePointCount(UErrorCode &status) const { | |
71 | (void)status; | |
72 | return fPrefix.countChar32() + fSuffix.countChar32(); | |
73 | } | |
74 | ||
75 | bool ConstantAffixModifier::isStrong() const { | |
76 | return fStrong; | |
77 | } | |
78 | ||
79 | SimpleModifier::SimpleModifier(const SimpleFormatter &simpleFormatter, Field field, bool strong) | |
80 | : fCompiledPattern(simpleFormatter.compiledPattern), fField(field), fStrong(strong) { | |
81 | int32_t argLimit = SimpleFormatter::getArgumentLimit( | |
82 | fCompiledPattern.getBuffer(), fCompiledPattern.length()); | |
83 | if (argLimit == 0) { | |
84 | // No arguments in compiled pattern | |
85 | fPrefixLength = fCompiledPattern.charAt(1) - ARG_NUM_LIMIT; | |
86 | U_ASSERT(2 + fPrefixLength == fCompiledPattern.length()); | |
87 | // Set suffixOffset = -1 to indicate no arguments in compiled pattern. | |
88 | fSuffixOffset = -1; | |
89 | fSuffixLength = 0; | |
90 | } else { | |
91 | U_ASSERT(argLimit == 1); | |
92 | if (fCompiledPattern.charAt(1) != 0) { | |
93 | fPrefixLength = fCompiledPattern.charAt(1) - ARG_NUM_LIMIT; | |
94 | fSuffixOffset = 3 + fPrefixLength; | |
95 | } else { | |
96 | fPrefixLength = 0; | |
97 | fSuffixOffset = 2; | |
98 | } | |
99 | if (3 + fPrefixLength < fCompiledPattern.length()) { | |
100 | fSuffixLength = fCompiledPattern.charAt(fSuffixOffset) - ARG_NUM_LIMIT; | |
101 | } else { | |
102 | fSuffixLength = 0; | |
103 | } | |
104 | } | |
105 | } | |
106 | ||
107 | SimpleModifier::SimpleModifier() | |
108 | : fField(UNUM_FIELD_COUNT), fStrong(false), fPrefixLength(0), fSuffixLength(0) { | |
109 | } | |
110 | ||
111 | int32_t SimpleModifier::apply(NumberStringBuilder &output, int leftIndex, int rightIndex, | |
112 | UErrorCode &status) const { | |
113 | return formatAsPrefixSuffix(output, leftIndex, rightIndex, fField, status); | |
114 | } | |
115 | ||
116 | int32_t SimpleModifier::getPrefixLength(UErrorCode &status) const { | |
117 | (void)status; | |
118 | return fPrefixLength; | |
119 | } | |
120 | ||
121 | int32_t SimpleModifier::getCodePointCount(UErrorCode &status) const { | |
122 | (void)status; | |
123 | int32_t count = 0; | |
124 | if (fPrefixLength > 0) { | |
125 | count += fCompiledPattern.countChar32(2, fPrefixLength); | |
126 | } | |
127 | if (fSuffixLength > 0) { | |
128 | count += fCompiledPattern.countChar32(1 + fSuffixOffset, fSuffixLength); | |
129 | } | |
130 | return count; | |
131 | } | |
132 | ||
133 | bool SimpleModifier::isStrong() const { | |
134 | return fStrong; | |
135 | } | |
136 | ||
137 | int32_t | |
138 | SimpleModifier::formatAsPrefixSuffix(NumberStringBuilder &result, int32_t startIndex, int32_t endIndex, | |
139 | Field field, UErrorCode &status) const { | |
140 | if (fSuffixOffset == -1) { | |
141 | // There is no argument for the inner number; overwrite the entire segment with our string. | |
142 | return result.splice(startIndex, endIndex, fCompiledPattern, 2, 2 + fPrefixLength, field, status); | |
143 | } else { | |
144 | if (fPrefixLength > 0) { | |
145 | result.insert(startIndex, fCompiledPattern, 2, 2 + fPrefixLength, field, status); | |
146 | } | |
147 | if (fSuffixLength > 0) { | |
148 | result.insert( | |
149 | endIndex + fPrefixLength, | |
150 | fCompiledPattern, | |
151 | 1 + fSuffixOffset, | |
152 | 1 + fSuffixOffset + fSuffixLength, | |
153 | field, | |
154 | status); | |
155 | } | |
156 | return fPrefixLength + fSuffixLength; | |
157 | } | |
158 | } | |
159 | ||
160 | int32_t ConstantMultiFieldModifier::apply(NumberStringBuilder &output, int leftIndex, int rightIndex, | |
161 | UErrorCode &status) const { | |
162 | int32_t length = output.insert(leftIndex, fPrefix, status); | |
163 | if (fOverwrite) { | |
164 | length += output.splice( | |
165 | leftIndex + length, | |
166 | rightIndex + length, | |
167 | UnicodeString(), 0, 0, | |
168 | UNUM_FIELD_COUNT, status); | |
169 | } | |
170 | length += output.insert(rightIndex + length, fSuffix, status); | |
171 | return length; | |
172 | } | |
173 | ||
174 | int32_t ConstantMultiFieldModifier::getPrefixLength(UErrorCode &status) const { | |
175 | (void)status; | |
176 | return fPrefix.length(); | |
177 | } | |
178 | ||
179 | int32_t ConstantMultiFieldModifier::getCodePointCount(UErrorCode &status) const { | |
180 | (void)status; | |
181 | return fPrefix.codePointCount() + fSuffix.codePointCount(); | |
182 | } | |
183 | ||
184 | bool ConstantMultiFieldModifier::isStrong() const { | |
185 | return fStrong; | |
186 | } | |
187 | ||
188 | CurrencySpacingEnabledModifier::CurrencySpacingEnabledModifier(const NumberStringBuilder &prefix, | |
189 | const NumberStringBuilder &suffix, | |
190 | bool overwrite, | |
191 | bool strong, | |
192 | const DecimalFormatSymbols &symbols, | |
193 | UErrorCode &status) | |
194 | : ConstantMultiFieldModifier(prefix, suffix, overwrite, strong) { | |
195 | // Check for currency spacing. Do not build the UnicodeSets unless there is | |
196 | // a currency code point at a boundary. | |
197 | if (prefix.length() > 0 && prefix.fieldAt(prefix.length() - 1) == UNUM_CURRENCY_FIELD) { | |
198 | int prefixCp = prefix.getLastCodePoint(); | |
199 | UnicodeSet prefixUnicodeSet = getUnicodeSet(symbols, IN_CURRENCY, PREFIX, status); | |
200 | if (prefixUnicodeSet.contains(prefixCp)) { | |
201 | fAfterPrefixUnicodeSet = getUnicodeSet(symbols, IN_NUMBER, PREFIX, status); | |
202 | fAfterPrefixUnicodeSet.freeze(); | |
203 | fAfterPrefixInsert = getInsertString(symbols, PREFIX, status); | |
204 | } else { | |
205 | fAfterPrefixUnicodeSet.setToBogus(); | |
206 | fAfterPrefixInsert.setToBogus(); | |
207 | } | |
208 | } else { | |
209 | fAfterPrefixUnicodeSet.setToBogus(); | |
210 | fAfterPrefixInsert.setToBogus(); | |
211 | } | |
212 | if (suffix.length() > 0 && suffix.fieldAt(0) == UNUM_CURRENCY_FIELD) { | |
213 | int suffixCp = suffix.getLastCodePoint(); | |
214 | UnicodeSet suffixUnicodeSet = getUnicodeSet(symbols, IN_CURRENCY, SUFFIX, status); | |
215 | if (suffixUnicodeSet.contains(suffixCp)) { | |
216 | fBeforeSuffixUnicodeSet = getUnicodeSet(symbols, IN_NUMBER, SUFFIX, status); | |
217 | fBeforeSuffixUnicodeSet.freeze(); | |
218 | fBeforeSuffixInsert = getInsertString(symbols, SUFFIX, status); | |
219 | } else { | |
220 | fBeforeSuffixUnicodeSet.setToBogus(); | |
221 | fBeforeSuffixInsert.setToBogus(); | |
222 | } | |
223 | } else { | |
224 | fBeforeSuffixUnicodeSet.setToBogus(); | |
225 | fBeforeSuffixInsert.setToBogus(); | |
226 | } | |
227 | } | |
228 | ||
229 | int32_t CurrencySpacingEnabledModifier::apply(NumberStringBuilder &output, int leftIndex, int rightIndex, | |
230 | UErrorCode &status) const { | |
231 | // Currency spacing logic | |
232 | int length = 0; | |
233 | if (rightIndex - leftIndex > 0 && !fAfterPrefixUnicodeSet.isBogus() && | |
234 | fAfterPrefixUnicodeSet.contains(output.codePointAt(leftIndex))) { | |
235 | // TODO: Should we use the CURRENCY field here? | |
236 | length += output.insert(leftIndex, fAfterPrefixInsert, UNUM_FIELD_COUNT, status); | |
237 | } | |
238 | if (rightIndex - leftIndex > 0 && !fBeforeSuffixUnicodeSet.isBogus() && | |
239 | fBeforeSuffixUnicodeSet.contains(output.codePointBefore(rightIndex))) { | |
240 | // TODO: Should we use the CURRENCY field here? | |
241 | length += output.insert(rightIndex + length, fBeforeSuffixInsert, UNUM_FIELD_COUNT, status); | |
242 | } | |
243 | ||
244 | // Call super for the remaining logic | |
245 | length += ConstantMultiFieldModifier::apply(output, leftIndex, rightIndex + length, status); | |
246 | return length; | |
247 | } | |
248 | ||
249 | int32_t | |
250 | CurrencySpacingEnabledModifier::applyCurrencySpacing(NumberStringBuilder &output, int32_t prefixStart, | |
251 | int32_t prefixLen, int32_t suffixStart, | |
252 | int32_t suffixLen, | |
253 | const DecimalFormatSymbols &symbols, | |
254 | UErrorCode &status) { | |
255 | int length = 0; | |
256 | bool hasPrefix = (prefixLen > 0); | |
257 | bool hasSuffix = (suffixLen > 0); | |
258 | bool hasNumber = (suffixStart - prefixStart - prefixLen > 0); // could be empty string | |
259 | if (hasPrefix && hasNumber) { | |
260 | length += applyCurrencySpacingAffix(output, prefixStart + prefixLen, PREFIX, symbols, status); | |
261 | } | |
262 | if (hasSuffix && hasNumber) { | |
263 | length += applyCurrencySpacingAffix(output, suffixStart + length, SUFFIX, symbols, status); | |
264 | } | |
265 | return length; | |
266 | } | |
267 | ||
268 | int32_t | |
269 | CurrencySpacingEnabledModifier::applyCurrencySpacingAffix(NumberStringBuilder &output, int32_t index, | |
270 | EAffix affix, | |
271 | const DecimalFormatSymbols &symbols, | |
272 | UErrorCode &status) { | |
273 | // NOTE: For prefix, output.fieldAt(index-1) gets the last field type in the prefix. | |
274 | // This works even if the last code point in the prefix is 2 code units because the | |
275 | // field value gets populated to both indices in the field array. | |
276 | Field affixField = (affix == PREFIX) ? output.fieldAt(index - 1) : output.fieldAt(index); | |
277 | if (affixField != UNUM_CURRENCY_FIELD) { | |
278 | return 0; | |
279 | } | |
280 | int affixCp = (affix == PREFIX) ? output.codePointBefore(index) : output.codePointAt(index); | |
281 | UnicodeSet affixUniset = getUnicodeSet(symbols, IN_CURRENCY, affix, status); | |
282 | if (!affixUniset.contains(affixCp)) { | |
283 | return 0; | |
284 | } | |
285 | int numberCp = (affix == PREFIX) ? output.codePointAt(index) : output.codePointBefore(index); | |
286 | UnicodeSet numberUniset = getUnicodeSet(symbols, IN_NUMBER, affix, status); | |
287 | if (!numberUniset.contains(numberCp)) { | |
288 | return 0; | |
289 | } | |
290 | UnicodeString spacingString = getInsertString(symbols, affix, status); | |
291 | ||
292 | // NOTE: This next line *inserts* the spacing string, triggering an arraycopy. | |
293 | // It would be more efficient if this could be done before affixes were attached, | |
294 | // so that it could be prepended/appended instead of inserted. | |
295 | // However, the build code path is more efficient, and this is the most natural | |
296 | // place to put currency spacing in the non-build code path. | |
297 | // TODO: Should we use the CURRENCY field here? | |
298 | return output.insert(index, spacingString, UNUM_FIELD_COUNT, status); | |
299 | } | |
300 | ||
301 | UnicodeSet | |
302 | CurrencySpacingEnabledModifier::getUnicodeSet(const DecimalFormatSymbols &symbols, EPosition position, | |
303 | EAffix affix, UErrorCode &status) { | |
304 | // Ensure the static defaults are initialized: | |
305 | umtx_initOnce(gDefaultCurrencySpacingInitOnce, &initDefaultCurrencySpacing, status); | |
306 | if (U_FAILURE(status)) { | |
307 | return UnicodeSet(); | |
308 | } | |
309 | ||
310 | const UnicodeString& pattern = symbols.getPatternForCurrencySpacing( | |
311 | position == IN_CURRENCY ? UNUM_CURRENCY_MATCH : UNUM_CURRENCY_SURROUNDING_MATCH, | |
312 | affix == SUFFIX, | |
313 | status); | |
314 | if (pattern.compare(u"[:digit:]", -1) == 0) { | |
315 | return *UNISET_DIGIT; | |
316 | } else if (pattern.compare(u"[:^S:]", -1) == 0) { | |
317 | return *UNISET_NOTS; | |
318 | } else { | |
319 | return UnicodeSet(pattern, status); | |
320 | } | |
321 | } | |
322 | ||
323 | UnicodeString | |
324 | CurrencySpacingEnabledModifier::getInsertString(const DecimalFormatSymbols &symbols, EAffix affix, | |
325 | UErrorCode &status) { | |
326 | return symbols.getPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, affix == SUFFIX, status); | |
327 | } | |
328 | ||
329 | #endif /* #if !UCONFIG_NO_FORMATTING */ |