]>
git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/number_affixutils.cpp
1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 #include "unicode/utypes.h"
6 #if !UCONFIG_NO_FORMATTING
8 #include "number_affixutils.h"
9 #include "unicode/utf16.h"
10 #include "unicode/uniset.h"
13 using namespace icu::number
;
14 using namespace icu::number::impl
;
16 TokenConsumer::~TokenConsumer() = default;
17 SymbolProvider::~SymbolProvider() = default;
19 int32_t AffixUtils::estimateLength(const UnicodeString
&patternString
, UErrorCode
&status
) {
20 AffixPatternState state
= STATE_BASE
;
23 for (; offset
< patternString
.length();) {
24 UChar32 cp
= patternString
.char32At(offset
);
30 state
= STATE_FIRST_QUOTE
;
36 case STATE_FIRST_QUOTE
:
44 state
= STATE_INSIDE_QUOTE
;
47 case STATE_INSIDE_QUOTE
:
49 // End of quoted sequence
50 state
= STATE_AFTER_QUOTE
;
56 case STATE_AFTER_QUOTE
:
58 // Double quote inside of quoted sequence
60 state
= STATE_INSIDE_QUOTE
;
70 offset
+= U16_LENGTH(cp
);
74 case STATE_FIRST_QUOTE
:
75 case STATE_INSIDE_QUOTE
:
76 status
= U_ILLEGAL_ARGUMENT_ERROR
;
85 UnicodeString
AffixUtils::escape(const UnicodeString
&input
) {
86 AffixPatternState state
= STATE_BASE
;
89 for (; offset
< input
.length();) {
90 UChar32 cp
= input
.char32At(offset
);
94 output
.append(u
"''", -1);
102 if (state
== STATE_BASE
) {
103 output
.append(u
'\'');
105 state
= STATE_INSIDE_QUOTE
;
112 if (state
== STATE_INSIDE_QUOTE
) {
113 output
.append(u
'\'');
121 offset
+= U16_LENGTH(cp
);
124 if (state
== STATE_INSIDE_QUOTE
) {
125 output
.append(u
'\'');
131 Field
AffixUtils::getFieldForType(AffixPatternType type
) {
133 case TYPE_MINUS_SIGN
:
134 return UNUM_SIGN_FIELD
;
136 return UNUM_SIGN_FIELD
;
138 return UNUM_PERCENT_FIELD
;
140 return UNUM_PERMILL_FIELD
;
141 case TYPE_CURRENCY_SINGLE
:
142 return UNUM_CURRENCY_FIELD
;
143 case TYPE_CURRENCY_DOUBLE
:
144 return UNUM_CURRENCY_FIELD
;
145 case TYPE_CURRENCY_TRIPLE
:
146 return UNUM_CURRENCY_FIELD
;
147 case TYPE_CURRENCY_QUAD
:
148 return UNUM_CURRENCY_FIELD
;
149 case TYPE_CURRENCY_QUINT
:
150 return UNUM_CURRENCY_FIELD
;
151 case TYPE_CURRENCY_OVERFLOW
:
152 return UNUM_CURRENCY_FIELD
;
159 AffixUtils::unescape(const UnicodeString
&affixPattern
, NumberStringBuilder
&output
, int32_t position
,
160 const SymbolProvider
&provider
, Field field
, UErrorCode
&status
) {
163 while (hasNext(tag
, affixPattern
)) {
164 tag
= nextToken(tag
, affixPattern
, status
);
165 if (U_FAILURE(status
)) { return length
; }
166 if (tag
.type
== TYPE_CURRENCY_OVERFLOW
) {
167 // Don't go to the provider for this special case
168 length
+= output
.insertCodePoint(position
+ length
, 0xFFFD, UNUM_CURRENCY_FIELD
, status
);
169 } else if (tag
.type
< 0) {
170 length
+= output
.insert(
171 position
+ length
, provider
.getSymbol(tag
.type
), getFieldForType(tag
.type
), status
);
173 length
+= output
.insertCodePoint(position
+ length
, tag
.codePoint
, field
, status
);
179 int32_t AffixUtils::unescapedCodePointCount(const UnicodeString
&affixPattern
,
180 const SymbolProvider
&provider
, UErrorCode
&status
) {
183 while (hasNext(tag
, affixPattern
)) {
184 tag
= nextToken(tag
, affixPattern
, status
);
185 if (U_FAILURE(status
)) { return length
; }
186 if (tag
.type
== TYPE_CURRENCY_OVERFLOW
) {
188 } else if (tag
.type
< 0) {
189 length
+= provider
.getSymbol(tag
.type
).length();
191 length
+= U16_LENGTH(tag
.codePoint
);
198 AffixUtils::containsType(const UnicodeString
&affixPattern
, AffixPatternType type
, UErrorCode
&status
) {
199 if (affixPattern
.length() == 0) {
203 while (hasNext(tag
, affixPattern
)) {
204 tag
= nextToken(tag
, affixPattern
, status
);
205 if (U_FAILURE(status
)) { return false; }
206 if (tag
.type
== type
) {
213 bool AffixUtils::hasCurrencySymbols(const UnicodeString
&affixPattern
, UErrorCode
&status
) {
214 if (affixPattern
.length() == 0) {
218 while (hasNext(tag
, affixPattern
)) {
219 tag
= nextToken(tag
, affixPattern
, status
);
220 if (U_FAILURE(status
)) { return false; }
221 if (tag
.type
< 0 && getFieldForType(tag
.type
) == UNUM_CURRENCY_FIELD
) {
228 UnicodeString
AffixUtils::replaceType(const UnicodeString
&affixPattern
, AffixPatternType type
,
229 char16_t replacementChar
, UErrorCode
&status
) {
230 UnicodeString
output(affixPattern
); // copy
231 if (affixPattern
.length() == 0) {
235 while (hasNext(tag
, affixPattern
)) {
236 tag
= nextToken(tag
, affixPattern
, status
);
237 if (U_FAILURE(status
)) { return output
; }
238 if (tag
.type
== type
) {
239 output
.replace(tag
.offset
- 1, 1, replacementChar
);
245 bool AffixUtils::containsOnlySymbolsAndIgnorables(const UnicodeString
& affixPattern
,
246 const UnicodeSet
& ignorables
, UErrorCode
& status
) {
247 if (affixPattern
.length() == 0) {
251 while (hasNext(tag
, affixPattern
)) {
252 tag
= nextToken(tag
, affixPattern
, status
);
253 if (U_FAILURE(status
)) { return false; }
254 if (tag
.type
== TYPE_CODEPOINT
&& !ignorables
.contains(tag
.codePoint
)) {
261 void AffixUtils::iterateWithConsumer(const UnicodeString
& affixPattern
, TokenConsumer
& consumer
,
262 UErrorCode
& status
) {
263 if (affixPattern
.length() == 0) {
267 while (hasNext(tag
, affixPattern
)) {
268 tag
= nextToken(tag
, affixPattern
, status
);
269 if (U_FAILURE(status
)) { return; }
270 consumer
.consumeToken(tag
.type
, tag
.codePoint
, status
);
271 if (U_FAILURE(status
)) { return; }
275 AffixTag
AffixUtils::nextToken(AffixTag tag
, const UnicodeString
&patternString
, UErrorCode
&status
) {
276 int32_t offset
= tag
.offset
;
277 int32_t state
= tag
.state
;
278 for (; offset
< patternString
.length();) {
279 UChar32 cp
= patternString
.char32At(offset
);
280 int32_t count
= U16_LENGTH(cp
);
286 state
= STATE_FIRST_QUOTE
;
288 // continue to the next code point
291 return makeTag(offset
+ count
, TYPE_MINUS_SIGN
, STATE_BASE
, 0);
293 return makeTag(offset
+ count
, TYPE_PLUS_SIGN
, STATE_BASE
, 0);
295 return makeTag(offset
+ count
, TYPE_PERCENT
, STATE_BASE
, 0);
297 return makeTag(offset
+ count
, TYPE_PERMILLE
, STATE_BASE
, 0);
299 state
= STATE_FIRST_CURR
;
301 // continue to the next code point
304 return makeTag(offset
+ count
, TYPE_CODEPOINT
, STATE_BASE
, cp
);
307 case STATE_FIRST_QUOTE
:
309 return makeTag(offset
+ count
, TYPE_CODEPOINT
, STATE_BASE
, cp
);
311 return makeTag(offset
+ count
, TYPE_CODEPOINT
, STATE_INSIDE_QUOTE
, cp
);
313 case STATE_INSIDE_QUOTE
:
315 state
= STATE_AFTER_QUOTE
;
317 // continue to the next code point
320 return makeTag(offset
+ count
, TYPE_CODEPOINT
, STATE_INSIDE_QUOTE
, cp
);
322 case STATE_AFTER_QUOTE
:
324 return makeTag(offset
+ count
, TYPE_CODEPOINT
, STATE_INSIDE_QUOTE
, cp
);
327 // re-evaluate this code point
330 case STATE_FIRST_CURR
:
332 state
= STATE_SECOND_CURR
;
334 // continue to the next code point
337 return makeTag(offset
, TYPE_CURRENCY_SINGLE
, STATE_BASE
, 0);
339 case STATE_SECOND_CURR
:
341 state
= STATE_THIRD_CURR
;
343 // continue to the next code point
346 return makeTag(offset
, TYPE_CURRENCY_DOUBLE
, STATE_BASE
, 0);
348 case STATE_THIRD_CURR
:
350 state
= STATE_FOURTH_CURR
;
352 // continue to the next code point
355 return makeTag(offset
, TYPE_CURRENCY_TRIPLE
, STATE_BASE
, 0);
357 case STATE_FOURTH_CURR
:
359 state
= STATE_FIFTH_CURR
;
361 // continue to the next code point
364 return makeTag(offset
, TYPE_CURRENCY_QUAD
, STATE_BASE
, 0);
366 case STATE_FIFTH_CURR
:
368 state
= STATE_OVERFLOW_CURR
;
370 // continue to the next code point
373 return makeTag(offset
, TYPE_CURRENCY_QUINT
, STATE_BASE
, 0);
375 case STATE_OVERFLOW_CURR
:
378 // continue to the next code point and loop back to this state
381 return makeTag(offset
, TYPE_CURRENCY_OVERFLOW
, STATE_BASE
, 0);
390 // No more tokens in string.
392 case STATE_FIRST_QUOTE
:
393 case STATE_INSIDE_QUOTE
:
394 // For consistent behavior with the JDK and ICU 58, set an error here.
395 status
= U_ILLEGAL_ARGUMENT_ERROR
;
397 case STATE_AFTER_QUOTE
:
398 // No more tokens in string.
400 case STATE_FIRST_CURR
:
401 return makeTag(offset
, TYPE_CURRENCY_SINGLE
, STATE_BASE
, 0);
402 case STATE_SECOND_CURR
:
403 return makeTag(offset
, TYPE_CURRENCY_DOUBLE
, STATE_BASE
, 0);
404 case STATE_THIRD_CURR
:
405 return makeTag(offset
, TYPE_CURRENCY_TRIPLE
, STATE_BASE
, 0);
406 case STATE_FOURTH_CURR
:
407 return makeTag(offset
, TYPE_CURRENCY_QUAD
, STATE_BASE
, 0);
408 case STATE_FIFTH_CURR
:
409 return makeTag(offset
, TYPE_CURRENCY_QUINT
, STATE_BASE
, 0);
410 case STATE_OVERFLOW_CURR
:
411 return makeTag(offset
, TYPE_CURRENCY_OVERFLOW
, STATE_BASE
, 0);
417 bool AffixUtils::hasNext(const AffixTag
&tag
, const UnicodeString
&string
) {
418 // First check for the {-1} and default initializer syntax.
419 if (tag
.offset
< 0) {
421 } else if (tag
.offset
== 0) {
422 return string
.length() > 0;
424 // The rest of the fields are safe to use now.
425 // Special case: the last character in string is an end quote.
426 if (tag
.state
== STATE_INSIDE_QUOTE
&& tag
.offset
== string
.length() - 1 &&
427 string
.charAt(tag
.offset
) == u
'\'') {
429 } else if (tag
.state
!= STATE_BASE
) {
432 return tag
.offset
< string
.length();
436 #endif /* #if !UCONFIG_NO_FORMATTING */