]>
git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/number_affixutils.cpp
1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 #include "unicode/utypes.h"
6 #if !UCONFIG_NO_FORMATTING
8 #include "number_affixutils.h"
9 #include "unicode/utf16.h"
10 #include "unicode/uniset.h"
13 using namespace icu::number
;
14 using namespace icu::number::impl
;
16 TokenConsumer::~TokenConsumer() = default;
17 SymbolProvider::~SymbolProvider() = default;
19 int32_t AffixUtils::estimateLength(const UnicodeString
&patternString
, UErrorCode
&status
) {
20 AffixPatternState state
= STATE_BASE
;
23 for (; offset
< patternString
.length();) {
24 UChar32 cp
= patternString
.char32At(offset
);
30 state
= STATE_FIRST_QUOTE
;
36 case STATE_FIRST_QUOTE
:
44 state
= STATE_INSIDE_QUOTE
;
47 case STATE_INSIDE_QUOTE
:
49 // End of quoted sequence
50 state
= STATE_AFTER_QUOTE
;
56 case STATE_AFTER_QUOTE
:
58 // Double quote inside of quoted sequence
60 state
= STATE_INSIDE_QUOTE
;
70 offset
+= U16_LENGTH(cp
);
74 case STATE_FIRST_QUOTE
:
75 case STATE_INSIDE_QUOTE
:
76 status
= U_ILLEGAL_ARGUMENT_ERROR
;
85 UnicodeString
AffixUtils::escape(const UnicodeString
&input
) {
86 AffixPatternState state
= STATE_BASE
;
89 for (; offset
< input
.length();) {
90 UChar32 cp
= input
.char32At(offset
);
94 output
.append(u
"''", -1);
102 if (state
== STATE_BASE
) {
103 output
.append(u
'\'');
105 state
= STATE_INSIDE_QUOTE
;
112 if (state
== STATE_INSIDE_QUOTE
) {
113 output
.append(u
'\'');
121 offset
+= U16_LENGTH(cp
);
124 if (state
== STATE_INSIDE_QUOTE
) {
125 output
.append(u
'\'');
131 Field
AffixUtils::getFieldForType(AffixPatternType type
) {
133 case TYPE_MINUS_SIGN
:
134 return Field::UNUM_SIGN_FIELD
;
136 return Field::UNUM_SIGN_FIELD
;
138 return Field::UNUM_PERCENT_FIELD
;
140 return Field::UNUM_PERMILL_FIELD
;
141 case TYPE_CURRENCY_SINGLE
:
142 return Field::UNUM_CURRENCY_FIELD
;
143 case TYPE_CURRENCY_DOUBLE
:
144 return Field::UNUM_CURRENCY_FIELD
;
145 case TYPE_CURRENCY_TRIPLE
:
146 return Field::UNUM_CURRENCY_FIELD
;
147 case TYPE_CURRENCY_QUAD
:
148 return Field::UNUM_CURRENCY_FIELD
;
149 case TYPE_CURRENCY_QUINT
:
150 return Field::UNUM_CURRENCY_FIELD
;
151 case TYPE_CURRENCY_OVERFLOW
:
152 return Field::UNUM_CURRENCY_FIELD
;
155 return Field::UNUM_FIELD_COUNT
; // suppress "control reaches end of non-void function"
160 AffixUtils::unescape(const UnicodeString
&affixPattern
, NumberStringBuilder
&output
, int32_t position
,
161 const SymbolProvider
&provider
, UErrorCode
&status
) {
164 while (hasNext(tag
, affixPattern
)) {
165 tag
= nextToken(tag
, affixPattern
, status
);
166 if (U_FAILURE(status
)) { return length
; }
167 if (tag
.type
== TYPE_CURRENCY_OVERFLOW
) {
168 // Don't go to the provider for this special case
169 length
+= output
.insertCodePoint(position
+ length
, 0xFFFD, UNUM_CURRENCY_FIELD
, status
);
170 } else if (tag
.type
< 0) {
171 length
+= output
.insert(
172 position
+ length
, provider
.getSymbol(tag
.type
), getFieldForType(tag
.type
), status
);
174 length
+= output
.insertCodePoint(position
+ length
, tag
.codePoint
, UNUM_FIELD_COUNT
, status
);
180 int32_t AffixUtils::unescapedCodePointCount(const UnicodeString
&affixPattern
,
181 const SymbolProvider
&provider
, UErrorCode
&status
) {
184 while (hasNext(tag
, affixPattern
)) {
185 tag
= nextToken(tag
, affixPattern
, status
);
186 if (U_FAILURE(status
)) { return length
; }
187 if (tag
.type
== TYPE_CURRENCY_OVERFLOW
) {
189 } else if (tag
.type
< 0) {
190 length
+= provider
.getSymbol(tag
.type
).length();
192 length
+= U16_LENGTH(tag
.codePoint
);
199 AffixUtils::containsType(const UnicodeString
&affixPattern
, AffixPatternType type
, UErrorCode
&status
) {
200 if (affixPattern
.length() == 0) {
204 while (hasNext(tag
, affixPattern
)) {
205 tag
= nextToken(tag
, affixPattern
, status
);
206 if (U_FAILURE(status
)) { return false; }
207 if (tag
.type
== type
) {
214 bool AffixUtils::hasCurrencySymbols(const UnicodeString
&affixPattern
, UErrorCode
&status
) {
215 if (affixPattern
.length() == 0) {
219 while (hasNext(tag
, affixPattern
)) {
220 tag
= nextToken(tag
, affixPattern
, status
);
221 if (U_FAILURE(status
)) { return false; }
222 if (tag
.type
< 0 && getFieldForType(tag
.type
) == UNUM_CURRENCY_FIELD
) {
229 UnicodeString
AffixUtils::replaceType(const UnicodeString
&affixPattern
, AffixPatternType type
,
230 char16_t replacementChar
, UErrorCode
&status
) {
231 UnicodeString
output(affixPattern
); // copy
232 if (affixPattern
.length() == 0) {
236 while (hasNext(tag
, affixPattern
)) {
237 tag
= nextToken(tag
, affixPattern
, status
);
238 if (U_FAILURE(status
)) { return output
; }
239 if (tag
.type
== type
) {
240 output
.replace(tag
.offset
- 1, 1, replacementChar
);
246 bool AffixUtils::containsOnlySymbolsAndIgnorables(const UnicodeString
& affixPattern
,
247 const UnicodeSet
& ignorables
, UErrorCode
& status
) {
248 if (affixPattern
.length() == 0) {
252 while (hasNext(tag
, affixPattern
)) {
253 tag
= nextToken(tag
, affixPattern
, status
);
254 if (U_FAILURE(status
)) { return false; }
255 if (tag
.type
== TYPE_CODEPOINT
&& !ignorables
.contains(tag
.codePoint
)) {
262 void AffixUtils::iterateWithConsumer(const UnicodeString
& affixPattern
, TokenConsumer
& consumer
,
263 UErrorCode
& status
) {
264 if (affixPattern
.length() == 0) {
268 while (hasNext(tag
, affixPattern
)) {
269 tag
= nextToken(tag
, affixPattern
, status
);
270 if (U_FAILURE(status
)) { return; }
271 consumer
.consumeToken(tag
.type
, tag
.codePoint
, status
);
272 if (U_FAILURE(status
)) { return; }
276 AffixTag
AffixUtils::nextToken(AffixTag tag
, const UnicodeString
&patternString
, UErrorCode
&status
) {
277 int32_t offset
= tag
.offset
;
278 int32_t state
= tag
.state
;
279 for (; offset
< patternString
.length();) {
280 UChar32 cp
= patternString
.char32At(offset
);
281 int32_t count
= U16_LENGTH(cp
);
287 state
= STATE_FIRST_QUOTE
;
289 // continue to the next code point
292 return makeTag(offset
+ count
, TYPE_MINUS_SIGN
, STATE_BASE
, 0);
294 return makeTag(offset
+ count
, TYPE_PLUS_SIGN
, STATE_BASE
, 0);
296 return makeTag(offset
+ count
, TYPE_PERCENT
, STATE_BASE
, 0);
298 return makeTag(offset
+ count
, TYPE_PERMILLE
, STATE_BASE
, 0);
300 state
= STATE_FIRST_CURR
;
302 // continue to the next code point
305 return makeTag(offset
+ count
, TYPE_CODEPOINT
, STATE_BASE
, cp
);
308 case STATE_FIRST_QUOTE
:
310 return makeTag(offset
+ count
, TYPE_CODEPOINT
, STATE_BASE
, cp
);
312 return makeTag(offset
+ count
, TYPE_CODEPOINT
, STATE_INSIDE_QUOTE
, cp
);
314 case STATE_INSIDE_QUOTE
:
316 state
= STATE_AFTER_QUOTE
;
318 // continue to the next code point
321 return makeTag(offset
+ count
, TYPE_CODEPOINT
, STATE_INSIDE_QUOTE
, cp
);
323 case STATE_AFTER_QUOTE
:
325 return makeTag(offset
+ count
, TYPE_CODEPOINT
, STATE_INSIDE_QUOTE
, cp
);
328 // re-evaluate this code point
331 case STATE_FIRST_CURR
:
333 state
= STATE_SECOND_CURR
;
335 // continue to the next code point
338 return makeTag(offset
, TYPE_CURRENCY_SINGLE
, STATE_BASE
, 0);
340 case STATE_SECOND_CURR
:
342 state
= STATE_THIRD_CURR
;
344 // continue to the next code point
347 return makeTag(offset
, TYPE_CURRENCY_DOUBLE
, STATE_BASE
, 0);
349 case STATE_THIRD_CURR
:
351 state
= STATE_FOURTH_CURR
;
353 // continue to the next code point
356 return makeTag(offset
, TYPE_CURRENCY_TRIPLE
, STATE_BASE
, 0);
358 case STATE_FOURTH_CURR
:
360 state
= STATE_FIFTH_CURR
;
362 // continue to the next code point
365 return makeTag(offset
, TYPE_CURRENCY_QUAD
, STATE_BASE
, 0);
367 case STATE_FIFTH_CURR
:
369 state
= STATE_OVERFLOW_CURR
;
371 // continue to the next code point
374 return makeTag(offset
, TYPE_CURRENCY_QUINT
, STATE_BASE
, 0);
376 case STATE_OVERFLOW_CURR
:
379 // continue to the next code point and loop back to this state
382 return makeTag(offset
, TYPE_CURRENCY_OVERFLOW
, STATE_BASE
, 0);
391 // No more tokens in string.
393 case STATE_FIRST_QUOTE
:
394 case STATE_INSIDE_QUOTE
:
395 // For consistent behavior with the JDK and ICU 58, set an error here.
396 status
= U_ILLEGAL_ARGUMENT_ERROR
;
398 case STATE_AFTER_QUOTE
:
399 // No more tokens in string.
401 case STATE_FIRST_CURR
:
402 return makeTag(offset
, TYPE_CURRENCY_SINGLE
, STATE_BASE
, 0);
403 case STATE_SECOND_CURR
:
404 return makeTag(offset
, TYPE_CURRENCY_DOUBLE
, STATE_BASE
, 0);
405 case STATE_THIRD_CURR
:
406 return makeTag(offset
, TYPE_CURRENCY_TRIPLE
, STATE_BASE
, 0);
407 case STATE_FOURTH_CURR
:
408 return makeTag(offset
, TYPE_CURRENCY_QUAD
, STATE_BASE
, 0);
409 case STATE_FIFTH_CURR
:
410 return makeTag(offset
, TYPE_CURRENCY_QUINT
, STATE_BASE
, 0);
411 case STATE_OVERFLOW_CURR
:
412 return makeTag(offset
, TYPE_CURRENCY_OVERFLOW
, STATE_BASE
, 0);
415 return {-1}; // suppress "control reaches end of non-void function"
419 bool AffixUtils::hasNext(const AffixTag
&tag
, const UnicodeString
&string
) {
420 // First check for the {-1} and default initializer syntax.
421 if (tag
.offset
< 0) {
423 } else if (tag
.offset
== 0) {
424 return string
.length() > 0;
426 // The rest of the fields are safe to use now.
427 // Special case: the last character in string is an end quote.
428 if (tag
.state
== STATE_INSIDE_QUOTE
&& tag
.offset
== string
.length() - 1 &&
429 string
.charAt(tag
.offset
) == u
'\'') {
431 } else if (tag
.state
!= STATE_BASE
) {
434 return tag
.offset
< string
.length();
438 #endif /* #if !UCONFIG_NO_FORMATTING */