1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 * Copyright (C) 2015, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 * file name: affixpatternparser.cpp
10 #include "unicode/utypes.h"
12 #if !UCONFIG_NO_FORMATTING
14 #include "unicode/dcfmtsym.h"
15 #include "unicode/plurrule.h"
16 #include "unicode/strenum.h"
17 #include "unicode/ucurr.h"
18 #include "unicode/ustring.h"
19 #include "affixpatternparser.h"
21 #include "precision.h"
23 #include "unistrappender.h"
25 static const UChar gDefaultSymbols
[] = {0xa4, 0xa4, 0xa4};
27 static const UChar gPercent
= 0x25;
28 static const UChar gPerMill
= 0x2030;
29 static const UChar gNegative
= 0x2D;
30 static const UChar gPositive
= 0x2B;
// Token encoding: every token occupies one 16-bit unit.
//   bits 0-7  : a length byte
//   bits 8-14 : the token type (AffixPattern::ETokenType)
//   bit  15   : "long" flag; set on the continuation units that carry the
//               higher-order bytes of a literal's length

// Packs token type t and the low 8 bits of l into one unit. Both arguments
// are parenthesized so that expression arguments are masked as a whole.
#define PACK_TOKEN_AND_LENGTH(t, l) ((UChar) (((t) << 8) | ((l) & 0xFF)))

// Extracts the token type, ignoring the "long" flag.
#define UNPACK_TOKEN(c) ((AffixPattern::ETokenType) (((c) >> 8) & 0x7F))

// Non-zero when the unit is a length continuation unit.
#define UNPACK_LONG(c) (((c) >> 8) & 0x80)

// Extracts the length byte.
#define UNPACK_LENGTH(c) ((c) & 0xFF)
43 nextToken(const UChar
*buffer
, int32_t idx
, int32_t len
, UChar
*token
) {
44 if (buffer
[idx
] != 0x27 || idx
+ 1 == len
) {
48 *token
= buffer
[idx
+ 1];
49 if (buffer
[idx
+ 1] == 0xA4) {
51 for (; idx
+ i
< len
&& i
< 4 && buffer
[idx
+ i
] == buffer
[idx
+ 1]; ++i
)
59 nextUserToken(const UChar
*buffer
, int32_t idx
, int32_t len
, UChar
*token
) {
62 switch (buffer
[idx
]) {
74 for (; idx
+ i
< len
&& i
< max
&& buffer
[idx
+ i
] == buffer
[idx
]; ++i
)
79 CurrencyAffixInfo::CurrencyAffixInfo()
80 : fSymbol(gDefaultSymbols
, 1),
81 fISO(gDefaultSymbols
, 2),
82 fLong(DigitAffix(gDefaultSymbols
, 3)),
87 CurrencyAffixInfo::set(
89 const PluralRules
*rules
,
90 const UChar
*currency
,
92 if (U_FAILURE(status
)) {
96 if (currency
== NULL
) {
97 fSymbol
.setTo(gDefaultSymbols
, 1);
98 fISO
.setTo(gDefaultSymbols
, 2);
100 fLong
.append(gDefaultSymbols
, 3);
105 UBool unusedIsChoice
;
106 const UChar
*symbol
= ucurr_getName(
107 currency
, locale
, UCURR_SYMBOL_NAME
, &unusedIsChoice
,
109 if (U_FAILURE(status
)) {
112 fSymbol
.setTo(symbol
, len
);
113 fISO
.setTo(currency
, u_strlen(currency
));
115 StringEnumeration
* keywords
= rules
->getKeywords(status
);
116 if (U_FAILURE(status
)) {
119 const UnicodeString
* pluralCount
;
120 while ((pluralCount
= keywords
->snext(status
)) != NULL
) {
122 pCount
.appendInvariantChars(*pluralCount
, status
);
123 const UChar
*pluralName
= ucurr_getPluralName(
124 currency
, locale
, &unusedIsChoice
, pCount
.data(),
126 fLong
.setVariant(pCount
.data(), UnicodeString(pluralName
, len
), status
);
132 CurrencyAffixInfo::adjustPrecision(
133 const UChar
*currency
, const UCurrencyUsage usage
,
134 FixedPrecision
&precision
, UErrorCode
&status
) {
135 if (U_FAILURE(status
)) {
139 int32_t digitCount
= ucurr_getDefaultFractionDigitsForUsage(
140 currency
, usage
, &status
);
141 precision
.fMin
.setFracDigitCount(digitCount
);
142 precision
.fMax
.setFracDigitCount(digitCount
);
143 double increment
= ucurr_getRoundingIncrementForUsage(
144 currency
, usage
, &status
);
145 if (increment
== 0.0) {
146 precision
.fRoundingIncrement
.clear();
148 precision
.fRoundingIncrement
.set(increment
);
149 // guard against round-off error
150 precision
.fRoundingIncrement
.round(6);
155 AffixPattern::addLiteral(
156 const UChar
*literal
, int32_t start
, int32_t len
) {
157 char32Count
+= u_countChar32(literal
+ start
, len
);
158 literals
.append(literal
, start
, len
);
159 int32_t tlen
= tokens
.length();
160 // Takes 4 UChars to encode maximum literal length.
161 UChar
*tokenChars
= tokens
.getBuffer(tlen
+ 4);
163 // find start of literal size. May be tlen if there is no literal.
164 // While finding start of literal size, compute literal length
165 int32_t literalLength
= 0;
166 int32_t tLiteralStart
= tlen
;
167 while (tLiteralStart
> 0 && UNPACK_TOKEN(tokenChars
[tLiteralStart
- 1]) == kLiteral
) {
170 literalLength
|= UNPACK_LENGTH(tokenChars
[tLiteralStart
]);
172 // Add number of chars we just added to literal
173 literalLength
+= len
;
175 // Now encode the new length starting at tLiteralStart
176 tlen
= tLiteralStart
;
177 tokenChars
[tlen
++] = PACK_TOKEN_AND_LENGTH(kLiteral
, literalLength
& 0xFF);
179 while (literalLength
) {
180 tokenChars
[tlen
++] = PACK_TOKEN_AND_LENGTH(kLiteral
| 0x80, literalLength
& 0xFF);
183 tokens
.releaseBuffer(tlen
);
187 AffixPattern::add(ETokenType t
) {
192 AffixPattern::addCurrency(uint8_t count
) {
193 add(kCurrency
, count
);
197 AffixPattern::add(ETokenType t
, uint8_t count
) {
198 U_ASSERT(t
!= kLiteral
);
199 char32Count
+= count
;
202 hasCurrencyToken
= TRUE
;
205 hasPercentToken
= TRUE
;
208 hasPermillToken
= TRUE
;
214 tokens
.append(PACK_TOKEN_AND_LENGTH(t
, count
));
218 AffixPattern::append(const AffixPattern
&other
) {
219 AffixPatternIterator iter
;
220 other
.iterator(iter
);
221 UnicodeString literal
;
222 while (iter
.nextToken()) {
223 switch (iter
.getTokenType()) {
225 iter
.getLiteral(literal
);
226 addLiteral(literal
.getBuffer(), 0, literal
.length());
229 addCurrency(static_cast<uint8_t>(iter
.getTokenLength()));
232 add(iter
.getTokenType());
240 AffixPattern::remove() {
243 hasCurrencyToken
= FALSE
;
244 hasPercentToken
= FALSE
;
245 hasPermillToken
= FALSE
;
249 // escapes literals for strings where special characters are NOT escaped
250 // except for apostrophe.
251 static void escapeApostropheInLiteral(
252 const UnicodeString
&literal
, UnicodeStringAppender
&appender
) {
253 int32_t len
= literal
.length();
254 const UChar
*buffer
= literal
.getBuffer();
255 for (int32_t i
= 0; i
< len
; ++i
) {
256 UChar ch
= buffer
[i
];
259 appender
.append((UChar
) 0x27);
260 appender
.append((UChar
) 0x27);
270 // escapes literals for user strings where special characters in literals
271 // are escaped with apostrophe.
272 static void escapeLiteral(
273 const UnicodeString
&literal
, UnicodeStringAppender
&appender
) {
274 int32_t len
= literal
.length();
275 const UChar
*buffer
= literal
.getBuffer();
276 for (int32_t i
= 0; i
< len
; ++i
) {
277 UChar ch
= buffer
[i
];
280 appender
.append((UChar
) 0x27);
281 appender
.append((UChar
) 0x27);
284 appender
.append((UChar
) 0x27);
285 appender
.append((UChar
) 0x25);
286 appender
.append((UChar
) 0x27);
289 appender
.append((UChar
) 0x27);
290 appender
.append((UChar
) 0x2030);
291 appender
.append((UChar
) 0x27);
294 appender
.append((UChar
) 0x27);
295 appender
.append((UChar
) 0xA4);
296 appender
.append((UChar
) 0x27);
299 appender
.append((UChar
) 0x27);
300 appender
.append((UChar
) 0x2D);
301 appender
.append((UChar
) 0x27);
304 appender
.append((UChar
) 0x27);
305 appender
.append((UChar
) 0x2B);
306 appender
.append((UChar
) 0x27);
316 AffixPattern::toString(UnicodeString
&appendTo
) const {
317 AffixPatternIterator iter
;
319 UnicodeStringAppender
appender(appendTo
);
320 UnicodeString literal
;
321 while (iter
.nextToken()) {
322 switch (iter
.getTokenType()) {
324 escapeApostropheInLiteral(iter
.getLiteral(literal
), appender
);
327 appender
.append((UChar
) 0x27);
328 appender
.append((UChar
) 0x25);
331 appender
.append((UChar
) 0x27);
332 appender
.append((UChar
) 0x2030);
336 appender
.append((UChar
) 0x27);
337 int32_t cl
= iter
.getTokenLength();
338 for (int32_t i
= 0; i
< cl
; ++i
) {
339 appender
.append((UChar
) 0xA4);
344 appender
.append((UChar
) 0x27);
345 appender
.append((UChar
) 0x2D);
348 appender
.append((UChar
) 0x27);
349 appender
.append((UChar
) 0x2B);
360 AffixPattern::toUserString(UnicodeString
&appendTo
) const {
361 AffixPatternIterator iter
;
363 UnicodeStringAppender
appender(appendTo
);
364 UnicodeString literal
;
365 while (iter
.nextToken()) {
366 switch (iter
.getTokenType()) {
368 escapeLiteral(iter
.getLiteral(literal
), appender
);
371 appender
.append((UChar
) 0x25);
374 appender
.append((UChar
) 0x2030);
378 int32_t cl
= iter
.getTokenLength();
379 for (int32_t i
= 0; i
< cl
; ++i
) {
380 appender
.append((UChar
) 0xA4);
385 appender
.append((UChar
) 0x2D);
388 appender
.append((UChar
) 0x2B);
398 class AffixPatternAppender
: public UMemory
{
400 AffixPatternAppender(AffixPattern
&dest
) : fDest(&dest
), fIdx(0) { }
402 inline void append(UChar x
) {
403 if (fIdx
== UPRV_LENGTHOF(fBuffer
)) {
404 fDest
->addLiteral(fBuffer
, 0, fIdx
);
410 inline void append(UChar32 x
) {
411 if (fIdx
>= UPRV_LENGTHOF(fBuffer
) - 1) {
412 fDest
->addLiteral(fBuffer
, 0, fIdx
);
415 U16_APPEND_UNSAFE(fBuffer
, fIdx
, x
);
418 inline void flush() {
420 fDest
->addLiteral(fBuffer
, 0, fIdx
);
426 * flush the buffer when we go out of scope.
428 ~AffixPatternAppender() {
435 AffixPatternAppender(const AffixPatternAppender
&other
);
436 AffixPatternAppender
&operator=(const AffixPatternAppender
&other
);
441 AffixPattern::parseUserAffixString(
442 const UnicodeString
&affixStr
,
443 AffixPattern
&appendTo
,
444 UErrorCode
&status
) {
445 if (U_FAILURE(status
)) {
448 int32_t len
= affixStr
.length();
449 const UChar
*buffer
= affixStr
.getBuffer();
450 // 0 = not quoted; 1 = quoted.
452 AffixPatternAppender
appender(appendTo
);
453 for (int32_t i
= 0; i
< len
; ) {
455 int32_t tokenSize
= nextUserToken(buffer
, i
, len
, &token
);
457 if (token
== 0x27 && tokenSize
== 1) { // quote
465 appendTo
.add(kPercent
, 1);
467 case 0x27: // double quote
468 appender
.append((UChar
) 0x27);
472 appendTo
.add(kPerMill
, 1);
476 appendTo
.add(kNegative
, 1);
480 appendTo
.add(kPositive
, 1);
484 appendTo
.add(kCurrency
, static_cast<uint8_t>(tokenSize
));
487 appender
.append(token
);
492 case 0x27: // double quote
493 appender
.append((UChar
) 0x27);
495 case 0xA4: // included b/c tokenSize can be > 1
496 for (int32_t j
= 0; j
< tokenSize
; ++j
) {
497 appender
.append((UChar
) 0xA4);
501 appender
.append(token
);
510 AffixPattern::parseAffixString(
511 const UnicodeString
&affixStr
,
512 AffixPattern
&appendTo
,
513 UErrorCode
&status
) {
514 if (U_FAILURE(status
)) {
517 int32_t len
= affixStr
.length();
518 const UChar
*buffer
= affixStr
.getBuffer();
519 for (int32_t i
= 0; i
< len
; ) {
521 int32_t tokenSize
= nextToken(buffer
, i
, len
, &token
);
522 if (tokenSize
== 1) {
523 int32_t literalStart
= i
;
525 while (i
< len
&& (tokenSize
= nextToken(buffer
, i
, len
, &token
)) == 1) {
528 appendTo
.addLiteral(buffer
, literalStart
, i
- literalStart
);
530 // If we reached end of string, we are done
538 appendTo
.add(kPercent
, 1);
541 appendTo
.add(kPerMill
, 1);
544 appendTo
.add(kNegative
, 1);
547 appendTo
.add(kPositive
, 1);
551 if (tokenSize
- 1 > 3) {
552 status
= U_PARSE_ERROR
;
555 appendTo
.add(kCurrency
, tokenSize
- 1);
559 appendTo
.addLiteral(&token
, 0, 1);
566 AffixPatternIterator
&
567 AffixPattern::iterator(AffixPatternIterator
&result
) const {
568 result
.nextLiteralIndex
= 0;
569 result
.lastLiteralLength
= 0;
570 result
.nextTokenIndex
= 0;
571 result
.tokens
= &tokens
;
572 result
.literals
= &literals
;
577 AffixPatternIterator::nextToken() {
578 int32_t tlen
= tokens
->length();
579 if (nextTokenIndex
== tlen
) {
583 const UChar
*tokenBuffer
= tokens
->getBuffer();
584 if (UNPACK_TOKEN(tokenBuffer
[nextTokenIndex
- 1]) ==
585 AffixPattern::kLiteral
) {
586 while (nextTokenIndex
< tlen
&&
587 UNPACK_LONG(tokenBuffer
[nextTokenIndex
])) {
590 lastLiteralLength
= 0;
591 int32_t i
= nextTokenIndex
- 1;
592 for (; UNPACK_LONG(tokenBuffer
[i
]); --i
) {
593 lastLiteralLength
<<= 8;
594 lastLiteralLength
|= UNPACK_LENGTH(tokenBuffer
[i
]);
596 lastLiteralLength
<<= 8;
597 lastLiteralLength
|= UNPACK_LENGTH(tokenBuffer
[i
]);
598 nextLiteralIndex
+= lastLiteralLength
;
603 AffixPattern::ETokenType
604 AffixPatternIterator::getTokenType() const {
605 return UNPACK_TOKEN(tokens
->charAt(nextTokenIndex
- 1));
609 AffixPatternIterator::getLiteral(UnicodeString
&result
) const {
610 const UChar
*buffer
= literals
->getBuffer();
611 result
.setTo(buffer
+ (nextLiteralIndex
- lastLiteralLength
), lastLiteralLength
);
616 AffixPatternIterator::getTokenLength() const {
617 const UChar
*tokenBuffer
= tokens
->getBuffer();
618 AffixPattern::ETokenType type
= UNPACK_TOKEN(tokenBuffer
[nextTokenIndex
- 1]);
619 return type
== AffixPattern::kLiteral
? lastLiteralLength
: UNPACK_LENGTH(tokenBuffer
[nextTokenIndex
- 1]);
622 AffixPatternParser::AffixPatternParser()
623 : fPercent(gPercent
), fPermill(gPerMill
), fNegative(gNegative
), fPositive(gPositive
) {
626 AffixPatternParser::AffixPatternParser(
627 const DecimalFormatSymbols
&symbols
) {
628 setDecimalFormatSymbols(symbols
);
632 AffixPatternParser::setDecimalFormatSymbols(
633 const DecimalFormatSymbols
&symbols
) {
634 fPercent
= symbols
.getConstSymbol(DecimalFormatSymbols::kPercentSymbol
);
635 fPermill
= symbols
.getConstSymbol(DecimalFormatSymbols::kPerMillSymbol
);
636 fNegative
= symbols
.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol
);
637 fPositive
= symbols
.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol
);
641 AffixPatternParser::parse(
642 const AffixPattern
&affixPattern
,
643 const CurrencyAffixInfo
¤cyAffixInfo
,
644 PluralAffix
&appendTo
,
645 UErrorCode
&status
) const {
646 if (U_FAILURE(status
)) {
649 AffixPatternIterator iter
;
650 affixPattern
.iterator(iter
);
651 UnicodeString literal
;
652 while (iter
.nextToken()) {
653 switch (iter
.getTokenType()) {
654 case AffixPattern::kPercent
:
655 appendTo
.append(fPercent
, UNUM_PERCENT_FIELD
);
657 case AffixPattern::kPerMill
:
658 appendTo
.append(fPermill
, UNUM_PERMILL_FIELD
);
660 case AffixPattern::kNegative
:
661 appendTo
.append(fNegative
, UNUM_SIGN_FIELD
);
663 case AffixPattern::kPositive
:
664 appendTo
.append(fPositive
, UNUM_SIGN_FIELD
);
666 case AffixPattern::kCurrency
:
667 switch (iter
.getTokenLength()) {
670 currencyAffixInfo
.getSymbol(), UNUM_CURRENCY_FIELD
);
674 currencyAffixInfo
.getISO(), UNUM_CURRENCY_FIELD
);
678 currencyAffixInfo
.getLong(), UNUM_CURRENCY_FIELD
, status
);
685 case AffixPattern::kLiteral
:
686 appendTo
.append(iter
.getLiteral(literal
));
698 #endif /* #if !UCONFIG_NO_FORMATTING */