2 * Copyright (C) 2015, International Business Machines
3 * Corporation and others. All Rights Reserved.
5 * file name: affixpatternparser.cpp
8 #include "unicode/utypes.h"
10 #if !UCONFIG_NO_FORMATTING
12 #include "unicode/dcfmtsym.h"
13 #include "unicode/plurrule.h"
14 #include "unicode/ucurr.h"
15 #include "affixpatternparser.h"
17 #include "precision.h"
19 #include "unistrappender.h"
21 static UChar gDefaultSymbols
[] = {0xa4, 0xa4, 0xa4};
23 static UChar gPercent
= 0x25;
24 static UChar gPerMill
= 0x2030;
25 static UChar gNegative
= 0x2D;
26 static UChar gPositive
= 0x2B;
// Token encoding helpers. A token occupies one UChar laid out as:
//   bits 0-7   length byte            (UNPACK_LENGTH)
//   bits 8-14  token type             (UNPACK_TOKEN, AffixPattern::ETokenType)
//   bit  15    "long" continuation bit (UNPACK_LONG tests 0x80 of the high byte)
//
// Every macro argument is parenthesized before use so that an argument
// containing a lower-precedence operator (for example `a | b`) is shifted or
// masked as a whole rather than only its last operand.
#define PACK_TOKEN_AND_LENGTH(t, l) ((UChar) (((t) << 8) | ((l) & 0xFF)))

#define UNPACK_TOKEN(c) ((AffixPattern::ETokenType) (((c) >> 8) & 0x7F))

#define UNPACK_LONG(c) (((c) >> 8) & 0x80)

#define UNPACK_LENGTH(c) ((c) & 0xFF)
// nextToken: examines the affix-pattern characters in `buffer` (length `len`)
// at position `idx` and stores the token character through `token`.
// NOTE(review): this listing omits several original lines of the function
// (the embedded numbering jumps 40 -> 44 and 47 -> 54), including the return
// type, the declaration of `i` and whatever is returned, so the return value
// is not documented here.
39 nextToken(const UChar
*buffer
, int32_t idx
, int32_t len
, UChar
*token
) {
// True when the character at idx is not an apostrophe (0x27), or when the
// apostrophe is the last character of the buffer.
40 if (buffer
[idx
] != 0x27 || idx
+ 1 == len
) {
// The token is the character following buffer[idx]; presumably reached only
// when the test above was false, since buffer[idx + 1] must exist.
44 *token
= buffer
[idx
+ 1];
45 if (buffer
[idx
+ 1] == 0xA4) {
// Advance i while the characters keep repeating buffer[idx + 1]; the scan is
// bounded by the end of the buffer and by i < 4.
47 for (; idx
+ i
< len
&& i
< 4 && buffer
[idx
+ i
] == buffer
[idx
+ 1]; ++i
);
// nextUserToken: examines the user-pattern characters in `buffer` (length
// `len`) at position `idx`; takes a `token` out-parameter like nextToken.
// NOTE(review): the case labels of the switch, the definitions of `i` and
// `max`, the write to *token and the return statement are not present in
// this listing (numbering jumps 57 -> 69).
54 nextUserToken(const UChar
*buffer
, int32_t idx
, int32_t len
, UChar
*token
) {
57 switch (buffer
[idx
]) {
// Advance i over a run of characters equal to buffer[idx], bounded by the end
// of the buffer and by `max`.
69 for (; idx
+ i
< len
&& i
< max
&& buffer
[idx
+ i
] == buffer
[idx
]; ++i
);
// Default constructor: initializes the symbol, ISO and long-name members from
// gDefaultSymbols using 1, 2 and 3 characters respectively.
// NOTE(review): the remainder of the initializer list and the body are not
// present in this listing.
73 CurrencyAffixInfo::CurrencyAffixInfo()
74 : fSymbol(gDefaultSymbols
, 1),
75 fISO(gDefaultSymbols
, 2),
76 fLong(DigitAffix(gDefaultSymbols
, 3)),
// CurrencyAffixInfo::set: fills fSymbol, fISO and fLong for `currency`.
//   rules:    plural rules whose keywords select the long-name variants
//   currency: currency code as a UChar string, or NULL for the defaults
// `locale`, `status`, `len` and `unusedIsChoice` are used below but their
// declarations are not present in this listing (numbering skips 82, 85,
// 87-89, 95-99 among others).
// NOTE(review): no release of `keywords` is visible in this listing; confirm
// that the StringEnumeration returned by getKeywords() is deleted on all paths.
81 CurrencyAffixInfo::set(
83 const PluralRules
*rules
,
84 const UChar
*currency
,
86 if (U_FAILURE(status
)) {
// No currency supplied: fall back to the placeholder characters held in
// gDefaultSymbols (1 for the symbol, 2 for the ISO form, 3 for the long form).
90 if (currency
== NULL
) {
91 fSymbol
.setTo(gDefaultSymbols
, 1);
92 fISO
.setTo(gDefaultSymbols
, 2);
94 fLong
.append(gDefaultSymbols
, 3);
// Look up the symbol name for the currency via ucurr_getName.
100 const UChar
*symbol
= ucurr_getName(
101 currency
, locale
, UCURR_SYMBOL_NAME
, &unusedIsChoice
,
103 if (U_FAILURE(status
)) {
106 fSymbol
.setTo(symbol
, len
);
// The ISO form is the currency code itself.
107 fISO
.setTo(currency
, u_strlen(currency
));
// Register one long-name variant in fLong per plural keyword of `rules`.
109 StringEnumeration
* keywords
= rules
->getKeywords(status
);
110 if (U_FAILURE(status
)) {
113 const UnicodeString
* pluralCount
;
114 while ((pluralCount
= keywords
->snext(status
)) != NULL
) {
// pCount receives the keyword converted with appendInvariantChars; its data()
// is passed to ucurr_getPluralName and used as the variant key.
116 pCount
.appendInvariantChars(*pluralCount
, status
);
117 const UChar
*pluralName
= ucurr_getPluralName(
118 currency
, locale
, &unusedIsChoice
, pCount
.data(),
120 fLong
.setVariant(pCount
.data(), UnicodeString(pluralName
, len
), status
);
// CurrencyAffixInfo::adjustPrecision: configures `precision` from the
// currency data for `currency` and `usage`.
//   - fMin and fMax fraction digit counts are both set to the value returned
//     by ucurr_getDefaultFractionDigitsForUsage.
//   - fRoundingIncrement is cleared when ucurr_getRoundingIncrementForUsage
//     returns 0.0; otherwise it is set to that increment and rounded with
//     round(6).
// Starts with a U_FAILURE(status) check.
// NOTE(review): the bodies of the early-exit branch and the `else` line are
// not present in this listing (numbering skips 130-132, 141).
126 CurrencyAffixInfo::adjustPrecision(
127 const UChar
*currency
, const UCurrencyUsage usage
,
128 FixedPrecision
&precision
, UErrorCode
&status
) {
129 if (U_FAILURE(status
)) {
// Same fraction digit count for both the minimum and the maximum.
133 int32_t digitCount
= ucurr_getDefaultFractionDigitsForUsage(
134 currency
, usage
, &status
);
135 precision
.fMin
.setFracDigitCount(digitCount
);
136 precision
.fMax
.setFracDigitCount(digitCount
);
137 double increment
= ucurr_getRoundingIncrementForUsage(
138 currency
, usage
, &status
);
// An increment of exactly 0.0 removes any rounding increment.
139 if (increment
== 0.0) {
140 precision
.fRoundingIncrement
.clear();
142 precision
.fRoundingIncrement
.set(increment
);
143 // guard against round-off error
144 precision
.fRoundingIncrement
.round(6);
// AffixPattern::addLiteral: appends `len` UChars of `literal`, starting at
// offset `start`, to this pattern's literal text and updates the literal
// length stored in the token string.
//   - char32Count grows by the code point count of the added text.
//   - Trailing kLiteral tokens already present are re-encoded so that they
//     carry the combined literal length; continuation tokens are written with
//     kLiteral | 0x80.
// NOTE(review): the shift statements between the visible lines are not
// present in this listing (numbering skips 162-163, 172, 175).
// NOTE(review): the pointer returned by tokens.getBuffer(tlen + 4) is used
// with no visible null check; confirm allocation failure is handled.
149 AffixPattern::addLiteral(
150 const UChar
*literal
, int32_t start
, int32_t len
) {
151 char32Count
+= u_countChar32(literal
+ start
, len
);
152 literals
.append(literal
, start
, len
);
153 int32_t tlen
= tokens
.length();
154 // Takes 4 UChars to encode maximum literal length.
155 UChar
*tokenChars
= tokens
.getBuffer(tlen
+ 4);
157 // find start of literal size. May be tlen if there is no literal.
158 // While finding start of literal size, compute literal length
159 int32_t literalLength
= 0;
160 int32_t tLiteralStart
= tlen
;
161 while (tLiteralStart
> 0 && UNPACK_TOKEN(tokenChars
[tLiteralStart
- 1]) == kLiteral
) {
164 literalLength
|= UNPACK_LENGTH(tokenChars
[tLiteralStart
]);
166 // Add number of chars we just added to literal
167 literalLength
+= len
;
169 // Now encode the new length starting at tLiteralStart
170 tlen
= tLiteralStart
;
171 tokenChars
[tlen
++] = PACK_TOKEN_AND_LENGTH(kLiteral
, literalLength
& 0xFF);
// Remaining length bytes are written as continuation tokens (0x80 flag set).
173 while (literalLength
) {
174 tokenChars
[tlen
++] = PACK_TOKEN_AND_LENGTH(kLiteral
| 0x80, literalLength
& 0xFF);
// Commit the new token string length.
177 tokens
.releaseBuffer(tlen
);
// AffixPattern::add, single-argument overload taking only a token type.
// NOTE(review): only the signature is present in this listing; the body is
// not shown, so its behavior is not documented here.
181 AffixPattern::add(ETokenType t
) {
// AffixPattern::addCurrency: adds a kCurrency token whose length byte is
// `count` by forwarding to add(kCurrency, count).
186 AffixPattern::addCurrency(uint8_t count
) {
187 add(kCurrency
, count
);
// AffixPattern::add(t, count): appends a non-literal token of type `t` with
// length byte `count` to the token string.
//   - Asserts that t is not kLiteral.
//   - Adds `count` to char32Count.
//   - Sets hasCurrencyToken / hasPercentToken / hasPermillToken to TRUE.
// NOTE(review): the conditions selecting which flag is set are not present in
// this listing (numbering skips 194-195, 197-198, 200-201); presumably they
// depend on `t`.
191 AffixPattern::add(ETokenType t
, uint8_t count
) {
192 U_ASSERT(t
!= kLiteral
);
193 char32Count
+= count
;
196 hasCurrencyToken
= TRUE
;
199 hasPercentToken
= TRUE
;
202 hasPermillToken
= TRUE
;
// Store type and count packed into a single UChar.
208 tokens
.append(PACK_TOKEN_AND_LENGTH(t
, count
));
// AffixPattern::append: walks the tokens of `other` with an
// AffixPatternIterator and re-adds each one to this pattern:
//   - literal text goes through addLiteral,
//   - currency tokens go through addCurrency with the token length,
//   - other token types go through add(type).
// NOTE(review): the case labels, break statements and the return statement
// are not present in this listing (numbering skips 218, 221-222, 224-225).
212 AffixPattern::append(const AffixPattern
&other
) {
213 AffixPatternIterator iter
;
214 other
.iterator(iter
);
215 UnicodeString literal
;
216 while (iter
.nextToken()) {
217 switch (iter
.getTokenType()) {
// Copy the literal text of the current token into `literal`, then add it.
219 iter
.getLiteral(literal
);
220 addLiteral(literal
.getBuffer(), 0, literal
.length());
223 addCurrency(iter
.getTokenLength());
226 add(iter
.getTokenType());
// AffixPattern::remove: resets the three has*Token flags to FALSE.
// NOTE(review): original lines 235-236 and anything after line 239 are not
// present in this listing, so any other state cleared here is not documented.
234 AffixPattern::remove() {
237 hasCurrencyToken
= FALSE
;
238 hasPercentToken
= FALSE
;
239 hasPermillToken
= FALSE
;
// escapeApostropheInLiteral: iterates over every UChar of `literal` and writes
// output through `appender`.
// NOTE(review): the test on `ch` and the branch that appends other characters
// are not present in this listing (numbering skips 251-252, 255 onward); the
// two visible appends emit a pair of apostrophes (0x27).
243 // escapes literals for strings where special characters are NOT escaped
244 // except for apostrophe.
245 static void escapeApostropheInLiteral(
246 const UnicodeString
&literal
, UnicodeStringAppender
&appender
) {
247 int32_t len
= literal
.length();
248 const UChar
*buffer
= literal
.getBuffer();
249 for (int32_t i
= 0; i
< len
; ++i
) {
250 UChar ch
= buffer
[i
];
// Emits two apostrophes.
253 appender
.append((UChar
) 0x27);
254 appender
.append((UChar
) 0x27);
// escapeLiteral: iterates over every UChar of `literal` and writes output
// through `appender`.
// Visible output sequences: a pair of apostrophes, and the characters 0x25,
// 0x2030, 0xA4, 0x2D and 0x2B each wrapped in apostrophes (0x27).
// NOTE(review): the switch/case labels that select each sequence and the
// default branch are not present in this listing (numbering skips 272-273,
// 276-277, 281-282, ...); presumably each sequence is chosen by the value
// of `ch`.
264 // escapes literals for user strings where special characters in literals
265 // are escaped with apostrophe.
266 static void escapeLiteral(
267 const UnicodeString
&literal
, UnicodeStringAppender
&appender
) {
268 int32_t len
= literal
.length();
269 const UChar
*buffer
= literal
.getBuffer();
270 for (int32_t i
= 0; i
< len
; ++i
) {
271 UChar ch
= buffer
[i
];
// Two apostrophes.
274 appender
.append((UChar
) 0x27);
275 appender
.append((UChar
) 0x27);
// 0x25 between apostrophes.
278 appender
.append((UChar
) 0x27);
279 appender
.append((UChar
) 0x25);
280 appender
.append((UChar
) 0x27);
// 0x2030 between apostrophes.
283 appender
.append((UChar
) 0x27);
284 appender
.append((UChar
) 0x2030);
285 appender
.append((UChar
) 0x27);
// 0xA4 between apostrophes.
288 appender
.append((UChar
) 0x27);
289 appender
.append((UChar
) 0xA4);
290 appender
.append((UChar
) 0x27);
// 0x2D between apostrophes.
293 appender
.append((UChar
) 0x27);
294 appender
.append((UChar
) 0x2D);
295 appender
.append((UChar
) 0x27);
// 0x2B between apostrophes.
298 appender
.append((UChar
) 0x27);
299 appender
.append((UChar
) 0x2B);
300 appender
.append((UChar
) 0x27);
// AffixPattern::toString: writes a string form of this pattern to `appendTo`
// through a UnicodeStringAppender, one token at a time.
//   - Literal tokens are written via escapeApostropheInLiteral.
//   - The visible non-literal outputs are an apostrophe (0x27) followed by
//     0x25, 0x2030, 0x2D or 0x2B, and an apostrophe followed by
//     getTokenLength() copies of 0xA4.
// NOTE(review): the case labels, the call that attaches `iter` to this
// pattern and the return statement are not present in this listing
// (numbering skips 312, 317, 319-320, ...).
310 AffixPattern::toString(UnicodeString
&appendTo
) const {
311 AffixPatternIterator iter
;
313 UnicodeStringAppender
appender(appendTo
);
314 UnicodeString literal
;
315 while (iter
.nextToken()) {
316 switch (iter
.getTokenType()) {
318 escapeApostropheInLiteral(iter
.getLiteral(literal
), appender
);
// Apostrophe then 0x25.
321 appender
.append((UChar
) 0x27);
322 appender
.append((UChar
) 0x25);
// Apostrophe then 0x2030.
325 appender
.append((UChar
) 0x27);
326 appender
.append((UChar
) 0x2030);
// Apostrophe then `cl` copies of 0xA4, where cl is the token length.
330 appender
.append((UChar
) 0x27);
331 int32_t cl
= iter
.getTokenLength();
332 for (int32_t i
= 0; i
< cl
; ++i
) {
333 appender
.append((UChar
) 0xA4);
// Apostrophe then 0x2D.
338 appender
.append((UChar
) 0x27);
339 appender
.append((UChar
) 0x2D);
// Apostrophe then 0x2B.
342 appender
.append((UChar
) 0x27);
343 appender
.append((UChar
) 0x2B);
// AffixPattern::toUserString: writes the user-pattern form of this pattern to
// `appendTo` through a UnicodeStringAppender, one token at a time.
//   - Literal tokens are written via escapeLiteral.
//   - The visible non-literal outputs are the bare characters 0x25, 0x2030,
//     0x2D, 0x2B, and getTokenLength() copies of 0xA4 (no apostrophe prefix,
//     unlike toString).
// NOTE(review): the case labels, the call that attaches `iter` to this
// pattern and the return statement are not present in this listing
// (numbering skips 356, 361, 363-364, ...).
354 AffixPattern::toUserString(UnicodeString
&appendTo
) const {
355 AffixPatternIterator iter
;
357 UnicodeStringAppender
appender(appendTo
);
358 UnicodeString literal
;
359 while (iter
.nextToken()) {
360 switch (iter
.getTokenType()) {
362 escapeLiteral(iter
.getLiteral(literal
), appender
);
365 appender
.append((UChar
) 0x25);
368 appender
.append((UChar
) 0x2030);
// `cl` copies of 0xA4, where cl is the token length.
372 int32_t cl
= iter
.getTokenLength();
373 for (int32_t i
= 0; i
< cl
; ++i
) {
374 appender
.append((UChar
) 0xA4);
379 appender
.append((UChar
) 0x2D);
382 appender
.append((UChar
) 0x2B);
// AffixPatternAppender: helper that collects UChars in an internal buffer
// (fBuffer, filled up to index fIdx) and hands them to
// AffixPattern::addLiteral on the destination pattern (fDest).
// NOTE(review): the member declarations, access specifiers, the statements
// that store into fBuffer / reset fIdx, and the destructor body are not
// present in this listing (numbering skips 393, 395, 399-403, 413, 415-419,
// 423-428).
392 class AffixPatternAppender
: public UMemory
{
// Binds the appender to `dest` and starts with an empty buffer (fIdx = 0).
394 AffixPatternAppender(AffixPattern
&dest
) : fDest(&dest
), fIdx(0) { }
// Single code unit: when the buffer is full, its contents are passed to
// addLiteral.
396 inline void append(UChar x
) {
397 if (fIdx
== UPRV_LENGTHOF(fBuffer
)) {
398 fDest
->addLiteral(fBuffer
, 0, fIdx
);
// Code point: the buffer is handed off one slot earlier than in the UChar
// overload (capacity - 1), presumably because U16_APPEND_UNSAFE may write two
// code units.
404 inline void append(UChar32 x
) {
405 if (fIdx
>= UPRV_LENGTHOF(fBuffer
) - 1) {
406 fDest
->addLiteral(fBuffer
, 0, fIdx
);
409 U16_APPEND_UNSAFE(fBuffer
, fIdx
, x
);
// Passes the buffered characters to addLiteral.
412 inline void flush() {
414 fDest
->addLiteral(fBuffer
, 0, fIdx
);
420 * flush the buffer when we go out of scope.
422 ~AffixPatternAppender() {
// Copy constructor and copy assignment are declared here; presumably left
// undefined to forbid copying (TODO confirm access level, not visible here).
429 AffixPatternAppender(const AffixPatternAppender
&other
);
430 AffixPatternAppender
&operator=(const AffixPatternAppender
&other
);
// AffixPattern::parseUserAffixString: tokenizes `affixStr` with nextUserToken
// and appends the result to `appendTo`.
//   - Starts with a U_FAILURE(status) check.
//   - A lone apostrophe token (token == 0x27, tokenSize == 1) is tested
//     separately; per the existing comment, a state variable tracks
//     quoted / not quoted.
//   - Visible actions: add(kPercent/kPerMill/kNegative/kPositive, 1),
//     add(kCurrency, tokenSize), and literal output through an
//     AffixPatternAppender (an apostrophe for a doubled quote, tokenSize
//     copies of 0xA4, or the token itself).
// NOTE(review): the state variable, most case labels, the loop increment and
// the return statement are not present in this listing (numbering skips 445,
// 448, 450, 452-458, ...), so the mapping from input character to action is
// only partly visible.
435 AffixPattern::parseUserAffixString(
436 const UnicodeString
&affixStr
,
437 AffixPattern
&appendTo
,
438 UErrorCode
&status
) {
439 if (U_FAILURE(status
)) {
442 int32_t len
= affixStr
.length();
443 const UChar
*buffer
= affixStr
.getBuffer();
444 // 0 = not quoted; 1 = quoted.
// Literal characters are buffered in `appender` before reaching appendTo.
446 AffixPatternAppender
appender(appendTo
);
447 for (int32_t i
= 0; i
< len
; ) {
449 int32_t tokenSize
= nextUserToken(buffer
, i
, len
, &token
);
451 if (token
== 0x27 && tokenSize
== 1) { // quote
459 appendTo
.add(kPercent
, 1);
461 case 0x27: // double quote
462 appender
.append((UChar
) 0x27);
466 appendTo
.add(kPerMill
, 1);
470 appendTo
.add(kNegative
, 1);
474 appendTo
.add(kPositive
, 1);
478 appendTo
.add(kCurrency
, tokenSize
);
481 appender
.append(token
);
486 case 0x27: // double quote
487 appender
.append((UChar
) 0x27);
489 case 0xA4: // included b/c tokenSize can be > 1
490 for (int32_t j
= 0; j
< tokenSize
; ++j
) {
491 appender
.append((UChar
) 0xA4);
495 appender
.append(token
);
// AffixPattern::parseAffixString: tokenizes `affixStr` with nextToken and
// appends the result to `appendTo`.
//   - Starts with a U_FAILURE(status) check.
//   - Consecutive tokens of size 1 are gathered into one run and added with a
//     single addLiteral call.
//   - Visible actions for other tokens: add(kPercent/kPerMill/kNegative/
//     kPositive, 1); add(kCurrency, tokenSize - 1), with status set to
//     U_PARSE_ERROR when tokenSize - 1 exceeds 3; and addLiteral(&token, 0, 1).
// NOTE(review): the case labels, loop increments and the return statement are
// not present in this listing (numbering skips 514, 518, 520-521, 523,
// 525-531, ...).
504 AffixPattern::parseAffixString(
505 const UnicodeString
&affixStr
,
506 AffixPattern
&appendTo
,
507 UErrorCode
&status
) {
508 if (U_FAILURE(status
)) {
511 int32_t len
= affixStr
.length();
512 const UChar
*buffer
= affixStr
.getBuffer();
513 for (int32_t i
= 0; i
< len
; ) {
515 int32_t tokenSize
= nextToken(buffer
, i
, len
, &token
);
// Size-1 tokens: remember where the run starts and keep scanning while
// nextToken keeps returning 1.
516 if (tokenSize
== 1) {
517 int32_t literalStart
= i
;
519 while (i
< len
&& (tokenSize
= nextToken(buffer
, i
, len
, &token
)) == 1) {
// Add the whole run [literalStart, i) as one literal.
522 appendTo
.addLiteral(buffer
, literalStart
, i
- literalStart
);
524 // If we reached end of string, we are done
532 appendTo
.add(kPercent
, 1);
535 appendTo
.add(kPerMill
, 1);
538 appendTo
.add(kNegative
, 1);
541 appendTo
.add(kPositive
, 1);
// More than three currency characters is reported as a parse error.
545 if (tokenSize
- 1 > 3) {
546 status
= U_PARSE_ERROR
;
549 appendTo
.add(kCurrency
, tokenSize
- 1);
553 appendTo
.addLiteral(&token
, 0, 1);
// AffixPattern::iterator: resets `result` so that it iterates over this
// pattern: all three position/length fields are zeroed and the iterator is
// pointed at this object's `tokens` and `literals` strings.
// The iterator stores pointers to those members, so this pattern must remain
// alive while `result` is in use.
// NOTE(review): the return statement is not present in this listing; the
// declared return type is AffixPatternIterator &.
560 AffixPatternIterator
&
561 AffixPattern::iterator(AffixPatternIterator
&result
) const {
562 result
.nextLiteralIndex
= 0;
563 result
.lastLiteralLength
= 0;
564 result
.nextTokenIndex
= 0;
565 result
.tokens
= &tokens
;
566 result
.literals
= &literals
;
// AffixPatternIterator::nextToken: moves the iterator to the next token.
//   - Compares nextTokenIndex with the token string length first (end test).
//   - For a kLiteral token, skips over the following tokens that have the
//     "long" bit set, then rebuilds lastLiteralLength from the length bytes,
//     reading from the last token of the group back to the first (8 bits per
//     token), and advances nextLiteralIndex by that length.
// NOTE(review): the return type, return statements and the index increments
// are not present in this listing (numbering skips 570, 574-576, 582-583,
// 589, 593 onward).
571 AffixPatternIterator::nextToken() {
572 int32_t tlen
= tokens
->length();
573 if (nextTokenIndex
== tlen
) {
577 const UChar
*tokenBuffer
= tokens
->getBuffer();
578 if (UNPACK_TOKEN(tokenBuffer
[nextTokenIndex
- 1]) ==
579 AffixPattern::kLiteral
) {
// Skip continuation tokens belonging to this literal.
580 while (nextTokenIndex
< tlen
&&
581 UNPACK_LONG(tokenBuffer
[nextTokenIndex
])) {
// Accumulate the length bytes, walking backwards while the long bit is set.
584 lastLiteralLength
= 0;
585 int32_t i
= nextTokenIndex
- 1;
586 for (; UNPACK_LONG(tokenBuffer
[i
]); --i
) {
587 lastLiteralLength
<<= 8;
588 lastLiteralLength
|= UNPACK_LENGTH(tokenBuffer
[i
]);
// The first token of the group (long bit clear) supplies the low byte.
590 lastLiteralLength
<<= 8;
591 lastLiteralLength
|= UNPACK_LENGTH(tokenBuffer
[i
]);
592 nextLiteralIndex
+= lastLiteralLength
;
// AffixPatternIterator::getTokenType: returns the token type decoded from the
// token at index nextTokenIndex - 1.
597 AffixPattern::ETokenType
598 AffixPatternIterator::getTokenType() const {
599 return UNPACK_TOKEN(tokens
->charAt(nextTokenIndex
- 1));
// AffixPatternIterator::getLiteral: sets `result` to the lastLiteralLength
// characters of the literal text that end at nextLiteralIndex.
// NOTE(review): the return type and return statement are not present in this
// listing.
603 AffixPatternIterator::getLiteral(UnicodeString
&result
) const {
604 const UChar
*buffer
= literals
->getBuffer();
// The literal starts lastLiteralLength characters before nextLiteralIndex.
605 result
.setTo(buffer
+ (nextLiteralIndex
- lastLiteralLength
), lastLiteralLength
);
// AffixPatternIterator::getTokenLength: returns lastLiteralLength when the
// token at index nextTokenIndex - 1 is a kLiteral token, otherwise the length
// byte stored in that token.
610 AffixPatternIterator::getTokenLength() const {
611 const UChar
*tokenBuffer
= tokens
->getBuffer();
612 AffixPattern::ETokenType type
= UNPACK_TOKEN(tokenBuffer
[nextTokenIndex
- 1]);
613 return type
== AffixPattern::kLiteral
? lastLiteralLength
: UNPACK_LENGTH(tokenBuffer
[nextTokenIndex
- 1]);
// Default constructor: initializes the percent, permill, negative and
// positive members from the file-level defaults gPercent, gPerMill, gNegative
// and gPositive.
616 AffixPatternParser::AffixPatternParser()
617 : fPercent(gPercent
), fPermill(gPerMill
), fNegative(gNegative
), fPositive(gPositive
) {
// Constructor taking DecimalFormatSymbols: delegates to
// setDecimalFormatSymbols(symbols).
620 AffixPatternParser::AffixPatternParser(
621 const DecimalFormatSymbols
&symbols
) {
622 setDecimalFormatSymbols(symbols
);
// AffixPatternParser::setDecimalFormatSymbols: copies the percent, per-mill,
// minus-sign and plus-sign symbols from `symbols` into fPercent, fPermill,
// fNegative and fPositive.
626 AffixPatternParser::setDecimalFormatSymbols(
627 const DecimalFormatSymbols
&symbols
) {
628 fPercent
= symbols
.getConstSymbol(DecimalFormatSymbols::kPercentSymbol
);
629 fPermill
= symbols
.getConstSymbol(DecimalFormatSymbols::kPerMillSymbol
);
630 fNegative
= symbols
.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol
);
631 fPositive
= symbols
.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol
);
// AffixPatternParser::parse: expands `affixPattern` into `appendTo`, replacing
// each symbolic token with concrete text.
//   - kPercent / kPerMill tokens append fPercent / fPermill with the
//     UNUM_PERCENT_FIELD / UNUM_PERMILL_FIELD ids.
//   - kNegative / kPositive tokens append fNegative / fPositive with
//     UNUM_SIGN_FIELD.
//   - kCurrency tokens switch on the token length and use the symbol, ISO or
//     long form from the currency info, all with UNUM_CURRENCY_FIELD.
//   - kLiteral tokens append the literal text.
// Starts with a U_FAILURE(status) check.
// NOTE(review): the inner case labels, the append calls' opening lines, the
// break statements and the return statement are not present in this listing
// (numbering skips 650, 653, 656, 659, 662-663, ...).
// NOTE(review): the parameter text "¤cyAffixInfo" below looks like a
// mis-decoded "&currencyAffixInfo"; confirm against the original file.
635 AffixPatternParser::parse(
636 const AffixPattern
&affixPattern
,
637 const CurrencyAffixInfo
¤cyAffixInfo
,
638 PluralAffix
&appendTo
,
639 UErrorCode
&status
) const {
640 if (U_FAILURE(status
)) {
643 AffixPatternIterator iter
;
644 affixPattern
.iterator(iter
);
645 UnicodeString literal
;
646 while (iter
.nextToken()) {
647 switch (iter
.getTokenType()) {
648 case AffixPattern::kPercent
:
649 appendTo
.append(fPercent
, UNUM_PERCENT_FIELD
);
651 case AffixPattern::kPerMill
:
652 appendTo
.append(fPermill
, UNUM_PERMILL_FIELD
);
654 case AffixPattern::kNegative
:
655 appendTo
.append(fNegative
, UNUM_SIGN_FIELD
);
657 case AffixPattern::kPositive
:
658 appendTo
.append(fPositive
, UNUM_SIGN_FIELD
);
// Currency: the token length selects which form of the currency is used.
660 case AffixPattern::kCurrency
:
661 switch (iter
.getTokenLength()) {
664 currencyAffixInfo
.getSymbol(), UNUM_CURRENCY_FIELD
);
668 currencyAffixInfo
.getISO(), UNUM_CURRENCY_FIELD
);
672 currencyAffixInfo
.getLong(), UNUM_CURRENCY_FIELD
, status
);
679 case AffixPattern::kLiteral
:
680 appendTo
.append(iter
.getLiteral(literal
));
692 #endif /* #if !UCONFIG_NO_FORMATTING */