]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/number_patternstring.cpp
ICU-64252.0.1.tar.gz
[apple/icu.git] / icuSources / i18n / number_patternstring.cpp
CommitLineData
0f5d89e8
A
1// © 2017 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3
4#include "unicode/utypes.h"
5
6#if !UCONFIG_NO_FORMATTING
7
8// Allow implicit conversion from char16_t* to UnicodeString for this file:
9// Helpful in toString methods and elsewhere.
10#define UNISTR_FROM_STRING_EXPLICIT
11#define UNISTR_FROM_CHAR_EXPLICIT
12
13#include "uassert.h"
14#include "number_patternstring.h"
15#include "unicode/utf16.h"
16#include "number_utils.h"
17#include "number_roundingutils.h"
3d1f044b 18#include "number_mapper.h"
0f5d89e8
A
19
20using namespace icu;
21using namespace icu::number;
22using namespace icu::number::impl;
23
24
25void PatternParser::parseToPatternInfo(const UnicodeString& patternString, ParsedPatternInfo& patternInfo,
26 UErrorCode& status) {
27 patternInfo.consumePattern(patternString, status);
28}
29
30DecimalFormatProperties
31PatternParser::parseToProperties(const UnicodeString& pattern, IgnoreRounding ignoreRounding,
32 UErrorCode& status) {
33 DecimalFormatProperties properties;
34 parseToExistingPropertiesImpl(pattern, properties, ignoreRounding, status);
35 return properties;
36}
37
38DecimalFormatProperties PatternParser::parseToProperties(const UnicodeString& pattern,
39 UErrorCode& status) {
40 return parseToProperties(pattern, IGNORE_ROUNDING_NEVER, status);
41}
42
43void
44PatternParser::parseToExistingProperties(const UnicodeString& pattern, DecimalFormatProperties& properties,
45 IgnoreRounding ignoreRounding, UErrorCode& status) {
46 parseToExistingPropertiesImpl(pattern, properties, ignoreRounding, status);
47}
48
49
50char16_t ParsedPatternInfo::charAt(int32_t flags, int32_t index) const {
51 const Endpoints& endpoints = getEndpoints(flags);
52 if (index < 0 || index >= endpoints.end - endpoints.start) {
3d1f044b 53 UPRV_UNREACHABLE;
0f5d89e8
A
54 }
55 return pattern.charAt(endpoints.start + index);
56}
57
58int32_t ParsedPatternInfo::length(int32_t flags) const {
59 return getLengthFromEndpoints(getEndpoints(flags));
60}
61
62int32_t ParsedPatternInfo::getLengthFromEndpoints(const Endpoints& endpoints) {
63 return endpoints.end - endpoints.start;
64}
65
66UnicodeString ParsedPatternInfo::getString(int32_t flags) const {
67 const Endpoints& endpoints = getEndpoints(flags);
68 if (endpoints.start == endpoints.end) {
69 return UnicodeString();
70 }
71 // Create a new UnicodeString
72 return UnicodeString(pattern, endpoints.start, endpoints.end - endpoints.start);
73}
74
75const Endpoints& ParsedPatternInfo::getEndpoints(int32_t flags) const {
76 bool prefix = (flags & AFFIX_PREFIX) != 0;
77 bool isNegative = (flags & AFFIX_NEGATIVE_SUBPATTERN) != 0;
78 bool padding = (flags & AFFIX_PADDING) != 0;
79 if (isNegative && padding) {
80 return negative.paddingEndpoints;
81 } else if (padding) {
82 return positive.paddingEndpoints;
83 } else if (prefix && isNegative) {
84 return negative.prefixEndpoints;
85 } else if (prefix) {
86 return positive.prefixEndpoints;
87 } else if (isNegative) {
88 return negative.suffixEndpoints;
89 } else {
90 return positive.suffixEndpoints;
91 }
92}
93
94bool ParsedPatternInfo::positiveHasPlusSign() const {
95 return positive.hasPlusSign;
96}
97
98bool ParsedPatternInfo::hasNegativeSubpattern() const {
99 return fHasNegativeSubpattern;
100}
101
102bool ParsedPatternInfo::negativeHasMinusSign() const {
103 return negative.hasMinusSign;
104}
105
106bool ParsedPatternInfo::hasCurrencySign() const {
107 return positive.hasCurrencySign || (fHasNegativeSubpattern && negative.hasCurrencySign);
108}
109
110bool ParsedPatternInfo::containsSymbolType(AffixPatternType type, UErrorCode& status) const {
111 return AffixUtils::containsType(pattern, type, status);
112}
113
114bool ParsedPatternInfo::hasBody() const {
115 return positive.integerTotal > 0;
116}
117
118/////////////////////////////////////////////////////
119/// BEGIN RECURSIVE DESCENT PARSER IMPLEMENTATION ///
120/////////////////////////////////////////////////////
121
122UChar32 ParsedPatternInfo::ParserState::peek() {
123 if (offset == pattern.length()) {
124 return -1;
125 } else {
126 return pattern.char32At(offset);
127 }
128}
129
130UChar32 ParsedPatternInfo::ParserState::next() {
131 int codePoint = peek();
132 offset += U16_LENGTH(codePoint);
133 return codePoint;
134}
135
136void ParsedPatternInfo::consumePattern(const UnicodeString& patternString, UErrorCode& status) {
137 if (U_FAILURE(status)) { return; }
138 this->pattern = patternString;
139
140 // This class is not intended for writing twice!
141 // Use move assignment to overwrite instead.
142 U_ASSERT(state.offset == 0);
143
144 // pattern := subpattern (';' subpattern)?
145 currentSubpattern = &positive;
146 consumeSubpattern(status);
147 if (U_FAILURE(status)) { return; }
148 if (state.peek() == u';') {
149 state.next(); // consume the ';'
150 // Don't consume the negative subpattern if it is empty (trailing ';')
151 if (state.peek() != -1) {
152 fHasNegativeSubpattern = true;
153 currentSubpattern = &negative;
154 consumeSubpattern(status);
155 if (U_FAILURE(status)) { return; }
156 }
157 }
158 if (state.peek() != -1) {
159 state.toParseException(u"Found unquoted special character");
160 status = U_UNQUOTED_SPECIAL;
161 }
162}
163
164void ParsedPatternInfo::consumeSubpattern(UErrorCode& status) {
165 // subpattern := literals? number exponent? literals?
166 consumePadding(PadPosition::UNUM_PAD_BEFORE_PREFIX, status);
167 if (U_FAILURE(status)) { return; }
168 consumeAffix(currentSubpattern->prefixEndpoints, status);
169 if (U_FAILURE(status)) { return; }
170 consumePadding(PadPosition::UNUM_PAD_AFTER_PREFIX, status);
171 if (U_FAILURE(status)) { return; }
172 consumeFormat(status);
173 if (U_FAILURE(status)) { return; }
174 consumeExponent(status);
175 if (U_FAILURE(status)) { return; }
176 consumePadding(PadPosition::UNUM_PAD_BEFORE_SUFFIX, status);
177 if (U_FAILURE(status)) { return; }
178 consumeAffix(currentSubpattern->suffixEndpoints, status);
179 if (U_FAILURE(status)) { return; }
180 consumePadding(PadPosition::UNUM_PAD_AFTER_SUFFIX, status);
181 if (U_FAILURE(status)) { return; }
182}
183
184void ParsedPatternInfo::consumePadding(PadPosition paddingLocation, UErrorCode& status) {
185 if (state.peek() != u'*') {
186 return;
187 }
188 if (currentSubpattern->hasPadding) {
189 state.toParseException(u"Cannot have multiple pad specifiers");
190 status = U_MULTIPLE_PAD_SPECIFIERS;
191 return;
192 }
193 currentSubpattern->paddingLocation = paddingLocation;
194 currentSubpattern->hasPadding = true;
195 state.next(); // consume the '*'
196 currentSubpattern->paddingEndpoints.start = state.offset;
197 consumeLiteral(status);
198 currentSubpattern->paddingEndpoints.end = state.offset;
199}
200
201void ParsedPatternInfo::consumeAffix(Endpoints& endpoints, UErrorCode& status) {
202 // literals := { literal }
203 endpoints.start = state.offset;
204 while (true) {
205 switch (state.peek()) {
206 case u'#':
207 case u'@':
208 case u';':
209 case u'*':
210 case u'.':
211 case u',':
212 case u'0':
213 case u'1':
214 case u'2':
215 case u'3':
216 case u'4':
217 case u'5':
218 case u'6':
219 case u'7':
220 case u'8':
221 case u'9':
222 case -1:
223 // Characters that cannot appear unquoted in a literal
224 // break outer;
225 goto after_outer;
226
227 case u'%':
228 currentSubpattern->hasPercentSign = true;
229 break;
230
231 case u'‰':
232 currentSubpattern->hasPerMilleSign = true;
233 break;
234
235 case u'¤':
236 currentSubpattern->hasCurrencySign = true;
237 break;
238
239 case u'-':
240 currentSubpattern->hasMinusSign = true;
241 break;
242
243 case u'+':
244 currentSubpattern->hasPlusSign = true;
245 break;
246
247 default:
248 break;
249 }
250 consumeLiteral(status);
251 if (U_FAILURE(status)) { return; }
252 }
253 after_outer:
254 endpoints.end = state.offset;
255}
256
257void ParsedPatternInfo::consumeLiteral(UErrorCode& status) {
258 if (state.peek() == -1) {
259 state.toParseException(u"Expected unquoted literal but found EOL");
260 status = U_PATTERN_SYNTAX_ERROR;
261 return;
262 } else if (state.peek() == u'\'') {
263 state.next(); // consume the starting quote
264 while (state.peek() != u'\'') {
265 if (state.peek() == -1) {
266 state.toParseException(u"Expected quoted literal but found EOL");
267 status = U_PATTERN_SYNTAX_ERROR;
268 return;
269 } else {
270 state.next(); // consume a quoted character
271 }
272 }
273 state.next(); // consume the ending quote
274 } else {
275 // consume a non-quoted literal character
276 state.next();
277 }
278}
279
280void ParsedPatternInfo::consumeFormat(UErrorCode& status) {
281 consumeIntegerFormat(status);
282 if (U_FAILURE(status)) { return; }
283 if (state.peek() == u'.') {
284 state.next(); // consume the decimal point
285 currentSubpattern->hasDecimal = true;
286 currentSubpattern->widthExceptAffixes += 1;
287 consumeFractionFormat(status);
288 if (U_FAILURE(status)) { return; }
289 }
290}
291
292void ParsedPatternInfo::consumeIntegerFormat(UErrorCode& status) {
293 // Convenience reference:
294 ParsedSubpatternInfo& result = *currentSubpattern;
295
296 while (true) {
297 switch (state.peek()) {
298 case u',':
299 result.widthExceptAffixes += 1;
300 result.groupingSizes <<= 16;
301 break;
302
303 case u'#':
304 if (result.integerNumerals > 0) {
305 state.toParseException(u"# cannot follow 0 before decimal point");
306 status = U_UNEXPECTED_TOKEN;
307 return;
308 }
309 result.widthExceptAffixes += 1;
310 result.groupingSizes += 1;
311 if (result.integerAtSigns > 0) {
312 result.integerTrailingHashSigns += 1;
313 } else {
314 result.integerLeadingHashSigns += 1;
315 }
316 result.integerTotal += 1;
317 break;
318
319 case u'@':
320 if (result.integerNumerals > 0) {
321 state.toParseException(u"Cannot mix 0 and @");
322 status = U_UNEXPECTED_TOKEN;
323 return;
324 }
325 if (result.integerTrailingHashSigns > 0) {
326 state.toParseException(u"Cannot nest # inside of a run of @");
327 status = U_UNEXPECTED_TOKEN;
328 return;
329 }
330 result.widthExceptAffixes += 1;
331 result.groupingSizes += 1;
332 result.integerAtSigns += 1;
333 result.integerTotal += 1;
334 break;
335
336 case u'0':
337 case u'1':
338 case u'2':
339 case u'3':
340 case u'4':
341 case u'5':
342 case u'6':
343 case u'7':
344 case u'8':
345 case u'9':
346 if (result.integerAtSigns > 0) {
347 state.toParseException(u"Cannot mix @ and 0");
348 status = U_UNEXPECTED_TOKEN;
349 return;
350 }
351 result.widthExceptAffixes += 1;
352 result.groupingSizes += 1;
353 result.integerNumerals += 1;
354 result.integerTotal += 1;
355 if (!result.rounding.isZero() || state.peek() != u'0') {
356 result.rounding.appendDigit(static_cast<int8_t>(state.peek() - u'0'), 0, true);
357 }
358 break;
359
360 default:
361 goto after_outer;
362 }
363 state.next(); // consume the symbol
364 }
365
366 after_outer:
367 // Disallow patterns with a trailing ',' or with two ',' next to each other
368 auto grouping1 = static_cast<int16_t> (result.groupingSizes & 0xffff);
369 auto grouping2 = static_cast<int16_t> ((result.groupingSizes >> 16) & 0xffff);
370 auto grouping3 = static_cast<int16_t> ((result.groupingSizes >> 32) & 0xffff);
371 if (grouping1 == 0 && grouping2 != -1) {
372 state.toParseException(u"Trailing grouping separator is invalid");
373 status = U_UNEXPECTED_TOKEN;
374 return;
375 }
376 if (grouping2 == 0 && grouping3 != -1) {
377 state.toParseException(u"Grouping width of zero is invalid");
378 status = U_PATTERN_SYNTAX_ERROR;
379 return;
380 }
381}
382
383void ParsedPatternInfo::consumeFractionFormat(UErrorCode& status) {
384 // Convenience reference:
385 ParsedSubpatternInfo& result = *currentSubpattern;
386
387 int32_t zeroCounter = 0;
388 while (true) {
389 switch (state.peek()) {
390 case u'#':
391 result.widthExceptAffixes += 1;
392 result.fractionHashSigns += 1;
393 result.fractionTotal += 1;
394 zeroCounter++;
395 break;
396
397 case u'0':
398 case u'1':
399 case u'2':
400 case u'3':
401 case u'4':
402 case u'5':
403 case u'6':
404 case u'7':
405 case u'8':
406 case u'9':
407 if (result.fractionHashSigns > 0) {
408 state.toParseException(u"0 cannot follow # after decimal point");
409 status = U_UNEXPECTED_TOKEN;
410 return;
411 }
412 result.widthExceptAffixes += 1;
413 result.fractionNumerals += 1;
414 result.fractionTotal += 1;
415 if (state.peek() == u'0') {
416 zeroCounter++;
417 } else {
418 result.rounding
419 .appendDigit(static_cast<int8_t>(state.peek() - u'0'), zeroCounter, false);
420 zeroCounter = 0;
421 }
422 break;
423
424 default:
425 return;
426 }
427 state.next(); // consume the symbol
428 }
429}
430
431void ParsedPatternInfo::consumeExponent(UErrorCode& status) {
432 // Convenience reference:
433 ParsedSubpatternInfo& result = *currentSubpattern;
434
435 if (state.peek() != u'E') {
436 return;
437 }
438 if ((result.groupingSizes & 0xffff0000L) != 0xffff0000L) {
439 state.toParseException(u"Cannot have grouping separator in scientific notation");
440 status = U_MALFORMED_EXPONENTIAL_PATTERN;
441 return;
442 }
443 state.next(); // consume the E
444 result.widthExceptAffixes++;
445 if (state.peek() == u'+') {
446 state.next(); // consume the +
447 result.exponentHasPlusSign = true;
448 result.widthExceptAffixes++;
449 }
450 while (state.peek() == u'0') {
451 state.next(); // consume the 0
452 result.exponentZeros += 1;
453 result.widthExceptAffixes++;
454 }
455}
456
457///////////////////////////////////////////////////
458/// END RECURSIVE DESCENT PARSER IMPLEMENTATION ///
459///////////////////////////////////////////////////
460
461void PatternParser::parseToExistingPropertiesImpl(const UnicodeString& pattern,
462 DecimalFormatProperties& properties,
463 IgnoreRounding ignoreRounding, UErrorCode& status) {
464 if (pattern.length() == 0) {
465 // Backwards compatibility requires that we reset to the default values.
466 // TODO: Only overwrite the properties that "saveToProperties" normally touches?
467 properties.clear();
468 return;
469 }
470
471 ParsedPatternInfo patternInfo;
3d1f044b
A
472 if (pattern.compare(u"#.##E+00;-#.##E+00",18)==0) {
473 // Special hack for Numbers; if applying the Numbers-style scientific pattern,
474 // then replace it with the pattern beginning with 0 that would have resulted
475 // in older versions of ICU such as ICU 61. rdar://51601250
476 UnicodeString newPattern(TRUE,u"0.##E+00",8);
477 parseToPatternInfo(newPattern, patternInfo, status);
478 } else {
479 parseToPatternInfo(pattern, patternInfo, status);
480 }
0f5d89e8
A
481 if (U_FAILURE(status)) { return; }
482 patternInfoToProperties(properties, patternInfo, ignoreRounding, status);
483}
484
485void
486PatternParser::patternInfoToProperties(DecimalFormatProperties& properties, ParsedPatternInfo& patternInfo,
487 IgnoreRounding _ignoreRounding, UErrorCode& status) {
488 // Translate from PatternParseResult to Properties.
489 // Note that most data from "negative" is ignored per the specification of DecimalFormat.
490
491 const ParsedSubpatternInfo& positive = patternInfo.positive;
492
493 bool ignoreRounding;
494 if (_ignoreRounding == IGNORE_ROUNDING_NEVER) {
495 ignoreRounding = false;
496 } else if (_ignoreRounding == IGNORE_ROUNDING_IF_CURRENCY) {
497 ignoreRounding = positive.hasCurrencySign;
498 } else {
499 U_ASSERT(_ignoreRounding == IGNORE_ROUNDING_ALWAYS);
500 ignoreRounding = true;
501 }
502
503 // Grouping settings
504 auto grouping1 = static_cast<int16_t> (positive.groupingSizes & 0xffff);
505 auto grouping2 = static_cast<int16_t> ((positive.groupingSizes >> 16) & 0xffff);
506 auto grouping3 = static_cast<int16_t> ((positive.groupingSizes >> 32) & 0xffff);
507 if (grouping2 != -1) {
508 properties.groupingSize = grouping1;
509 properties.groupingUsed = true;
510 } else {
511 properties.groupingSize = -1;
512 properties.groupingUsed = false;
513 }
514 if (grouping3 != -1) {
515 properties.secondaryGroupingSize = grouping2;
516 } else {
517 properties.secondaryGroupingSize = -1;
518 }
519
520 // For backwards compatibility, require that the pattern emit at least one min digit.
521 int minInt, minFrac;
522 if (positive.integerTotal == 0 && positive.fractionTotal > 0) {
523 // patterns like ".##"
524 minInt = 0;
525 minFrac = uprv_max(1, positive.fractionNumerals);
526 } else if (positive.integerNumerals == 0 && positive.fractionNumerals == 0) {
527 // patterns like "#.##"
528 minInt = 1;
529 minFrac = 0;
530 } else {
531 minInt = positive.integerNumerals;
532 minFrac = positive.fractionNumerals;
533 }
534
535 // Rounding settings
536 // Don't set basic rounding when there is a currency sign; defer to CurrencyUsage
537 if (positive.integerAtSigns > 0) {
538 properties.minimumFractionDigits = -1;
539 properties.maximumFractionDigits = -1;
540 properties.roundingIncrement = 0.0;
541 properties.minimumSignificantDigits = positive.integerAtSigns;
542 properties.maximumSignificantDigits = positive.integerAtSigns + positive.integerTrailingHashSigns;
543 } else if (!positive.rounding.isZero()) {
544 if (!ignoreRounding) {
545 properties.minimumFractionDigits = minFrac;
546 properties.maximumFractionDigits = positive.fractionTotal;
547 properties.roundingIncrement = positive.rounding.toDouble();
548 } else {
549 properties.minimumFractionDigits = -1;
550 properties.maximumFractionDigits = -1;
551 properties.roundingIncrement = 0.0;
552 }
553 properties.minimumSignificantDigits = -1;
554 properties.maximumSignificantDigits = -1;
555 } else {
556 if (!ignoreRounding) {
557 properties.minimumFractionDigits = minFrac;
558 properties.maximumFractionDigits = positive.fractionTotal;
559 properties.roundingIncrement = 0.0;
560 } else {
561 properties.minimumFractionDigits = -1;
562 properties.maximumFractionDigits = -1;
563 properties.roundingIncrement = 0.0;
564 }
565 properties.minimumSignificantDigits = -1;
566 properties.maximumSignificantDigits = -1;
567 }
568
569 // If the pattern ends with a '.' then force the decimal point.
570 if (positive.hasDecimal && positive.fractionTotal == 0) {
571 properties.decimalSeparatorAlwaysShown = true;
572 } else {
573 properties.decimalSeparatorAlwaysShown = false;
574 }
575
576 // Scientific notation settings
577 if (positive.exponentZeros > 0) {
578 properties.exponentSignAlwaysShown = positive.exponentHasPlusSign;
579 properties.minimumExponentDigits = positive.exponentZeros;
580 if (positive.integerAtSigns == 0) {
581 // patterns without '@' can define max integer digits, used for engineering notation
582 properties.minimumIntegerDigits = positive.integerNumerals;
583 properties.maximumIntegerDigits = positive.integerTotal;
584 } else {
585 // patterns with '@' cannot define max integer digits
586 properties.minimumIntegerDigits = 1;
587 properties.maximumIntegerDigits = -1;
588 }
589 } else {
590 properties.exponentSignAlwaysShown = false;
591 properties.minimumExponentDigits = -1;
592 properties.minimumIntegerDigits = minInt;
593 properties.maximumIntegerDigits = -1;
594 }
595
596 // Compute the affix patterns (required for both padding and affixes)
597 UnicodeString posPrefix = patternInfo.getString(AffixPatternProvider::AFFIX_PREFIX);
598 UnicodeString posSuffix = patternInfo.getString(0);
599
600 // Padding settings
601 if (positive.hasPadding) {
602 // The width of the positive prefix and suffix templates are included in the padding
603 int paddingWidth = positive.widthExceptAffixes +
604 AffixUtils::estimateLength(posPrefix, status) +
605 AffixUtils::estimateLength(posSuffix, status);
606 properties.formatWidth = paddingWidth;
607 UnicodeString rawPaddingString = patternInfo.getString(AffixPatternProvider::AFFIX_PADDING);
608 if (rawPaddingString.length() == 1) {
609 properties.padString = rawPaddingString;
610 } else if (rawPaddingString.length() == 2) {
611 if (rawPaddingString.charAt(0) == u'\'') {
612 properties.padString.setTo(u"'", -1);
613 } else {
614 properties.padString = rawPaddingString;
615 }
616 } else {
617 properties.padString = UnicodeString(rawPaddingString, 1, rawPaddingString.length() - 2);
618 }
619 properties.padPosition = positive.paddingLocation;
620 } else {
621 properties.formatWidth = -1;
622 properties.padString.setToBogus();
623 properties.padPosition.nullify();
624 }
625
626 // Set the affixes
627 // Always call the setter, even if the prefixes are empty, especially in the case of the
628 // negative prefix pattern, to prevent default values from overriding the pattern.
629 properties.positivePrefixPattern = posPrefix;
630 properties.positiveSuffixPattern = posSuffix;
631 if (patternInfo.fHasNegativeSubpattern) {
632 properties.negativePrefixPattern = patternInfo.getString(
633 AffixPatternProvider::AFFIX_NEGATIVE_SUBPATTERN | AffixPatternProvider::AFFIX_PREFIX);
634 properties.negativeSuffixPattern = patternInfo.getString(
635 AffixPatternProvider::AFFIX_NEGATIVE_SUBPATTERN);
636 } else {
637 properties.negativePrefixPattern.setToBogus();
638 properties.negativeSuffixPattern.setToBogus();
639 }
640
641 // Set the magnitude multiplier
642 if (positive.hasPercentSign) {
643 properties.magnitudeMultiplier = 2;
644 } else if (positive.hasPerMilleSign) {
645 properties.magnitudeMultiplier = 3;
646 } else {
647 properties.magnitudeMultiplier = 0;
648 }
649}
650
651///////////////////////////////////////////////////////////////////
652/// End PatternStringParser.java; begin PatternStringUtils.java ///
653///////////////////////////////////////////////////////////////////
654
3d1f044b
A
655// Determine whether a given roundingIncrement should be ignored for formatting
656// based on the current maxFrac value (maximum fraction digits). For example a
657// roundingIncrement of 0.01 should be ignored if maxFrac is 1, but not if maxFrac
658// is 2 or more. Note that roundingIncrements are rounded in significance, so
659// a roundingIncrement of 0.006 is treated like 0.01 for this determination, i.e.
660// it should not be ignored if maxFrac is 2 or more (but a roundingIncrement of
661// 0.005 is treated like 0.001 for significance). This is the reason for the
662// initial doubling below.
663// roundIncr must be non-zero.
664// Apple enhancement per rdar://51452216: Takes pointer to roundIncr; if function
665// returns false, roundIncr will be rounded as necessary given maxFrac value.
666bool PatternStringUtils::ignoreRoundingIncrement(double* roundIncrPtr, int32_t maxFrac) {
667 if (maxFrac < 0) {
668 return false;
669 }
670 int32_t frac = 0;
671 double denom = 20.0;
672 double roundIncr = *roundIncrPtr * 2.0;
673 for (frac = 0; frac <= maxFrac && roundIncr <= 1.0; frac++, roundIncr *= 10.0, denom *= 10.0);
674 if (frac <= maxFrac) {
675 *roundIncrPtr = (double)((int)(roundIncr*10.0))/denom;
676 return false;
677 }
678 return true;
679}
680
0f5d89e8
A
681UnicodeString PatternStringUtils::propertiesToPatternString(const DecimalFormatProperties& properties,
682 UErrorCode& status) {
683 UnicodeString sb;
684
685 // Convenience references
686 // The uprv_min() calls prevent DoS
3d1f044b
A
687 int32_t dosMax = 100;
688 int32_t grouping1 = uprv_max(0, uprv_min(properties.groupingSize, dosMax));
689 int32_t grouping2 = uprv_max(0, uprv_min(properties.secondaryGroupingSize, dosMax));
690 bool useGrouping = properties.groupingUsed;
691 int32_t paddingWidth = uprv_min(properties.formatWidth, dosMax);
0f5d89e8
A
692 NullableValue<PadPosition> paddingLocation = properties.padPosition;
693 UnicodeString paddingString = properties.padString;
3d1f044b
A
694 int32_t minInt = uprv_max(0, uprv_min(properties.minimumIntegerDigits, dosMax));
695 int32_t maxInt = uprv_min(properties.maximumIntegerDigits, dosMax);
696 int32_t minFrac = uprv_max(0, uprv_min(properties.minimumFractionDigits, dosMax));
697 int32_t maxFrac = uprv_min(properties.maximumFractionDigits, dosMax);
698 int32_t minSig = uprv_min(properties.minimumSignificantDigits, dosMax);
699 int32_t maxSig = uprv_min(properties.maximumSignificantDigits, dosMax);
0f5d89e8 700 bool alwaysShowDecimal = properties.decimalSeparatorAlwaysShown;
3d1f044b 701 int32_t exponentDigits = uprv_min(properties.minimumExponentDigits, dosMax);
0f5d89e8 702 bool exponentShowPlusSign = properties.exponentSignAlwaysShown;
3d1f044b
A
703
704 PropertiesAffixPatternProvider affixes(properties, status);
0f5d89e8
A
705
706 // Prefixes
3d1f044b
A
707 sb.append(affixes.getString(AffixPatternProvider::AFFIX_POS_PREFIX));
708 int32_t afterPrefixPos = sb.length();
0f5d89e8
A
709
710 // Figure out the grouping sizes.
3d1f044b 711 if (!useGrouping) {
0f5d89e8
A
712 grouping1 = 0;
713 grouping2 = 0;
3d1f044b
A
714 } else if (grouping1 == grouping2) {
715 grouping1 = 0;
0f5d89e8 716 }
3d1f044b 717 int32_t groupingLength = grouping1 + grouping2 + 1;
0f5d89e8
A
718
719 // Figure out the digits we need to put in the pattern.
720 double roundingInterval = properties.roundingIncrement;
721 UnicodeString digitsString;
3d1f044b 722 int32_t digitsStringScale = 0;
0f5d89e8
A
723 if (maxSig != uprv_min(dosMax, -1)) {
724 // Significant Digits.
725 while (digitsString.length() < minSig) {
726 digitsString.append(u'@');
727 }
728 while (digitsString.length() < maxSig) {
729 digitsString.append(u'#');
730 }
3d1f044b 731 } else if (roundingInterval != 0.0 && !ignoreRoundingIncrement(&roundingInterval,maxFrac)) {
0f5d89e8 732 // Rounding Interval.
3d1f044b 733 digitsStringScale = -roundingutils::doubleFractionLength(roundingInterval, nullptr);
0f5d89e8
A
734 // TODO: Check for DoS here?
735 DecimalQuantity incrementQuantity;
736 incrementQuantity.setToDouble(roundingInterval);
737 incrementQuantity.adjustMagnitude(-digitsStringScale);
738 incrementQuantity.roundToMagnitude(0, kDefaultMode, status);
739 UnicodeString str = incrementQuantity.toPlainString();
740 if (str.charAt(0) == u'-') {
741 // TODO: Unsupported operation exception or fail silently?
742 digitsString.append(str, 1, str.length() - 1);
743 } else {
744 digitsString.append(str);
745 }
746 }
747 while (digitsString.length() + digitsStringScale < minInt) {
748 digitsString.insert(0, u'0');
749 }
750 while (-digitsStringScale < minFrac) {
751 digitsString.append(u'0');
752 digitsStringScale--;
753 }
754
755 // Write the digits to the string builder
3d1f044b 756 int32_t m0 = uprv_max(groupingLength, digitsString.length() + digitsStringScale);
0f5d89e8 757 m0 = (maxInt != dosMax) ? uprv_max(maxInt, m0) - 1 : m0 - 1;
3d1f044b
A
758 int32_t mN = (maxFrac != dosMax) ? uprv_min(-maxFrac, digitsStringScale) : digitsStringScale;
759 for (int32_t magnitude = m0; magnitude >= mN; magnitude--) {
760 int32_t di = digitsString.length() + digitsStringScale - magnitude - 1;
0f5d89e8
A
761 if (di < 0 || di >= digitsString.length()) {
762 sb.append(u'#');
763 } else {
764 sb.append(digitsString.charAt(di));
765 }
3d1f044b
A
766 // Decimal separator
767 if (magnitude == 0 && (alwaysShowDecimal || mN < 0)) {
768 sb.append(u'.');
769 }
770 if (!useGrouping) {
771 continue;
772 }
773 // Least-significant grouping separator
774 if (magnitude > 0 && magnitude == grouping1) {
0f5d89e8 775 sb.append(u',');
3d1f044b
A
776 }
777 // All other grouping separators
778 if (magnitude > grouping1 && grouping2 > 0 && (magnitude - grouping1) % grouping2 == 0) {
0f5d89e8 779 sb.append(u',');
0f5d89e8
A
780 }
781 }
782
783 // Exponential notation
784 if (exponentDigits != uprv_min(dosMax, -1)) {
785 sb.append(u'E');
786 if (exponentShowPlusSign) {
787 sb.append(u'+');
788 }
3d1f044b 789 for (int32_t i = 0; i < exponentDigits; i++) {
0f5d89e8
A
790 sb.append(u'0');
791 }
792 }
793
794 // Suffixes
3d1f044b
A
795 int32_t beforeSuffixPos = sb.length();
796 sb.append(affixes.getString(AffixPatternProvider::AFFIX_POS_SUFFIX));
0f5d89e8
A
797
798 // Resolve Padding
3d1f044b 799 if (paddingWidth > 0 && !paddingLocation.isNull()) {
0f5d89e8
A
800 while (paddingWidth - sb.length() > 0) {
801 sb.insert(afterPrefixPos, u'#');
802 beforeSuffixPos++;
803 }
3d1f044b 804 int32_t addedLength;
0f5d89e8
A
805 switch (paddingLocation.get(status)) {
806 case PadPosition::UNUM_PAD_BEFORE_PREFIX:
807 addedLength = escapePaddingString(paddingString, sb, 0, status);
808 sb.insert(0, u'*');
809 afterPrefixPos += addedLength + 1;
810 beforeSuffixPos += addedLength + 1;
811 break;
812 case PadPosition::UNUM_PAD_AFTER_PREFIX:
813 addedLength = escapePaddingString(paddingString, sb, afterPrefixPos, status);
814 sb.insert(afterPrefixPos, u'*');
815 afterPrefixPos += addedLength + 1;
816 beforeSuffixPos += addedLength + 1;
817 break;
818 case PadPosition::UNUM_PAD_BEFORE_SUFFIX:
819 escapePaddingString(paddingString, sb, beforeSuffixPos, status);
820 sb.insert(beforeSuffixPos, u'*');
821 break;
822 case PadPosition::UNUM_PAD_AFTER_SUFFIX:
823 sb.append(u'*');
824 escapePaddingString(paddingString, sb, sb.length(), status);
825 break;
826 }
827 if (U_FAILURE(status)) { return sb; }
828 }
829
830 // Negative affixes
831 // Ignore if the negative prefix pattern is "-" and the negative suffix is empty
3d1f044b 832 if (affixes.hasNegativeSubpattern()) {
0f5d89e8 833 sb.append(u';');
3d1f044b 834 sb.append(affixes.getString(AffixPatternProvider::AFFIX_NEG_PREFIX));
0f5d89e8
A
835 // Copy the positive digit format into the negative.
836 // This is optional; the pattern is the same as if '#' were appended here instead.
837 // NOTE: It is not safe to append the UnicodeString to itself, so we need to copy.
838 // See http://bugs.icu-project.org/trac/ticket/13707
839 UnicodeString copy(sb);
840 sb.append(copy, afterPrefixPos, beforeSuffixPos - afterPrefixPos);
3d1f044b 841 sb.append(affixes.getString(AffixPatternProvider::AFFIX_NEG_SUFFIX));
0f5d89e8
A
842 }
843
844 return sb;
845}
846
847int PatternStringUtils::escapePaddingString(UnicodeString input, UnicodeString& output, int startIndex,
848 UErrorCode& status) {
849 (void) status;
850 if (input.length() == 0) {
851 input.setTo(kFallbackPaddingString, -1);
852 }
853 int startLength = output.length();
854 if (input.length() == 1) {
855 if (input.compare(u"'", -1) == 0) {
856 output.insert(startIndex, u"''", -1);
857 } else {
858 output.insert(startIndex, input);
859 }
860 } else {
861 output.insert(startIndex, u'\'');
862 int offset = 1;
863 for (int i = 0; i < input.length(); i++) {
864 // it's okay to deal in chars here because the quote mark is the only interesting thing.
865 char16_t ch = input.charAt(i);
866 if (ch == u'\'') {
867 output.insert(startIndex + offset, u"''", -1);
868 offset += 2;
869 } else {
870 output.insert(startIndex + offset, ch);
871 offset += 1;
872 }
873 }
874 output.insert(startIndex + offset, u'\'');
875 }
876 return output.length() - startLength;
877}
878
879UnicodeString
880PatternStringUtils::convertLocalized(const UnicodeString& input, const DecimalFormatSymbols& symbols,
881 bool toLocalized, UErrorCode& status) {
882 // Construct a table of strings to be converted between localized and standard.
883 static constexpr int32_t LEN = 21;
884 UnicodeString table[LEN][2];
885 int standIdx = toLocalized ? 0 : 1;
886 int localIdx = toLocalized ? 1 : 0;
887 table[0][standIdx] = u"%";
888 table[0][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPercentSymbol);
889 table[1][standIdx] = u"‰";
890 table[1][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPerMillSymbol);
891 table[2][standIdx] = u".";
892 table[2][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol);
893 table[3][standIdx] = u",";
894 table[3][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kGroupingSeparatorSymbol);
895 table[4][standIdx] = u"-";
896 table[4][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol);
897 table[5][standIdx] = u"+";
898 table[5][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol);
899 table[6][standIdx] = u";";
900 table[6][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPatternSeparatorSymbol);
901 table[7][standIdx] = u"@";
902 table[7][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kSignificantDigitSymbol);
903 table[8][standIdx] = u"E";
904 table[8][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kExponentialSymbol);
905 table[9][standIdx] = u"*";
906 table[9][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPadEscapeSymbol);
907 table[10][standIdx] = u"#";
908 table[10][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kDigitSymbol);
909 for (int i = 0; i < 10; i++) {
910 table[11 + i][standIdx] = u'0' + i;
911 table[11 + i][localIdx] = symbols.getConstDigitSymbol(i);
912 }
913
914 // Special case: quotes are NOT allowed to be in any localIdx strings.
915 // Substitute them with '’' instead.
916 for (int32_t i = 0; i < LEN; i++) {
917 table[i][localIdx].findAndReplace(u'\'', u'’');
918 }
919
920 // Iterate through the string and convert.
921 // State table:
922 // 0 => base state
923 // 1 => first char inside a quoted sequence in input and output string
924 // 2 => inside a quoted sequence in input and output string
925 // 3 => first char after a close quote in input string;
926 // close quote still needs to be written to output string
927 // 4 => base state in input string; inside quoted sequence in output string
928 // 5 => first char inside a quoted sequence in input string;
929 // inside quoted sequence in output string
930 UnicodeString result;
931 int state = 0;
932 for (int offset = 0; offset < input.length(); offset++) {
933 UChar ch = input.charAt(offset);
934
935 // Handle a quote character (state shift)
936 if (ch == u'\'') {
937 if (state == 0) {
938 result.append(u'\'');
939 state = 1;
940 continue;
941 } else if (state == 1) {
942 result.append(u'\'');
943 state = 0;
944 continue;
945 } else if (state == 2) {
946 state = 3;
947 continue;
948 } else if (state == 3) {
949 result.append(u'\'');
950 result.append(u'\'');
951 state = 1;
952 continue;
953 } else if (state == 4) {
954 state = 5;
955 continue;
956 } else {
957 U_ASSERT(state == 5);
958 result.append(u'\'');
959 result.append(u'\'');
960 state = 4;
961 continue;
962 }
963 }
964
965 if (state == 0 || state == 3 || state == 4) {
966 for (auto& pair : table) {
967 // Perform a greedy match on this symbol string
968 UnicodeString temp = input.tempSubString(offset, pair[0].length());
969 if (temp == pair[0]) {
970 // Skip ahead past this region for the next iteration
971 offset += pair[0].length() - 1;
972 if (state == 3 || state == 4) {
973 result.append(u'\'');
974 state = 0;
975 }
976 result.append(pair[1]);
977 goto continue_outer;
978 }
979 }
980 // No replacement found. Check if a special quote is necessary
981 for (auto& pair : table) {
982 UnicodeString temp = input.tempSubString(offset, pair[1].length());
983 if (temp == pair[1]) {
984 if (state == 0) {
985 result.append(u'\'');
986 state = 4;
987 }
988 result.append(ch);
989 goto continue_outer;
990 }
991 }
992 // Still nothing. Copy the char verbatim. (Add a close quote if necessary)
993 if (state == 3 || state == 4) {
994 result.append(u'\'');
995 state = 0;
996 }
997 result.append(ch);
998 } else {
999 U_ASSERT(state == 1 || state == 2 || state == 5);
1000 result.append(ch);
1001 state = 2;
1002 }
1003 continue_outer:;
1004 }
1005 // Resolve final quotes
1006 if (state == 3 || state == 4) {
1007 result.append(u'\'');
1008 state = 0;
1009 }
1010 if (state != 0) {
1011 // Malformed localized pattern: unterminated quote
1012 status = U_PATTERN_SYNTAX_ERROR;
1013 }
1014 return result;
1015}
1016
1017void PatternStringUtils::patternInfoToStringBuilder(const AffixPatternProvider& patternInfo, bool isPrefix,
1018 int8_t signum, UNumberSignDisplay signDisplay,
1019 StandardPlural::Form plural,
1020 bool perMilleReplacesPercent, UnicodeString& output) {
1021
1022 // Should the output render '+' where '-' would normally appear in the pattern?
1023 bool plusReplacesMinusSign = signum != -1 && (
1024 signDisplay == UNUM_SIGN_ALWAYS || signDisplay == UNUM_SIGN_ACCOUNTING_ALWAYS || (
1025 signum == 1 && (
1026 signDisplay == UNUM_SIGN_EXCEPT_ZERO ||
1027 signDisplay == UNUM_SIGN_ACCOUNTING_EXCEPT_ZERO))) &&
1028 patternInfo.positiveHasPlusSign() == false;
1029
1030 // Should we use the affix from the negative subpattern? (If not, we will use the positive
1031 // subpattern.)
1032 bool useNegativeAffixPattern = patternInfo.hasNegativeSubpattern() && (
1033 signum == -1 || (patternInfo.negativeHasMinusSign() && plusReplacesMinusSign));
1034
1035 // Resolve the flags for the affix pattern.
1036 int flags = 0;
1037 if (useNegativeAffixPattern) {
1038 flags |= AffixPatternProvider::AFFIX_NEGATIVE_SUBPATTERN;
1039 }
1040 if (isPrefix) {
1041 flags |= AffixPatternProvider::AFFIX_PREFIX;
1042 }
1043 if (plural != StandardPlural::Form::COUNT) {
1044 U_ASSERT(plural == (AffixPatternProvider::AFFIX_PLURAL_MASK & plural));
1045 flags |= plural;
1046 }
1047
1048 // Should we prepend a sign to the pattern?
1049 bool prependSign;
1050 if (!isPrefix || useNegativeAffixPattern) {
1051 prependSign = false;
1052 } else if (signum == -1) {
1053 prependSign = signDisplay != UNUM_SIGN_NEVER;
1054 } else {
1055 prependSign = plusReplacesMinusSign;
1056 }
1057
1058 // Compute the length of the affix pattern.
1059 int length = patternInfo.length(flags) + (prependSign ? 1 : 0);
1060
1061 // Finally, set the result into the StringBuilder.
1062 output.remove();
1063 for (int index = 0; index < length; index++) {
1064 char16_t candidate;
1065 if (prependSign && index == 0) {
1066 candidate = u'-';
1067 } else if (prependSign) {
1068 candidate = patternInfo.charAt(flags, index - 1);
1069 } else {
1070 candidate = patternInfo.charAt(flags, index);
1071 }
1072 if (plusReplacesMinusSign && candidate == u'-') {
1073 candidate = u'+';
1074 }
1075 if (perMilleReplacesPercent && candidate == u'%') {
1076 candidate = u'‰';
1077 }
1078 output.append(candidate);
1079 }
1080}
1081
1082#endif /* #if !UCONFIG_NO_FORMATTING */