1 // © 2018 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 #include "unicode/utypes.h"
6 #if !UCONFIG_NO_FORMATTING
8 // Allow implicit conversion from char16_t* to UnicodeString for this file:
9 // Helpful in toString methods and elsewhere.
10 #define UNISTR_FROM_STRING_EXPLICIT
14 #include "number_types.h"
15 #include "number_patternstring.h"
16 #include "numparse_types.h"
17 #include "numparse_impl.h"
18 #include "numparse_symbols.h"
19 #include "numparse_decimal.h"
20 #include "unicode/numberformatter.h"
22 #include "number_mapper.h"
23 #include "static_unicode_sets.h"
26 using namespace icu::number
;
27 using namespace icu::number::impl
;
28 using namespace icu::numparse
;
29 using namespace icu::numparse::impl
;
32 NumberParseMatcher::~NumberParseMatcher() = default;
// createSimpleParser: builds a NumberParserImpl for the given locale from a pattern string and
// a caller-supplied set of parse flags, registers a fixed list of matchers on it, and returns
// the parser released from its LocalPointer via orphan().
//
// Parameters:
//   locale        - locale used for the symbols, the currency symbols and the grouper.
//   patternString - pattern parsed into a ParsedPatternInfo that drives the affix matchers.
//   parseFlags    - flags handed to the parser and to the flag-aware matchers.
//   status        - ICU error code passed to every fallible call below.
//
// NOTE(review): this listing omits some original lines (the embedded numbering skips 33-35
// and 76-77), so the return type, the closing brace and possibly further statements are not
// visible here.
// NOTE(review): status is never tested in the visible lines of this function.
36 NumberParserImpl::createSimpleParser(const Locale
& locale
, const UnicodeString
& patternString
,
37 parse_flags_t parseFlags
, UErrorCode
& status
) {
// NOTE(review): the pointer produced by new is dereferenced below without a null check;
// confirm whether allocation failure can yield nullptr here.
39 LocalPointer
<NumberParserImpl
> parser(new NumberParserImpl(parseFlags
));
40 DecimalFormatSymbols
symbols(locale
, status
);
// The matchers live inside the parser (fLocalMatchers); start with the default ignorables
// and keep a reference to that matcher for the affix setup below.
42 parser
->fLocalMatchers
.ignorables
= {unisets::DEFAULT_IGNORABLES
};
43 IgnorablesMatcher
& ignorables
= parser
->fLocalMatchers
.ignorables
;
// A second symbols object whose currency symbol and international currency symbol are
// overridden with fixed strings; it is only used to construct currencySymbols.
45 DecimalFormatSymbols
dfs(locale
, status
);
46 dfs
.setSymbol(DecimalFormatSymbols::kCurrencySymbol
, u
"IU$");
47 dfs
.setSymbol(DecimalFormatSymbols::kIntlCurrencySymbol
, u
"ICU");
48 CurrencySymbols
currencySymbols({u
"ICU", status
}, locale
, dfs
, status
);
50 ParsedPatternInfo patternInfo
;
51 PatternParser::parseToPatternInfo(patternString
, patternInfo
, status
);
53 // The following statements set up the affix matchers.
// NOTE(review): affixSetupData is a local whose address is handed to the token matcher
// warehouse; confirm the warehouse does not use that pointer after this function returns.
54 AffixTokenMatcherSetupData affixSetupData
= {
55 currencySymbols
, symbols
, ignorables
, locale
, parseFlags
};
56 parser
->fLocalMatchers
.affixTokenMatcherWarehouse
= {&affixSetupData
};
57 parser
->fLocalMatchers
.affixMatcherWarehouse
= {&parser
->fLocalMatchers
.affixTokenMatcherWarehouse
};
58 parser
->fLocalMatchers
.affixMatcherWarehouse
.createAffixMatchers(
59 patternInfo
, *parser
, ignorables
, parseFlags
, status
);
// Grouping: start from UNUM_GROUPING_AUTO and fill it in from the pattern and the locale.
61 Grouper grouper
= Grouper::forStrategy(UNUM_GROUPING_AUTO
);
62 grouper
.setLocaleData(patternInfo
, locale
);
// Register the matchers. Each matcher is assigned into its slot in fLocalMatchers (or
// fLocalValidators) and the slot is then passed by reference to addMatcher, which stores
// its address.
64 parser
->addMatcher(parser
->fLocalMatchers
.ignorables
);
65 parser
->addMatcher(parser
->fLocalMatchers
.decimal
= {symbols
, grouper
, parseFlags
});
66 parser
->addMatcher(parser
->fLocalMatchers
.minusSign
= {symbols
, false});
67 parser
->addMatcher(parser
->fLocalMatchers
.plusSign
= {symbols
, false});
68 parser
->addMatcher(parser
->fLocalMatchers
.percent
= {symbols
});
69 parser
->addMatcher(parser
->fLocalMatchers
.permille
= {symbols
});
70 parser
->addMatcher(parser
->fLocalMatchers
.nan
= {symbols
});
71 parser
->addMatcher(parser
->fLocalMatchers
.infinity
= {symbols
});
72 parser
->addMatcher(parser
->fLocalMatchers
.padding
= {u
"@"});
73 parser
->addMatcher(parser
->fLocalMatchers
.scientific
= {symbols
, grouper
});
74 parser
->addMatcher(parser
->fLocalMatchers
.currency
= {currencySymbols
, symbols
, parseFlags
, status
});
75 parser
->addMatcher(parser
->fLocalValidators
.number
= {});
// Hand ownership of the parser to the caller.
78 return parser
.orphan();
// createParserFromProperties: builds a NumberParserImpl configured from a
// DecimalFormatProperties object and a set of symbols.
//
// Parameters:
//   properties    - source of the affix patterns, parse mode, padding, and the other
//                   settings read below.
//   symbols       - symbols used by the matchers; also supplies the locale.
//   parseCurrency - when true, currency matching and validation are requested even if the
//                   affix patterns contain no currency sign.
//
// Returns nullptr when the affix provider is null or status indicates failure at either of
// the two checks below; otherwise returns the parser released via orphan().
//
// NOTE(review): this listing omits many original lines (the embedded numbering skips values).
// In particular the declarations of status and parseFlags, the else lines, the closing
// braces and several guarding conditions are not visible; notes below mark the affected spots.
82 NumberParserImpl::createParserFromProperties(const number::impl::DecimalFormatProperties
& properties
,
83 const DecimalFormatSymbols
& symbols
, bool parseCurrency
,
85 Locale locale
= symbols
.getLocale();
86 PropertiesAffixPatternProvider localPAPP
;
87 CurrencyPluralInfoAffixProvider localCPIAP
;
88 AffixPatternProvider
* affixProvider
;
// Choose the affix provider: the properties-based one when no currency plural info is
// attached, the currency-plural-info one otherwise (the else line is on omitted listing
// line 92).
89 if (properties
.currencyPluralInfo
.fPtr
.isNull()) {
90 localPAPP
.setTo(properties
, status
);
91 affixProvider
= &localPAPP
;
93 localCPIAP
.setTo(*properties
.currencyPluralInfo
.fPtr
, properties
, status
);
94 affixProvider
= &localCPIAP
;
// Bail out if the provider setup failed. In the visible branches affixProvider is always the
// address of a local, so the nullptr half of the test looks defensive only.
96 if (affixProvider
== nullptr || U_FAILURE(status
)) { return nullptr; }
97 CurrencyUnit currency
= resolveCurrency(properties
, locale
, status
);
98 CurrencySymbols
currencySymbols(currency
, locale
, symbols
, status
);
// A missing parse mode is treated as strict: getOrDefault falls back to PARSE_MODE_STRICT.
99 bool isStrict
= properties
.parseMode
.getOrDefault(PARSE_MODE_STRICT
) == PARSE_MODE_STRICT
;
100 Grouper grouper
= Grouper::forProperties(properties
);
// Same test as at listing line 96; status may have changed since then because it was passed
// to resolveCurrency and to the CurrencySymbols constructor.
102 if (affixProvider
== nullptr || U_FAILURE(status
)) { return nullptr; }
// Translate the properties into parse flags.
// NOTE(review): the declaration and initial value of parseFlags are not visible (presumably
// on omitted listing line 101).
103 if (!properties
.parseCaseSensitive
) {
104 parseFlags
|= PARSE_FLAG_IGNORE_CASE
;
106 if (properties
.parseIntegerOnly
) {
107 parseFlags
|= PARSE_FLAG_INTEGER_ONLY
;
109 if (properties
.signAlwaysShown
) {
110 parseFlags
|= PARSE_FLAG_PLUS_SIGN_ALLOWED
;
// NOTE(review): listing lines 112 and 117 are omitted, so the condition that separates the
// four flags below from PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES is not visible; presumably it is
// a test of isStrict with an else branch - confirm against the full file.
113 parseFlags
|= PARSE_FLAG_STRICT_GROUPING_SIZE
;
114 parseFlags
|= PARSE_FLAG_STRICT_SEPARATORS
;
115 parseFlags
|= PARSE_FLAG_USE_FULL_AFFIXES
;
116 parseFlags
|= PARSE_FLAG_EXACT_AFFIX
;
118 parseFlags
|= PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES
;
// A non-positive primary grouping size disables grouping during parsing.
120 if (grouper
.getPrimary() <= 0) {
121 parseFlags
|= PARSE_FLAG_GROUPING_DISABLED
;
// Currency-aware parsing: requested by the caller or implied by a currency sign in the
// affixes.
123 if (parseCurrency
|| affixProvider
->hasCurrencySign()) {
124 parseFlags
|= PARSE_FLAG_MONETARY_SEPARATORS
;
125 // Apple <rdar://problem/51938595> check for curr symbol in suffix; use affix API instead?
// Only the positive suffix pattern is inspected for the currency sign here.
126 if (properties
.positiveSuffixPattern
.indexOf(u
'¤') >= 0) {
127 parseFlags
|= PARSE_FLAG_HAS_TRAIL_CURRENCY
;
130 if (!parseCurrency
) {
131 parseFlags
|= PARSE_FLAG_NO_FOREIGN_CURRENCY
;
// NOTE(review): the pointer produced by new is dereferenced below without a null check;
// confirm whether allocation failure can yield nullptr here.
134 LocalPointer
<NumberParserImpl
> parser(new NumberParserImpl(parseFlags
));
// Strict mode uses the strict ignorables set, otherwise the default one.
136 parser
->fLocalMatchers
.ignorables
= {
137 isStrict
? unisets::STRICT_IGNORABLES
: unisets::DEFAULT_IGNORABLES
};
138 IgnorablesMatcher
& ignorables
= parser
->fLocalMatchers
.ignorables
;
140 //////////////////////
141 /// AFFIX MATCHERS ///
142 //////////////////////
144 // The following statements set up the affix matchers.
// NOTE(review): affixSetupData is a local whose address is handed to the token matcher
// warehouse; confirm the warehouse does not use that pointer after this function returns.
145 AffixTokenMatcherSetupData affixSetupData
= {
146 currencySymbols
, symbols
, ignorables
, locale
, parseFlags
};
147 parser
->fLocalMatchers
.affixTokenMatcherWarehouse
= {&affixSetupData
};
148 parser
->fLocalMatchers
.affixMatcherWarehouse
= {&parser
->fLocalMatchers
.affixTokenMatcherWarehouse
};
149 parser
->fLocalMatchers
.affixMatcherWarehouse
.createAffixMatchers(
150 *affixProvider
, *parser
, ignorables
, parseFlags
, status
);
152 ////////////////////////
153 /// CURRENCY MATCHER ///
154 ////////////////////////
// Same condition that set PARSE_FLAG_MONETARY_SEPARATORS above.
156 if (parseCurrency
|| affixProvider
->hasCurrencySign()) {
157 parser
->addMatcher(parser
->fLocalMatchers
.currency
= {currencySymbols
, symbols
, parseFlags
, status
});
164 // ICU-TC meeting, April 11, 2018: accept percent/permille only if it is in the pattern,
165 // and to maintain regressive behavior, divide by 100 even if no percent sign is present.
// Percent and permille matchers are added only in non-strict mode, and only when the affix
// provider reports the corresponding symbol type.
166 if (!isStrict
&& affixProvider
->containsSymbolType(AffixPatternType::TYPE_PERCENT
, status
)) {
167 parser
->addMatcher(parser
->fLocalMatchers
.percent
= {symbols
});
169 if (!isStrict
&& affixProvider
->containsSymbolType(AffixPatternType::TYPE_PERMILLE
, status
)) {
170 parser
->addMatcher(parser
->fLocalMatchers
.permille
= {symbols
});
173 ///////////////////////////////
174 /// OTHER STANDARD MATCHERS ///
175 ///////////////////////////////
// NOTE(review): listing lines 177 and 180 are omitted; the plus and minus sign matchers may
// be registered conditionally - confirm against the full file.
178 parser
->addMatcher(parser
->fLocalMatchers
.plusSign
= {symbols
, false});
179 parser
->addMatcher(parser
->fLocalMatchers
.minusSign
= {symbols
, false});
181 parser
->addMatcher(parser
->fLocalMatchers
.nan
= {symbols
});
182 parser
->addMatcher(parser
->fLocalMatchers
.infinity
= {symbols
});
// A padding matcher is added only when a pad string is set and the ignorables set does not
// already contain it.
183 UnicodeString padString
= properties
.padString
;
184 if (!padString
.isBogus() && !ignorables
.getSet()->contains(padString
)) {
185 parser
->addMatcher(parser
->fLocalMatchers
.padding
= {padString
});
187 parser
->addMatcher(parser
->fLocalMatchers
.ignorables
);
188 parser
->addMatcher(parser
->fLocalMatchers
.decimal
= {symbols
, grouper
, parseFlags
});
189 // NOTE: parseNoExponent doesn't disable scientific parsing if we have a scientific formatter
190 if (!properties
.parseNoExponent
|| properties
.minimumExponentDigits
> 0) {
191 parser
->addMatcher(parser
->fLocalMatchers
.scientific
= {symbols
, grouper
});
// Validators are registered through the same addMatcher call as the matchers.
// NOTE(review): listing lines 199, 201, 202 and 204 are omitted, so the affix and currency
// validators below may be registered conditionally - confirm against the full file.
198 parser
->addMatcher(parser
->fLocalValidators
.number
= {});
200 parser
->addMatcher(parser
->fLocalValidators
.affix
= {});
203 parser
->addMatcher(parser
->fLocalValidators
.currency
= {});
// When the decimal pattern must match, tell the validator whether the pattern is considered
// to have a decimal separator: it is always shown, or fraction digits are allowed.
205 if (properties
.decimalPatternMatchRequired
) {
206 bool patternHasDecimalSeparator
=
207 properties
.decimalSeparatorAlwaysShown
|| properties
.maximumFractionDigits
!= 0;
208 parser
->addMatcher(parser
->fLocalValidators
.decimalSeparator
= {patternHasDecimalSeparator
});
210 // The multiplier takes care of scaling percentages.
211 Scale multiplier
= scaleFromProperties(properties
);
212 if (multiplier
.isValid()) {
213 parser
->addMatcher(parser
->fLocalValidators
.multiplier
= {multiplier
});
// Hand ownership of the parser to the caller.
217 return parser
.orphan();
// Constructor: records the given parse flags in fParseFlags through the member initializer.
// NOTE(review): the remainder of the body (listing lines 222-223) is not visible here.
220 NumberParserImpl::NumberParserImpl(parse_flags_t parseFlags
)
221 : fParseFlags(parseFlags
) {
// Destructor.
// NOTE(review): the body (listing lines 225-227) is not visible in this listing, so what it
// does, if anything, cannot be stated from here.
224 NumberParserImpl::~NumberParserImpl() {
// addMatcher: stores the address of the given matcher in fMatchers at index fNumMatchers.
// The parser does not copy the matcher, so the referenced object has to outlive the parser's
// use of it (the factory functions pass members of the parser itself).
//
// When one more entry would exceed the current capacity, the array is resized to twice the
// current count, passing the current count as the second argument (presumably the number of
// existing entries to keep - confirm against the fMatchers type).
// NOTE(review): the result of resize is not examined in the visible lines; confirm what
// happens if the resize fails.
// NOTE(review): the increment of fNumMatchers is not visible (listing line 233 is omitted).
228 void NumberParserImpl::addMatcher(NumberParseMatcher
& matcher
) {
229 if (fNumMatchers
+ 1 > fMatchers
.getCapacity()) {
230 fMatchers
.resize(fNumMatchers
* 2, fNumMatchers
);
232 fMatchers
[fNumMatchers
] = &matcher
;
// freeze: takes no arguments and returns nothing.
// NOTE(review): the body (listing lines 237-239) is not visible in this listing; presumably
// it marks the parser as no longer modifiable - confirm against the full file.
236 void NumberParserImpl::freeze() {
// getParseFlags: const accessor with return type parse_flags_t.
// NOTE(review): the body (listing lines 241-243) is not visible in this listing; presumably
// it returns fParseFlags - confirm against the full file.
240 parse_flags_t
NumberParserImpl::getParseFlags() const {
244 void NumberParserImpl::parse(const UnicodeString
& input
, bool greedy
, ParsedNumber
& result
,
245 UErrorCode
& status
) const {
246 return parse(input
, 0, greedy
, result
, status
);
// parse (with start offset): runs the registered matchers over the input, beginning at the
// given offset, and accumulates the outcome in result.
//
// Steps visible here:
//   1. Test status for a prior failure (the action taken is on omitted listing lines).
//   2. Wrap the input in a StringSegment, passing whether PARSE_FLAG_IGNORE_CASE is set as
//      the second constructor argument, and advance the segment by start.
//   3. Run either parseGreedy or parseLongestRecursive.
//   4. Call postProcess(result) on every registered matcher, then result.postProcess().
//
// NOTE(review): the line opening the greedy branch (listing line 258) is omitted; presumably
// it tests the greedy parameter - confirm against the full file.
249 void NumberParserImpl::parse(const UnicodeString
& input
, int32_t start
, bool greedy
, ParsedNumber
& result
,
250 UErrorCode
& status
) const {
251 if (U_FAILURE(status
)) {
255 // TODO: Check start >= 0 and start < input.length()
256 StringSegment
segment(input
, 0 != (fParseFlags
& PARSE_FLAG_IGNORE_CASE
));
257 segment
.adjustOffset(start
);
259 parseGreedy(segment
, result
, status
);
260 } else if (0 != (fParseFlags
& PARSE_FLAG_ALLOW_INFINITE_RECURSION
)) {
// parseLongestRecursive tests its counter against 0 and passes counter + 1 to nested calls,
// so a counter that starts at 1 only moves away from 0.
261 // Start at 1 so that recursionLevels never gets to 0
262 parseLongestRecursive(segment
, result
, 1, status
);
// Starting at -100 lets the counter reach 0 after 100 nested calls.
264 // Arbitrary recursion safety limit: 100 levels.
265 parseLongestRecursive(segment
, result
, -100, status
);
// Give every matcher, in registration order, a chance to post-process the result.
267 for (int32_t i
= 0; i
< fNumMatchers
; i
++) {
268 fMatchers
[i
]->postProcess(result
);
270 result
.postProcess();
// parseGreedy: iterative parse that walks the matcher list by index and applies matchers to
// the segment, accumulating into result.
//
// The for statement has no increment expression, so every change to the index i happens in
// the loop body. Those statements, and the return statements, are on lines omitted from this
// listing. Per the in-line comments, the intended flow is: a matcher that fails its smoke
// test or does not match leads to the next matcher, and a successful match is accepted and
// the scan loops back.
//
// NOTE(review): only the conditions and comments are visible; confirm the exact index
// updates and returns against the full file.
273 void NumberParserImpl::parseGreedy(StringSegment
& segment
, ParsedNumber
& result
,
274 UErrorCode
& status
) const {
275 // Note: this method is not recursive in order to avoid stack overflow.
// extraLoop records that the segment was already found empty once while a trailing currency
// was still expected (see the condition on listing line 280).
276 bool extraLoop
= FALSE
;
277 for (int i
= 0; i
<fNumMatchers
;) {
// Segment exhausted. The condition below holds when an extra pass has already begun and i is
// back at 0, or no trailing currency is expected, or a currency code has already been found.
// The action taken when it holds is on an omitted line.
279 if (segment
.length() == 0) {
280 if ((extraLoop
&& i
==0) || (fParseFlags
& PARSE_FLAG_HAS_TRAIL_CURRENCY
) == 0 || result
.currencyCode
[0] != 0) { // Apple <rdar://problem/51938595>
283 // If we are parsing for currency expected at the end but have not found it yet,
284 // allow one more loop to see if we are matching an empty currency symbol
286 extraLoop
= TRUE
; // Apple <rdar://problem/51938595>
289 const NumberParseMatcher
* matcher
= fMatchers
[i
];
290 if (!matcher
->smokeTest(segment
)) {
291 // Matcher failed smoke test: try the next one
// Remember the offset so a successful match can be detected by the offset having moved.
295 int32_t initialOffset
= segment
.getOffset();
296 matcher
->match(segment
, result
, status
);
297 if (U_FAILURE(status
)) {
// A match is accepted when the matcher consumed input, or, during the extra pass, when it
// produced a currency code without consuming anything.
300 if (segment
.getOffset() != initialOffset
||
301 (extraLoop
&& result
.currencyCode
[0] != 0)) { // Apple <rdar://problem/51938595>
302 // Greedy heuristic: accept the match and loop back
306 // Matcher did not match: try the next one
313 // NOTE: If we get here, the greedy parse completed without consuming the entire string.
// parseLongestRecursive: non-greedy parse. For every matcher that passes its smoke test, it
// offers the matcher progressively longer prefixes of the segment, and whenever the matcher
// consumes exactly the offered prefix it recurses on the remainder with recursionLevels + 1.
// Each candidate is compared against result with isBetterThan.
//
// Parameters:
//   segment         - remaining input; its offset is restored to the entry value after each
//                     attempt so that it can be reused.
//   result          - parse state on entry; also the value candidates are compared against.
//   recursionLevels - counter tested against 0 as a stack-overflow guard and incremented on
//                     each nested call.
//   status          - ICU error code, tested after each match and each nested call.
//
// NOTE(review): the statements inside several branches (empty segment, recursionLevels == 0,
// failed status, better candidate, handling of maybeMore) and any reset of candidate from
// initial are on lines omitted from this listing; confirm against the full file.
316 void NumberParserImpl::parseLongestRecursive(StringSegment
& segment
, ParsedNumber
& result
,
317 int32_t recursionLevels
,
318 UErrorCode
& status
) const {
320 if (segment
.length() == 0) {
324 // Safety against stack overflow
325 if (recursionLevels
== 0) {
329 // TODO: Give a nice way for the matcher to reset the ParsedNumber?
// initial is a copy of the incoming result; candidate starts default-constructed.
330 ParsedNumber
initial(result
);
331 ParsedNumber candidate
;
333 int initialOffset
= segment
.getOffset();
334 for (int32_t i
= 0; i
< fNumMatchers
; i
++) {
335 const NumberParseMatcher
* matcher
= fMatchers
[i
];
336 if (!matcher
->smokeTest(segment
)) {
340 // In a non-greedy parse, we attempt all possible matches and pick the best.
// The loop header has no increment: charsToConsume grows inside the body by the UTF-16
// length of the code point at the current position, so prefixes always end on a code point
// boundary.
341 for (int32_t charsToConsume
= 0; charsToConsume
< segment
.length();) {
342 charsToConsume
+= U16_LENGTH(segment
.codePointAt(charsToConsume
));
344 // Run the matcher on a segment of the current length.
// The segment is temporarily shortened to the prefix and restored right after the match.
346 segment
.setLength(charsToConsume
);
347 bool maybeMore
= matcher
->match(segment
, candidate
, status
);
348 segment
.resetLength();
349 if (U_FAILURE(status
)) {
353 // If the entire segment was consumed, recurse.
354 if (segment
.getOffset() - initialOffset
== charsToConsume
) {
355 parseLongestRecursive(segment
, candidate
, recursionLevels
+ 1, status
);
356 if (U_FAILURE(status
)) {
359 if (candidate
.isBetterThan(result
)) {
364 // Since the segment can be re-used, reset the offset.
365 // This does not have an effect if the matcher did not consume any chars.
366 segment
.setOffset(initialOffset
);
368 // Unless the matcher wants to see the next char, continue to the next matcher.
// toString: builds a description of this parser as a UnicodeString. The text starts with a
// fixed prefix, then the toString() output of each registered matcher in registration order
// is appended, then a fixed closing suffix.
//
// NOTE(review): listing lines 379 and 383-386 are omitted, so any separator appended between
// matchers and the return statement are not visible here.
376 UnicodeString
NumberParserImpl::toString() const {
377 UnicodeString
result(u
"<NumberParserImpl matchers:[");
378 for (int32_t i
= 0; i
< fNumMatchers
; i
++) {
380 result
.append(fMatchers
[i
]->toString());
// The second argument is a length of -1; presumably it asks append to treat the text as
// NUL-terminated - confirm against the UnicodeString API.
382 result
.append(u
" ]>", -1);
387 #endif /* #if !UCONFIG_NO_FORMATTING */