1 // © 2018 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 #include "unicode/utypes.h"
6 #if !UCONFIG_NO_FORMATTING
8 // Allow implicit conversion from char16_t* to UnicodeString for this file:
9 // Helpful in toString methods and elsewhere.
10 #define UNISTR_FROM_STRING_EXPLICIT
14 #include "number_types.h"
15 #include "number_patternstring.h"
16 #include "numparse_types.h"
17 #include "numparse_impl.h"
18 #include "numparse_symbols.h"
19 #include "numparse_decimal.h"
20 #include "unicode/numberformatter.h"
22 #include "number_mapper.h"
23 #include "static_unicode_sets.h"
26 using namespace icu::number
;
27 using namespace icu::number::impl
;
28 using namespace icu::numparse
;
29 using namespace icu::numparse::impl
;
// Out-of-line definition of the NumberParseMatcher destructor, explicitly defaulted.
32 NumberParseMatcher::~NumberParseMatcher() = default;
// Factory: builds a NumberParserImpl for `locale` from a pattern string and a set of parse flags.
//
// Visible steps:
//   1. Allocate the parser and install an ignorables matcher built from DEFAULT_IGNORABLES.
//   2. Build currency symbols from a second DecimalFormatSymbols whose currency symbol and
//      international currency symbol are overridden with u"IU$" and u"ICU".
//   3. Parse `patternString` into a ParsedPatternInfo and create the affix matchers from it.
//   4. Register a fixed list of matchers (ignorables, decimal, signs, percent, permille, NaN,
//      infinity, padding "@", scientific, currency).
//   5. Hand the parser back through LocalPointer::orphan().
//
// `status` is forwarded to every call that accepts it; no U_FAILURE(status) check is visible in
// this function, and the result of `new` is not tested before `parser->` is dereferenced.
//
// NOTE(review): the embedded line numbers in this listing skip values (35, 76-77, 79...), so
// the return type, the closing brace and possibly further statements are not visible here.
36 NumberParserImpl::createSimpleParser(const Locale
& locale
, const UnicodeString
& patternString
,
37 parse_flags_t parseFlags
, UErrorCode
& status
) {
39 LocalPointer
<NumberParserImpl
> parser(new NumberParserImpl(parseFlags
));
40 DecimalFormatSymbols
symbols(locale
, status
);
// Matchers live inside the parser (fLocalMatchers); addMatcher() later records their addresses.
42 parser
->fLocalMatchers
.ignorables
= {unisets::DEFAULT_IGNORABLES
};
43 IgnorablesMatcher
& ignorables
= parser
->fLocalMatchers
.ignorables
;
// Separate symbols object used only to construct the currency symbols with the overrides below.
45 DecimalFormatSymbols
dfs(locale
, status
);
46 dfs
.setSymbol(DecimalFormatSymbols::kCurrencySymbol
, u
"IU$");
47 dfs
.setSymbol(DecimalFormatSymbols::kIntlCurrencySymbol
, u
"ICU");
48 CurrencySymbols
currencySymbols({u
"ICU", status
}, locale
, dfs
, status
);
50 ParsedPatternInfo patternInfo
;
51 PatternParser::parseToPatternInfo(patternString
, patternInfo
, status
);
53 // The following statements set up the affix matchers.
// affixSetupData is a local; only its address is handed to the token-matcher warehouse.
// NOTE(review): confirm the warehouse does not retain that pointer past this function.
54 AffixTokenMatcherSetupData affixSetupData
= {
55 currencySymbols
, symbols
, ignorables
, locale
, parseFlags
};
56 parser
->fLocalMatchers
.affixTokenMatcherWarehouse
= {&affixSetupData
};
57 parser
->fLocalMatchers
.affixMatcherWarehouse
= {&parser
->fLocalMatchers
.affixTokenMatcherWarehouse
};
58 parser
->fLocalMatchers
.affixMatcherWarehouse
.createAffixMatchers(
59 patternInfo
, *parser
, ignorables
, parseFlags
, status
);
// Grouping: start from the AUTO strategy, then fill in locale data from the parsed pattern.
61 Grouper grouper
= Grouper::forStrategy(UNUM_GROUPING_AUTO
);
62 grouper
.setLocaleData(patternInfo
, locale
);
// Each call below assigns a freshly built matcher into parser-owned storage and registers
// that same object by reference.
64 parser
->addMatcher(parser
->fLocalMatchers
.ignorables
);
65 parser
->addMatcher(parser
->fLocalMatchers
.decimal
= {symbols
, grouper
, parseFlags
});
66 parser
->addMatcher(parser
->fLocalMatchers
.minusSign
= {symbols
, false});
67 parser
->addMatcher(parser
->fLocalMatchers
.plusSign
= {symbols
, false});
68 parser
->addMatcher(parser
->fLocalMatchers
.percent
= {symbols
});
69 parser
->addMatcher(parser
->fLocalMatchers
.permille
= {symbols
});
70 parser
->addMatcher(parser
->fLocalMatchers
.nan
= {symbols
});
71 parser
->addMatcher(parser
->fLocalMatchers
.infinity
= {symbols
});
72 parser
->addMatcher(parser
->fLocalMatchers
.padding
= {u
"@"});
73 parser
->addMatcher(parser
->fLocalMatchers
.scientific
= {symbols
, grouper
});
74 parser
->addMatcher(parser
->fLocalMatchers
.currency
= {currencySymbols
, symbols
, parseFlags
, status
});
75 // parser.addMatcher(new RequireNumberMatcher());
// Release the parser from the LocalPointer so the caller receives the raw pointer.
78 return parser
.orphan();
// Factory: builds a NumberParserImpl configured from DecimalFormatProperties and symbols.
//
// Visible behavior:
//   - The locale is taken from `symbols`.
//   - An affix pattern provider is chosen: localPAPP when properties.currencyPluralInfo.fPtr
//     is null; localCPIAP is set from *fPtr (presumably in the else branch, whose line is not
//     visible in this listing).
//   - Returns nullptr when `status` is a failure (or the provider pointer is null).
//   - Properties are translated into PARSE_FLAG_* bits, then affix, currency, percent/permille,
//     standard matchers and validators are registered on the parser.
//   - The parser is handed back through LocalPointer::orphan().
//
// `parseCurrency` enables the currency matcher and monetary separators; when it is false the
// PARSE_FLAG_NO_FOREIGN_CURRENCY bit is set.
//
// NOTE(review): the embedded line numbers skip values (84, 92, 95, 101, 112, 117, 173, 176,
// 195, 197, 198, 200, 210-212...). The `status` parameter, the declaration of `parseFlags` and
// several branch headers/closers are therefore not visible here; the conditions guarding some
// statements below cannot be confirmed from this listing.
82 NumberParserImpl::createParserFromProperties(const number::impl::DecimalFormatProperties
& properties
,
83 const DecimalFormatSymbols
& symbols
, bool parseCurrency
,
85 Locale locale
= symbols
.getLocale();
// Both providers are locals; affixProvider points at whichever one gets initialized.
86 PropertiesAffixPatternProvider localPAPP
;
87 CurrencyPluralInfoAffixProvider localCPIAP
;
88 AffixPatternProvider
* affixProvider
;
89 if (properties
.currencyPluralInfo
.fPtr
.isNull()) {
90 localPAPP
.setTo(properties
, status
);
91 affixProvider
= &localPAPP
;
93 localCPIAP
.setTo(*properties
.currencyPluralInfo
.fPtr
, properties
, status
);
94 affixProvider
= &localCPIAP
;
// Stop early if provider setup reported an error.
96 if (affixProvider
== nullptr || U_FAILURE(status
)) { return nullptr; }
97 CurrencyUnit currency
= resolveCurrency(properties
, locale
, status
);
98 CurrencySymbols
currencySymbols(currency
, locale
, symbols
, status
);
99 bool isStrict
= properties
.parseMode
.getOrDefault(PARSE_MODE_STRICT
) == PARSE_MODE_STRICT
;
100 Grouper grouper
= Grouper::forProperties(properties
);
// Same check as above, repeated after resolveCurrency() and the CurrencySymbols constructor,
// both of which also receive `status`.
102 if (affixProvider
== nullptr || U_FAILURE(status
)) { return nullptr; }
// Translate individual properties into parse flag bits.
103 if (!properties
.parseCaseSensitive
) {
104 parseFlags
|= PARSE_FLAG_IGNORE_CASE
;
106 if (properties
.parseIntegerOnly
) {
107 parseFlags
|= PARSE_FLAG_INTEGER_ONLY
;
109 if (properties
.signAlwaysShown
) {
110 parseFlags
|= PARSE_FLAG_PLUS_SIGN_ALLOWED
;
113 parseFlags
|= PARSE_FLAG_STRICT_GROUPING_SIZE
;
114 parseFlags
|= PARSE_FLAG_STRICT_SEPARATORS
;
115 parseFlags
|= PARSE_FLAG_USE_FULL_AFFIXES
;
116 parseFlags
|= PARSE_FLAG_EXACT_AFFIX
;
118 parseFlags
|= PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES
;
// A non-positive primary grouping size turns grouping off for parsing.
120 if (grouper
.getPrimary() <= 0) {
121 parseFlags
|= PARSE_FLAG_GROUPING_DISABLED
;
123 if (parseCurrency
|| affixProvider
->hasCurrencySign()) {
124 parseFlags
|= PARSE_FLAG_MONETARY_SEPARATORS
;
126 if (!parseCurrency
) {
127 parseFlags
|= PARSE_FLAG_NO_FOREIGN_CURRENCY
;
// The flags are final at this point; the parser is constructed with them.
130 LocalPointer
<NumberParserImpl
> parser(new NumberParserImpl(parseFlags
));
132 parser
->fLocalMatchers
.ignorables
= {
133 isStrict
? unisets::STRICT_IGNORABLES
: unisets::DEFAULT_IGNORABLES
};
134 IgnorablesMatcher
& ignorables
= parser
->fLocalMatchers
.ignorables
;
136 //////////////////////
137 /// AFFIX MATCHERS ///
138 //////////////////////
140 // The following statements set up the affix matchers.
141 AffixTokenMatcherSetupData affixSetupData
= {
142 currencySymbols
, symbols
, ignorables
, locale
, parseFlags
};
143 parser
->fLocalMatchers
.affixTokenMatcherWarehouse
= {&affixSetupData
};
144 parser
->fLocalMatchers
.affixMatcherWarehouse
= {&parser
->fLocalMatchers
.affixTokenMatcherWarehouse
};
145 parser
->fLocalMatchers
.affixMatcherWarehouse
.createAffixMatchers(
146 *affixProvider
, *parser
, ignorables
, parseFlags
, status
);
148 ////////////////////////
149 /// CURRENCY MATCHER ///
150 ////////////////////////
// Same condition as the one used for PARSE_FLAG_MONETARY_SEPARATORS above.
152 if (parseCurrency
|| affixProvider
->hasCurrencySign()) {
153 parser
->addMatcher(parser
->fLocalMatchers
.currency
= {currencySymbols
, symbols
, parseFlags
, status
});
160 // ICU-TC meeting, April 11, 2018: accept percent/permille only if it is in the pattern,
161 // and to maintain regressive behavior, divide by 100 even if no percent sign is present.
162 if (affixProvider
->containsSymbolType(AffixPatternType::TYPE_PERCENT
, status
)) {
163 parser
->addMatcher(parser
->fLocalMatchers
.percent
= {symbols
});
165 if (affixProvider
->containsSymbolType(AffixPatternType::TYPE_PERMILLE
, status
)) {
166 parser
->addMatcher(parser
->fLocalMatchers
.permille
= {symbols
});
169 ///////////////////////////////
170 /// OTHER STANDARD MATCHERS ///
171 ///////////////////////////////
174 parser
->addMatcher(parser
->fLocalMatchers
.plusSign
= {symbols
, false});
175 parser
->addMatcher(parser
->fLocalMatchers
.minusSign
= {symbols
, false});
177 parser
->addMatcher(parser
->fLocalMatchers
.nan
= {symbols
});
178 parser
->addMatcher(parser
->fLocalMatchers
.infinity
= {symbols
});
// A padding matcher is only added for a non-bogus pad string that the ignorables set does
// not already contain.
179 UnicodeString padString
= properties
.padString
;
180 if (!padString
.isBogus() && !ignorables
.getSet()->contains(padString
)) {
181 parser
->addMatcher(parser
->fLocalMatchers
.padding
= {padString
});
183 parser
->addMatcher(parser
->fLocalMatchers
.ignorables
);
184 parser
->addMatcher(parser
->fLocalMatchers
.decimal
= {symbols
, grouper
, parseFlags
});
185 // NOTE: parseNoExponent doesn't disable scientific parsing if we have a scientific formatter
186 if (!properties
.parseNoExponent
|| properties
.minimumExponentDigits
> 0) {
187 parser
->addMatcher(parser
->fLocalMatchers
.scientific
= {symbols
, grouper
});
// Validators are stored in fLocalValidators and registered through the same addMatcher() path.
194 parser
->addMatcher(parser
->fLocalValidators
.number
= {});
196 parser
->addMatcher(parser
->fLocalValidators
.affix
= {});
199 parser
->addMatcher(parser
->fLocalValidators
.currency
= {});
// The pattern is treated as having a decimal separator when one is always shown or when any
// fraction digits are allowed.
201 if (properties
.decimalPatternMatchRequired
) {
202 bool patternHasDecimalSeparator
=
203 properties
.decimalSeparatorAlwaysShown
|| properties
.maximumFractionDigits
!= 0;
204 parser
->addMatcher(parser
->fLocalValidators
.decimalSeparator
= {patternHasDecimalSeparator
});
206 // The multiplier takes care of scaling percentages.
207 Scale multiplier
= scaleFromProperties(properties
);
208 if (multiplier
.isValid()) {
209 parser
->addMatcher(parser
->fLocalValidators
.multiplier
= {multiplier
});
// Release the parser from the LocalPointer so the caller receives the raw pointer.
213 return parser
.orphan();
// Constructor: records the parse flags in fParseFlags (read later by parse()).
// NOTE(review): the constructor body and closing brace are not visible in this listing.
216 NumberParserImpl::NumberParserImpl(parse_flags_t parseFlags
)
217 : fParseFlags(parseFlags
) {
// Destructor.
// NOTE(review): the body is not visible in this listing (embedded lines 221-223 are missing),
// so what it cleans up cannot be stated from here.
220 NumberParserImpl::~NumberParserImpl() {
// Registers `matcher` with this parser by storing its address in fMatchers at index
// fNumMatchers. No copy is made, so the referenced matcher has to outlive the parser's use of it.
//
// When one more entry would exceed the array's capacity, the array is resized to
// fNumMatchers * 2 entries, keeping the first fNumMatchers.
// NOTE(review): the return value of resize() is not checked here.
// NOTE(review): doubling only grows the array if fNumMatchers > 0 at that point; presumably
// fMatchers starts with a non-zero capacity — confirm in the header.
// NOTE(review): the statement that advances fNumMatchers is not visible in this listing
// (embedded lines 229-231 are missing).
224 void NumberParserImpl::addMatcher(NumberParseMatcher
& matcher
) {
225 if (fNumMatchers
+ 1 > fMatchers
.getCapacity()) {
226 fMatchers
.resize(fNumMatchers
* 2, fNumMatchers
);
228 fMatchers
[fNumMatchers
] = &matcher
;
// freeze(): presumably marks the parser as fully configured — TODO confirm.
// NOTE(review): the body is not visible in this listing (embedded lines 233-235 are missing).
232 void NumberParserImpl::freeze() {
// Const accessor for the parser's flags; presumably returns fParseFlags — the return statement
// is not visible in this listing (embedded lines 237-239 are missing).
236 parse_flags_t
NumberParserImpl::getParseFlags() const {
// Convenience overload: parses `input` from offset 0 by forwarding to the overload that takes
// an explicit start index. `greedy`, `result` and `status` are passed through unchanged.
240 void NumberParserImpl::parse(const UnicodeString
& input
, bool greedy
, ParsedNumber
& result
,
241 UErrorCode
& status
) const {
242 return parse(input
, 0, greedy
, result
, status
);
// Parses `input` beginning at index `start`, accumulating the outcome in `result`.
//
// Visible steps:
//   1. Test `status` with U_FAILURE (the statement inside that branch is not visible here).
//   2. Wrap `input` in a StringSegment; its second constructor argument is true exactly when
//      PARSE_FLAG_IGNORE_CASE is set in fParseFlags. Advance the segment by `start`.
//   3. Run parseGreedyRecursive or parseLongestRecursive. Presumably `greedy` selects between
//      them; the branch lines are not visible in this listing — TODO confirm.
//   4. Give every registered matcher a postProcess(result) pass, in registration order, then
//      call result.postProcess().
//
// `start` is not range-checked (see the TODO below).
245 void NumberParserImpl::parse(const UnicodeString
& input
, int32_t start
, bool greedy
, ParsedNumber
& result
,
246 UErrorCode
& status
) const {
247 if (U_FAILURE(status
)) {
251 // TODO: Check start >= 0 and start < input.length()
252 StringSegment
segment(input
, 0 != (fParseFlags
& PARSE_FLAG_IGNORE_CASE
));
253 segment
.adjustOffset(start
);
255 parseGreedyRecursive(segment
, result
, status
);
257 parseLongestRecursive(segment
, result
, status
);
// Matcher-level post-processing runs before the result's own postProcess().
259 for (int32_t i
= 0; i
< fNumMatchers
; i
++) {
260 fMatchers
[i
]->postProcess(result
);
262 result
.postProcess();
// Greedy parse step: walks the matchers in registration order and, for the first one whose
// match() moves the segment offset, recurses on the remaining input.
//
// Matchers are first screened with smokeTest(segment); match() is called on those that get
// past it, writing into `result`. `status` is tested after each match() call.
//
// NOTE(review): the statements inside the empty-segment, smokeTest and U_FAILURE branches
// (presumably return / continue) and the return after the recursion are not visible in this
// listing; the embedded line numbers skip them.
265 void NumberParserImpl::parseGreedyRecursive(StringSegment
& segment
, ParsedNumber
& result
,
266 UErrorCode
& status
) const {
268 if (segment
.length() == 0) {
// Remember where this level started so that progress can be detected and undone.
272 int initialOffset
= segment
.getOffset();
273 for (int32_t i
= 0; i
< fNumMatchers
; i
++) {
274 const NumberParseMatcher
* matcher
= fMatchers
[i
];
275 if (!matcher
->smokeTest(segment
)) {
278 matcher
->match(segment
, result
, status
);
279 if (U_FAILURE(status
)) {
// An offset change means this matcher consumed input.
282 if (segment
.getOffset() != initialOffset
) {
283 // In a greedy parse, recurse on only the first match.
284 parseGreedyRecursive(segment
, result
, status
);
285 // The following line resets the offset so that the StringSegment stays the same across the
287 // function call boundary. Since we recurse only once, this line is not strictly necessary.
288 segment
.setOffset(initialOffset
);
293 // NOTE: If we get here, the greedy parse completed without consuming the entire string.
// Exhaustive ("longest") parse step: for each matcher that gets past smokeTest(), tries
// prefixes of the segment of increasing length and recurses whenever the matcher consumed the
// whole prefix, comparing each candidate against `result` with isBetterThan().
//
// Prefix lengths grow by whole code points (U16_LENGTH of the code point at the current
// position), so a prefix never ends inside a surrogate pair.
//
// NOTE(review): several lines are not visible in this listing (embedded numbers skip them):
// the statements inside the empty-segment, smokeTest and U_FAILURE branches, the
// initialization of `candidate` on each attempt, what happens when the candidate is better,
// and the use of `maybeMore`. Comments below describe only what is shown.
296 void NumberParserImpl::parseLongestRecursive(StringSegment
& segment
, ParsedNumber
& result
,
297 UErrorCode
& status
) const {
299 if (segment
.length() == 0) {
303 // TODO: Give a nice way for the matcher to reset the ParsedNumber?
// `initial` is a copy of the incoming result, taken before any matcher runs.
304 ParsedNumber
initial(result
);
305 ParsedNumber candidate
;
307 int initialOffset
= segment
.getOffset();
308 for (int32_t i
= 0; i
< fNumMatchers
; i
++) {
309 const NumberParseMatcher
* matcher
= fMatchers
[i
];
310 if (!matcher
->smokeTest(segment
)) {
314 // In a non-greedy parse, we attempt all possible matches and pick the best.
// The loop header has no increment; charsToConsume is advanced on the first line of the body.
315 for (int32_t charsToConsume
= 0; charsToConsume
< segment
.length();) {
316 charsToConsume
+= U16_LENGTH(segment
.codePointAt(charsToConsume
));
318 // Run the matcher on a segment of the current length.
// The segment is narrowed only for the duration of match(), then restored.
320 segment
.setLength(charsToConsume
);
321 bool maybeMore
= matcher
->match(segment
, candidate
, status
);
322 segment
.resetLength();
323 if (U_FAILURE(status
)) {
327 // If the entire segment was consumed, recurse.
328 if (segment
.getOffset() - initialOffset
== charsToConsume
) {
329 parseLongestRecursive(segment
, candidate
, status
);
330 if (U_FAILURE(status
)) {
333 if (candidate
.isBetterThan(result
)) {
338 // Since the segment can be re-used, reset the offset.
339 // This does not have an effect if the matcher did not consume any chars.
340 segment
.setOffset(initialOffset
);
342 // Unless the matcher wants to see the next char, continue to the next matcher.
// Builds a human-readable description of this parser: the prefix
// "<NumberParserImpl matchers:[", then each registered matcher's toString() in registration
// order, then " ]>".
// NOTE(review): embedded line 353 (inside the loop) and the return statement are not visible
// in this listing.
350 UnicodeString
NumberParserImpl::toString() const {
351 UnicodeString
result(u
"<NumberParserImpl matchers:[");
352 for (int32_t i
= 0; i
< fNumMatchers
; i
++) {
354 result
.append(fMatchers
[i
]->toString());
// Length argument -1: presumably "NUL-terminated" — confirm against UnicodeString::append.
356 result
.append(u
" ]>", -1);
361 #endif /* #if !UCONFIG_NO_FORMATTING */