]>
Commit | Line | Data |
---|---|---|
0f5d89e8 A |
1 | // © 2018 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
3 | ||
4 | #include "unicode/utypes.h" | |
5 | ||
6 | #if !UCONFIG_NO_FORMATTING | |
7 | ||
8 | // Allow implicit conversion from char16_t* to UnicodeString for this file: | |
9 | // Helpful in toString methods and elsewhere. | |
10 | #define UNISTR_FROM_STRING_EXPLICIT | |
11 | ||
12 | #include <typeinfo> | |
13 | #include <array> | |
14 | #include "number_types.h" | |
15 | #include "number_patternstring.h" | |
16 | #include "numparse_types.h" | |
17 | #include "numparse_impl.h" | |
18 | #include "numparse_symbols.h" | |
19 | #include "numparse_decimal.h" | |
20 | #include "unicode/numberformatter.h" | |
21 | #include "cstr.h" | |
22 | #include "number_mapper.h" | |
23 | #include "static_unicode_sets.h" | |
24 | ||
25 | using namespace icu; | |
26 | using namespace icu::number; | |
27 | using namespace icu::number::impl; | |
28 | using namespace icu::numparse; | |
29 | using namespace icu::numparse::impl; | |
30 | ||
31 | ||
// Out-of-line definition of the NumberParseMatcher destructor, using the
// compiler-generated (defaulted) implementation.
32 | NumberParseMatcher::~NumberParseMatcher() = default; | |
33 | ||
34 | ||
35 | NumberParserImpl* | |
36 | NumberParserImpl::createSimpleParser(const Locale& locale, const UnicodeString& patternString, | |
37 | parse_flags_t parseFlags, UErrorCode& status) { | |
38 | ||
39 | LocalPointer<NumberParserImpl> parser(new NumberParserImpl(parseFlags)); | |
40 | DecimalFormatSymbols symbols(locale, status); | |
41 | ||
42 | parser->fLocalMatchers.ignorables = {unisets::DEFAULT_IGNORABLES}; | |
43 | IgnorablesMatcher& ignorables = parser->fLocalMatchers.ignorables; | |
44 | ||
45 | DecimalFormatSymbols dfs(locale, status); | |
46 | dfs.setSymbol(DecimalFormatSymbols::kCurrencySymbol, u"IU$"); | |
47 | dfs.setSymbol(DecimalFormatSymbols::kIntlCurrencySymbol, u"ICU"); | |
48 | CurrencySymbols currencySymbols({u"ICU", status}, locale, dfs, status); | |
49 | ||
50 | ParsedPatternInfo patternInfo; | |
51 | PatternParser::parseToPatternInfo(patternString, patternInfo, status); | |
52 | ||
53 | // The following statements set up the affix matchers. | |
54 | AffixTokenMatcherSetupData affixSetupData = { | |
55 | currencySymbols, symbols, ignorables, locale, parseFlags}; | |
56 | parser->fLocalMatchers.affixTokenMatcherWarehouse = {&affixSetupData}; | |
57 | parser->fLocalMatchers.affixMatcherWarehouse = {&parser->fLocalMatchers.affixTokenMatcherWarehouse}; | |
58 | parser->fLocalMatchers.affixMatcherWarehouse.createAffixMatchers( | |
59 | patternInfo, *parser, ignorables, parseFlags, status); | |
60 | ||
61 | Grouper grouper = Grouper::forStrategy(UNUM_GROUPING_AUTO); | |
62 | grouper.setLocaleData(patternInfo, locale); | |
63 | ||
64 | parser->addMatcher(parser->fLocalMatchers.ignorables); | |
65 | parser->addMatcher(parser->fLocalMatchers.decimal = {symbols, grouper, parseFlags}); | |
66 | parser->addMatcher(parser->fLocalMatchers.minusSign = {symbols, false}); | |
67 | parser->addMatcher(parser->fLocalMatchers.plusSign = {symbols, false}); | |
68 | parser->addMatcher(parser->fLocalMatchers.percent = {symbols}); | |
69 | parser->addMatcher(parser->fLocalMatchers.permille = {symbols}); | |
70 | parser->addMatcher(parser->fLocalMatchers.nan = {symbols}); | |
71 | parser->addMatcher(parser->fLocalMatchers.infinity = {symbols}); | |
72 | parser->addMatcher(parser->fLocalMatchers.padding = {u"@"}); | |
73 | parser->addMatcher(parser->fLocalMatchers.scientific = {symbols, grouper}); | |
74 | parser->addMatcher(parser->fLocalMatchers.currency = {currencySymbols, symbols, parseFlags, status}); | |
75 | // parser.addMatcher(new RequireNumberMatcher()); | |
76 | ||
77 | parser->freeze(); | |
78 | return parser.orphan(); | |
79 | } | |
80 | ||
81 | NumberParserImpl* | |
82 | NumberParserImpl::createParserFromProperties(const number::impl::DecimalFormatProperties& properties, | |
83 | const DecimalFormatSymbols& symbols, bool parseCurrency, | |
84 | UErrorCode& status) { | |
85 | Locale locale = symbols.getLocale(); | |
86 | PropertiesAffixPatternProvider localPAPP; | |
87 | CurrencyPluralInfoAffixProvider localCPIAP; | |
88 | AffixPatternProvider* affixProvider; | |
89 | if (properties.currencyPluralInfo.fPtr.isNull()) { | |
90 | localPAPP.setTo(properties, status); | |
91 | affixProvider = &localPAPP; | |
92 | } else { | |
93 | localCPIAP.setTo(*properties.currencyPluralInfo.fPtr, properties, status); | |
94 | affixProvider = &localCPIAP; | |
95 | } | |
96 | if (affixProvider == nullptr || U_FAILURE(status)) { return nullptr; } | |
97 | CurrencyUnit currency = resolveCurrency(properties, locale, status); | |
98 | CurrencySymbols currencySymbols(currency, locale, symbols, status); | |
99 | bool isStrict = properties.parseMode.getOrDefault(PARSE_MODE_STRICT) == PARSE_MODE_STRICT; | |
100 | Grouper grouper = Grouper::forProperties(properties); | |
101 | int parseFlags = 0; | |
102 | if (affixProvider == nullptr || U_FAILURE(status)) { return nullptr; } | |
103 | if (!properties.parseCaseSensitive) { | |
104 | parseFlags |= PARSE_FLAG_IGNORE_CASE; | |
105 | } | |
106 | if (properties.parseIntegerOnly) { | |
107 | parseFlags |= PARSE_FLAG_INTEGER_ONLY; | |
108 | } | |
109 | if (properties.signAlwaysShown) { | |
110 | parseFlags |= PARSE_FLAG_PLUS_SIGN_ALLOWED; | |
111 | } | |
112 | if (isStrict) { | |
113 | parseFlags |= PARSE_FLAG_STRICT_GROUPING_SIZE; | |
114 | parseFlags |= PARSE_FLAG_STRICT_SEPARATORS; | |
115 | parseFlags |= PARSE_FLAG_USE_FULL_AFFIXES; | |
116 | parseFlags |= PARSE_FLAG_EXACT_AFFIX; | |
117 | } else { | |
118 | parseFlags |= PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES; | |
119 | } | |
120 | if (grouper.getPrimary() <= 0) { | |
121 | parseFlags |= PARSE_FLAG_GROUPING_DISABLED; | |
122 | } | |
123 | if (parseCurrency || affixProvider->hasCurrencySign()) { | |
124 | parseFlags |= PARSE_FLAG_MONETARY_SEPARATORS; | |
125 | } | |
126 | if (!parseCurrency) { | |
127 | parseFlags |= PARSE_FLAG_NO_FOREIGN_CURRENCY; | |
128 | } | |
129 | ||
130 | LocalPointer<NumberParserImpl> parser(new NumberParserImpl(parseFlags)); | |
131 | ||
132 | parser->fLocalMatchers.ignorables = { | |
133 | isStrict ? unisets::STRICT_IGNORABLES : unisets::DEFAULT_IGNORABLES}; | |
134 | IgnorablesMatcher& ignorables = parser->fLocalMatchers.ignorables; | |
135 | ||
136 | ////////////////////// | |
137 | /// AFFIX MATCHERS /// | |
138 | ////////////////////// | |
139 | ||
140 | // The following statements set up the affix matchers. | |
141 | AffixTokenMatcherSetupData affixSetupData = { | |
142 | currencySymbols, symbols, ignorables, locale, parseFlags}; | |
143 | parser->fLocalMatchers.affixTokenMatcherWarehouse = {&affixSetupData}; | |
144 | parser->fLocalMatchers.affixMatcherWarehouse = {&parser->fLocalMatchers.affixTokenMatcherWarehouse}; | |
145 | parser->fLocalMatchers.affixMatcherWarehouse.createAffixMatchers( | |
146 | *affixProvider, *parser, ignorables, parseFlags, status); | |
147 | ||
148 | //////////////////////// | |
149 | /// CURRENCY MATCHER /// | |
150 | //////////////////////// | |
151 | ||
152 | if (parseCurrency || affixProvider->hasCurrencySign()) { | |
153 | parser->addMatcher(parser->fLocalMatchers.currency = {currencySymbols, symbols, parseFlags, status}); | |
154 | } | |
155 | ||
156 | /////////////// | |
157 | /// PERCENT /// | |
158 | /////////////// | |
159 | ||
160 | // ICU-TC meeting, April 11, 2018: accept percent/permille only if it is in the pattern, | |
161 | // and to maintain regressive behavior, divide by 100 even if no percent sign is present. | |
162 | if (affixProvider->containsSymbolType(AffixPatternType::TYPE_PERCENT, status)) { | |
163 | parser->addMatcher(parser->fLocalMatchers.percent = {symbols}); | |
164 | } | |
165 | if (affixProvider->containsSymbolType(AffixPatternType::TYPE_PERMILLE, status)) { | |
166 | parser->addMatcher(parser->fLocalMatchers.permille = {symbols}); | |
167 | } | |
168 | ||
169 | /////////////////////////////// | |
170 | /// OTHER STANDARD MATCHERS /// | |
171 | /////////////////////////////// | |
172 | ||
173 | if (!isStrict) { | |
174 | parser->addMatcher(parser->fLocalMatchers.plusSign = {symbols, false}); | |
175 | parser->addMatcher(parser->fLocalMatchers.minusSign = {symbols, false}); | |
176 | } | |
177 | parser->addMatcher(parser->fLocalMatchers.nan = {symbols}); | |
178 | parser->addMatcher(parser->fLocalMatchers.infinity = {symbols}); | |
179 | UnicodeString padString = properties.padString; | |
180 | if (!padString.isBogus() && !ignorables.getSet()->contains(padString)) { | |
181 | parser->addMatcher(parser->fLocalMatchers.padding = {padString}); | |
182 | } | |
183 | parser->addMatcher(parser->fLocalMatchers.ignorables); | |
184 | parser->addMatcher(parser->fLocalMatchers.decimal = {symbols, grouper, parseFlags}); | |
185 | // NOTE: parseNoExponent doesn't disable scientific parsing if we have a scientific formatter | |
186 | if (!properties.parseNoExponent || properties.minimumExponentDigits > 0) { | |
187 | parser->addMatcher(parser->fLocalMatchers.scientific = {symbols, grouper}); | |
188 | } | |
189 | ||
190 | ////////////////// | |
191 | /// VALIDATORS /// | |
192 | ////////////////// | |
193 | ||
194 | parser->addMatcher(parser->fLocalValidators.number = {}); | |
195 | if (isStrict) { | |
196 | parser->addMatcher(parser->fLocalValidators.affix = {}); | |
197 | } | |
198 | if (parseCurrency) { | |
199 | parser->addMatcher(parser->fLocalValidators.currency = {}); | |
200 | } | |
201 | if (properties.decimalPatternMatchRequired) { | |
202 | bool patternHasDecimalSeparator = | |
203 | properties.decimalSeparatorAlwaysShown || properties.maximumFractionDigits != 0; | |
204 | parser->addMatcher(parser->fLocalValidators.decimalSeparator = {patternHasDecimalSeparator}); | |
205 | } | |
206 | // The multiplier takes care of scaling percentages. | |
207 | Scale multiplier = scaleFromProperties(properties); | |
208 | if (multiplier.isValid()) { | |
209 | parser->addMatcher(parser->fLocalValidators.multiplier = {multiplier}); | |
210 | } | |
211 | ||
212 | parser->freeze(); | |
213 | return parser.orphan(); | |
214 | } | |
215 | ||
216 | NumberParserImpl::NumberParserImpl(parse_flags_t parseFlags) | |
217 | : fParseFlags(parseFlags) { | |
218 | } | |
219 | ||
220 | NumberParserImpl::~NumberParserImpl() { | |
221 | fNumMatchers = 0; | |
222 | } | |
223 | ||
224 | void NumberParserImpl::addMatcher(NumberParseMatcher& matcher) { | |
225 | if (fNumMatchers + 1 > fMatchers.getCapacity()) { | |
226 | fMatchers.resize(fNumMatchers * 2, fNumMatchers); | |
227 | } | |
228 | fMatchers[fNumMatchers] = &matcher; | |
229 | fNumMatchers++; | |
230 | } | |
231 | ||
232 | void NumberParserImpl::freeze() { | |
233 | fFrozen = true; | |
234 | } | |
235 | ||
236 | parse_flags_t NumberParserImpl::getParseFlags() const { | |
237 | return fParseFlags; | |
238 | } | |
239 | ||
240 | void NumberParserImpl::parse(const UnicodeString& input, bool greedy, ParsedNumber& result, | |
241 | UErrorCode& status) const { | |
242 | return parse(input, 0, greedy, result, status); | |
243 | } | |
244 | ||
245 | void NumberParserImpl::parse(const UnicodeString& input, int32_t start, bool greedy, ParsedNumber& result, | |
246 | UErrorCode& status) const { | |
247 | if (U_FAILURE(status)) { | |
248 | return; | |
249 | } | |
250 | U_ASSERT(fFrozen); | |
251 | // TODO: Check start >= 0 and start < input.length() | |
252 | StringSegment segment(input, 0 != (fParseFlags & PARSE_FLAG_IGNORE_CASE)); | |
253 | segment.adjustOffset(start); | |
254 | if (greedy) { | |
255 | parseGreedyRecursive(segment, result, status); | |
256 | } else { | |
257 | parseLongestRecursive(segment, result, status); | |
258 | } | |
259 | for (int32_t i = 0; i < fNumMatchers; i++) { | |
260 | fMatchers[i]->postProcess(result); | |
261 | } | |
262 | result.postProcess(); | |
263 | } | |
264 | ||
265 | void NumberParserImpl::parseGreedyRecursive(StringSegment& segment, ParsedNumber& result, | |
266 | UErrorCode& status) const { | |
267 | // Base Case | |
268 | if (segment.length() == 0) { | |
269 | return; | |
270 | } | |
271 | ||
272 | int initialOffset = segment.getOffset(); | |
273 | for (int32_t i = 0; i < fNumMatchers; i++) { | |
274 | const NumberParseMatcher* matcher = fMatchers[i]; | |
275 | if (!matcher->smokeTest(segment)) { | |
276 | continue; | |
277 | } | |
278 | matcher->match(segment, result, status); | |
279 | if (U_FAILURE(status)) { | |
280 | return; | |
281 | } | |
282 | if (segment.getOffset() != initialOffset) { | |
283 | // In a greedy parse, recurse on only the first match. | |
284 | parseGreedyRecursive(segment, result, status); | |
285 | // The following line resets the offset so that the StringSegment says the same across | |
286 | // the function | |
287 | // call boundary. Since we recurse only once, this line is not strictly necessary. | |
288 | segment.setOffset(initialOffset); | |
289 | return; | |
290 | } | |
291 | } | |
292 | ||
293 | // NOTE: If we get here, the greedy parse completed without consuming the entire string. | |
294 | } | |
295 | ||
296 | void NumberParserImpl::parseLongestRecursive(StringSegment& segment, ParsedNumber& result, | |
297 | UErrorCode& status) const { | |
298 | // Base Case | |
299 | if (segment.length() == 0) { | |
300 | return; | |
301 | } | |
302 | ||
303 | // TODO: Give a nice way for the matcher to reset the ParsedNumber? | |
304 | ParsedNumber initial(result); | |
305 | ParsedNumber candidate; | |
306 | ||
307 | int initialOffset = segment.getOffset(); | |
308 | for (int32_t i = 0; i < fNumMatchers; i++) { | |
309 | const NumberParseMatcher* matcher = fMatchers[i]; | |
310 | if (!matcher->smokeTest(segment)) { | |
311 | continue; | |
312 | } | |
313 | ||
314 | // In a non-greedy parse, we attempt all possible matches and pick the best. | |
315 | for (int32_t charsToConsume = 0; charsToConsume < segment.length();) { | |
316 | charsToConsume += U16_LENGTH(segment.codePointAt(charsToConsume)); | |
317 | ||
318 | // Run the matcher on a segment of the current length. | |
319 | candidate = initial; | |
320 | segment.setLength(charsToConsume); | |
321 | bool maybeMore = matcher->match(segment, candidate, status); | |
322 | segment.resetLength(); | |
323 | if (U_FAILURE(status)) { | |
324 | return; | |
325 | } | |
326 | ||
327 | // If the entire segment was consumed, recurse. | |
328 | if (segment.getOffset() - initialOffset == charsToConsume) { | |
329 | parseLongestRecursive(segment, candidate, status); | |
330 | if (U_FAILURE(status)) { | |
331 | return; | |
332 | } | |
333 | if (candidate.isBetterThan(result)) { | |
334 | result = candidate; | |
335 | } | |
336 | } | |
337 | ||
338 | // Since the segment can be re-used, reset the offset. | |
339 | // This does not have an effect if the matcher did not consume any chars. | |
340 | segment.setOffset(initialOffset); | |
341 | ||
342 | // Unless the matcher wants to see the next char, continue to the next matcher. | |
343 | if (!maybeMore) { | |
344 | break; | |
345 | } | |
346 | } | |
347 | } | |
348 | } | |
349 | ||
350 | UnicodeString NumberParserImpl::toString() const { | |
351 | UnicodeString result(u"<NumberParserImpl matchers:["); | |
352 | for (int32_t i = 0; i < fNumMatchers; i++) { | |
353 | result.append(u' '); | |
354 | result.append(fMatchers[i]->toString()); | |
355 | } | |
356 | result.append(u" ]>", -1); | |
357 | return result; | |
358 | } | |
359 | ||
360 | ||
361 | #endif /* #if !UCONFIG_NO_FORMATTING */ |