]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/intltest/numbertest_parse.cpp
ICU-62123.0.1.tar.gz
[apple/icu.git] / icuSources / test / intltest / numbertest_parse.cpp
1 // © 2018 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3
4 #include "unicode/utypes.h"
5
6 #if !UCONFIG_NO_FORMATTING
7
8 #include "numbertest.h"
9 #include "numparse_impl.h"
10 #include "static_unicode_sets.h"
11 #include "unicode/dcfmtsym.h"
12 #include "unicode/testlog.h"
13
14 #include <cmath>
15 #include <numparse_affixes.h>
16
17 using icu::unisets::get;
18
19 void NumberParserTest::runIndexedTest(int32_t index, UBool exec, const char*& name, char*) {
20 if (exec) {
21 logln("TestSuite NumberParserTest: ");
22 }
23 TESTCASE_AUTO_BEGIN;
24 TESTCASE_AUTO(testBasic);
25 TESTCASE_AUTO(testSeriesMatcher);
26 TESTCASE_AUTO(testCombinedCurrencyMatcher);
27 TESTCASE_AUTO(testAffixPatternMatcher);
28 TESTCASE_AUTO_END;
29 }
30
31 void NumberParserTest::testBasic() {
32 IcuTestErrorCode status(*this, "testBasic");
33
34 static const struct TestCase {
35 int32_t flags;
36 const char16_t* inputString;
37 const char16_t* patternString;
38 int32_t expectedCharsConsumed;
39 double expectedResultDouble;
40 } cases[] = {{3, u"51423", u"0", 5, 51423.},
41 {3, u"51423x", u"0", 5, 51423.},
42 {3, u" 51423", u"0", 6, 51423.},
43 {3, u"51423 ", u"0", 5, 51423.},
44 {3, u"๐Ÿฑ๐Ÿญ๐Ÿฐ๐Ÿฎ๐Ÿฏ", u"0", 10, 51423.},
45 {3, u"๐Ÿฑ๐Ÿญ๐Ÿฐ๐Ÿฎ๐Ÿฏx", u"0", 10, 51423.},
46 {3, u" ๐Ÿฑ๐Ÿญ๐Ÿฐ๐Ÿฎ๐Ÿฏ", u"0", 11, 51423.},
47 {3, u"๐Ÿฑ๐Ÿญ๐Ÿฐ๐Ÿฎ๐Ÿฏ ", u"0", 10, 51423.},
48 {7, u"51,423", u"#,##,##0", 6, 51423.},
49 {7, u" 51,423", u"#,##,##0", 7, 51423.},
50 {7, u"51,423 ", u"#,##,##0", 6, 51423.},
51 {7, u"51,423,", u"#,##,##0", 6, 51423.},
52 {7, u"51,423,,", u"#,##,##0", 6, 51423.},
53 {7, u"51,423.5", u"#,##,##0", 8, 51423.5},
54 {7, u"51,423.5,", u"#,##,##0", 8, 51423.5},
55 {7, u"51,423.5,,", u"#,##,##0", 8, 51423.5},
56 {7, u"51,423.5.", u"#,##,##0", 8, 51423.5},
57 {7, u"51,423.5..", u"#,##,##0", 8, 51423.5},
58 {7, u"๐Ÿฑ๐Ÿญ,๐Ÿฐ๐Ÿฎ๐Ÿฏ", u"#,##,##0", 11, 51423.},
59 {7, u"๐Ÿณ,๐Ÿด๐Ÿต,๐Ÿฑ๐Ÿญ,๐Ÿฐ๐Ÿฎ๐Ÿฏ", u"#,##,##0", 19, 78951423.},
60 {7, u"๐Ÿณ๐Ÿด,๐Ÿต๐Ÿฑ๐Ÿญ.๐Ÿฐ๐Ÿฎ๐Ÿฏ", u"#,##,##0", 18, 78951.423},
61 {7, u"๐Ÿณ๐Ÿด,๐Ÿฌ๐Ÿฌ๐Ÿฌ", u"#,##,##0", 11, 78000.},
62 {7, u"๐Ÿณ๐Ÿด,๐Ÿฌ๐Ÿฌ๐Ÿฌ.๐Ÿฌ๐Ÿฌ๐Ÿฌ", u"#,##,##0", 18, 78000.},
63 {7, u"๐Ÿณ๐Ÿด,๐Ÿฌ๐Ÿฌ๐Ÿฌ.๐Ÿฌ๐Ÿฎ๐Ÿฏ", u"#,##,##0", 18, 78000.023},
64 {7, u"๐Ÿณ๐Ÿด.๐Ÿฌ๐Ÿฌ๐Ÿฌ.๐Ÿฌ๐Ÿฎ๐Ÿฏ", u"#,##,##0", 11, 78.},
65 {7, u"1,", u"#,##,##0", 1, 1.},
66 {7, u"1,,", u"#,##,##0", 1, 1.},
67 {7, u"1.,", u"#,##,##0", 2, 1.},
68 {3, u"1,.", u"#,##,##0", 3, 1.},
69 {7, u"1..", u"#,##,##0", 2, 1.},
70 {3, u",1", u"#,##,##0", 2, 1.},
71 {3, u"1,1", u"#,##,##0", 1, 1.},
72 {3, u"1,1,", u"#,##,##0", 1, 1.},
73 {3, u"1,1,,", u"#,##,##0", 1, 1.},
74 {3, u"1,1,1", u"#,##,##0", 1, 1.},
75 {3, u"1,1,1,", u"#,##,##0", 1, 1.},
76 {3, u"1,1,1,,", u"#,##,##0", 1, 1.},
77 {3, u"-51423", u"0", 6, -51423.},
78 {3, u"51423-", u"0", 5, 51423.}, // plus and minus sign by default do NOT match after
79 {3, u"+51423", u"0", 6, 51423.},
80 {3, u"51423+", u"0", 5, 51423.}, // plus and minus sign by default do NOT match after
81 {3, u"%51423", u"0", 6, 51423.},
82 {3, u"51423%", u"0", 6, 51423.},
83 {3, u"51423%%", u"0", 6, 51423.},
84 {3, u"โ€ฐ51423", u"0", 6, 51423.},
85 {3, u"51423โ€ฐ", u"0", 6, 51423.},
86 {3, u"51423โ€ฐโ€ฐ", u"0", 6, 51423.},
87 {3, u"โˆž", u"0", 1, INFINITY},
88 {3, u"-โˆž", u"0", 2, -INFINITY},
89 {3, u"@@@123 @@", u"0", 6, 123.}, // TODO: Should padding be strong instead of weak?
90 {3, u"@@@123@@ ", u"0", 6, 123.}, // TODO: Should padding be strong instead of weak?
91 {3, u"a51423US dollars", u"a0ยคยคยค", 16, 51423.},
92 {3, u"a 51423 US dollars", u"a0ยคยคยค", 18, 51423.},
93 {3, u"514.23 USD", u"ยค0", 10, 514.23},
94 {3, u"514.23 GBP", u"ยค0", 10, 514.23},
95 {3, u"a ๐Ÿฑ๐Ÿญ๐Ÿฐ๐Ÿฎ๐Ÿฏ b", u"a0b", 14, 51423.},
96 {3, u"-a ๐Ÿฑ๐Ÿญ๐Ÿฐ๐Ÿฎ๐Ÿฏ b", u"a0b", 15, -51423.},
97 {3, u"a -๐Ÿฑ๐Ÿญ๐Ÿฐ๐Ÿฎ๐Ÿฏ b", u"a0b", 15, -51423.},
98 {3, u"๐Ÿฑ๐Ÿญ๐Ÿฐ๐Ÿฎ๐Ÿฏ", u"[0];(0)", 10, 51423.},
99 {3, u"[๐Ÿฑ๐Ÿญ๐Ÿฐ๐Ÿฎ๐Ÿฏ", u"[0];(0)", 11, 51423.},
100 {3, u"๐Ÿฑ๐Ÿญ๐Ÿฐ๐Ÿฎ๐Ÿฏ]", u"[0];(0)", 11, 51423.},
101 {3, u"[๐Ÿฑ๐Ÿญ๐Ÿฐ๐Ÿฎ๐Ÿฏ]", u"[0];(0)", 12, 51423.},
102 {3, u"(๐Ÿฑ๐Ÿญ๐Ÿฐ๐Ÿฎ๐Ÿฏ", u"[0];(0)", 11, -51423.},
103 {3, u"๐Ÿฑ๐Ÿญ๐Ÿฐ๐Ÿฎ๐Ÿฏ)", u"[0];(0)", 11, -51423.},
104 {3, u"(๐Ÿฑ๐Ÿญ๐Ÿฐ๐Ÿฎ๐Ÿฏ)", u"[0];(0)", 12, -51423.},
105 {3, u"๐Ÿฑ๐Ÿญ๐Ÿฐ๐Ÿฎ๐Ÿฏ", u"{0};{0}", 10, 51423.},
106 {3, u"{๐Ÿฑ๐Ÿญ๐Ÿฐ๐Ÿฎ๐Ÿฏ", u"{0};{0}", 11, 51423.},
107 {3, u"๐Ÿฑ๐Ÿญ๐Ÿฐ๐Ÿฎ๐Ÿฏ}", u"{0};{0}", 11, 51423.},
108 {3, u"{๐Ÿฑ๐Ÿญ๐Ÿฐ๐Ÿฎ๐Ÿฏ}", u"{0};{0}", 12, 51423.},
109 {1, u"a40b", u"a0'0b'", 3, 40.}, // greedy code path thinks "40" is the number
110 {2, u"a40b", u"a0'0b'", 4, 4.}, // slow code path finds the suffix "0b"
111 {3, u"๐Ÿฑ.๐Ÿญ๐Ÿฐ๐ŸฎE๐Ÿฏ", u"0", 12, 5142.},
112 {3, u"๐Ÿฑ.๐Ÿญ๐Ÿฐ๐ŸฎE-๐Ÿฏ", u"0", 13, 0.005142},
113 {3, u"๐Ÿฑ.๐Ÿญ๐Ÿฐ๐Ÿฎe-๐Ÿฏ", u"0", 13, 0.005142},
114 {7, u"5,142.50 Canadian dollars", u"#,##,##0 ยคยคยค", 25, 5142.5},
115 {3, u"a$ b5", u"a ยค b0", 5, 5.0},
116 {3, u"๐Ÿ“บ1.23", u"๐Ÿ“บ0;๐Ÿ“ป0", 6, 1.23},
117 {3, u"๐Ÿ“ป1.23", u"๐Ÿ“บ0;๐Ÿ“ป0", 6, -1.23},
118 {3, u".00", u"0", 3, 0.0},
119 {3, u" 1,234", u"a0", 35, 1234.}, // should not hang
120 {3, u"NaN", u"0", 3, NAN},
121 {3, u"NaN E5", u"0", 6, NAN},
122 {3, u"0", u"0", 1, 0.0}};
123
124 parse_flags_t parseFlags = PARSE_FLAG_IGNORE_CASE | PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES;
125 for (auto& cas : cases) {
126 UnicodeString inputString(cas.inputString);
127 UnicodeString patternString(cas.patternString);
128 LocalPointer<const NumberParserImpl> parser(
129 NumberParserImpl::createSimpleParser(
130 Locale("en"), patternString, parseFlags, status));
131 if (status.errDataIfFailureAndReset("createSimpleParser() failed")) {
132 continue;
133 }
134 UnicodeString message =
135 UnicodeString("Input <") + inputString + UnicodeString("> Parser ") + parser->toString();
136
137 if (0 != (cas.flags & 0x01)) {
138 // Test greedy code path
139 ParsedNumber resultObject;
140 parser->parse(inputString, true, resultObject, status);
141 assertTrue("Greedy Parse failed: " + message, resultObject.success());
142 assertEquals(
143 "Greedy Parse failed: " + message, cas.expectedCharsConsumed, resultObject.charEnd);
144 assertEquals(
145 "Greedy Parse failed: " + message, cas.expectedResultDouble, resultObject.getDouble());
146 }
147
148 if (0 != (cas.flags & 0x02)) {
149 // Test slow code path
150 ParsedNumber resultObject;
151 parser->parse(inputString, false, resultObject, status);
152 assertTrue("Non-Greedy Parse failed: " + message, resultObject.success());
153 assertEquals(
154 "Non-Greedy Parse failed: " + message,
155 cas.expectedCharsConsumed,
156 resultObject.charEnd);
157 assertEquals(
158 "Non-Greedy Parse failed: " + message,
159 cas.expectedResultDouble,
160 resultObject.getDouble());
161 }
162
163 if (0 != (cas.flags & 0x04)) {
164 // Test with strict separators
165 parser.adoptInstead(
166 NumberParserImpl::createSimpleParser(
167 Locale("en"),
168 patternString,
169 parseFlags | PARSE_FLAG_STRICT_GROUPING_SIZE,
170 status));
171 ParsedNumber resultObject;
172 parser->parse(inputString, true, resultObject, status);
173 assertTrue("Strict Parse failed: " + message, resultObject.success());
174 assertEquals(
175 "Strict Parse failed: " + message, cas.expectedCharsConsumed, resultObject.charEnd);
176 assertEquals(
177 "Strict Parse failed: " + message, cas.expectedResultDouble, resultObject.getDouble());
178 }
179 }
180 }
181
182 void NumberParserTest::testSeriesMatcher() {
183 IcuTestErrorCode status(*this, "testSeriesMatcher");
184
185 DecimalFormatSymbols symbols("en", status);
186 if (status.errDataIfFailureAndReset("Failure in DecimalFormtSymbols constructor")) {
187 return;
188 }
189 PlusSignMatcher m0(symbols, false);
190 MinusSignMatcher m1(symbols, false);
191 IgnorablesMatcher m2(unisets::DEFAULT_IGNORABLES);
192 PercentMatcher m3(symbols);
193 IgnorablesMatcher m4(unisets::DEFAULT_IGNORABLES);
194
195 ArraySeriesMatcher::MatcherArray matchers(5);
196 matchers[0] = &m0;
197 matchers[1] = &m1;
198 matchers[2] = &m2;
199 matchers[3] = &m3;
200 matchers[4] = &m4;
201 ArraySeriesMatcher series(matchers, 5);
202
203 assertFalse("", series.smokeTest(StringSegment(u"x", false)));
204 assertFalse("", series.smokeTest(StringSegment(u"-", false)));
205 assertTrue("", series.smokeTest(StringSegment(u"+", false)));
206
207 static const struct TestCase {
208 const char16_t* input;
209 int32_t expectedOffset;
210 bool expectedMaybeMore;
211 } cases[] = {{u"", 0, true},
212 {u" ", 0, false},
213 {u"$", 0, false},
214 {u"+", 0, true},
215 {u" +", 0, false},
216 {u"+-", 0, true},
217 {u"+ -", 0, false},
218 {u"+- ", 0, true},
219 {u"+- $", 0, false},
220 {u"+-%", 3, true},
221 {u" +- % ", 0, false},
222 {u"+- % ", 7, true},
223 {u"+-%$", 3, false}};
224
225 for (auto& cas : cases) {
226 UnicodeString input(cas.input);
227
228 StringSegment segment(input, false);
229 ParsedNumber result;
230 bool actualMaybeMore = series.match(segment, result, status);
231 int actualOffset = segment.getOffset();
232
233 assertEquals("'" + input + "'", cas.expectedOffset, actualOffset);
234 assertEquals("'" + input + "'", cas.expectedMaybeMore, actualMaybeMore);
235 }
236 }
237
238 void NumberParserTest::testCombinedCurrencyMatcher() {
239 IcuTestErrorCode status(*this, "testCombinedCurrencyMatcher");
240
241 IgnorablesMatcher ignorables(unisets::DEFAULT_IGNORABLES);
242 Locale locale = Locale::getEnglish();
243
244 DecimalFormatSymbols dfs(locale, status);
245 if (status.errDataIfFailureAndReset("Failure in DecimalFormtSymbols constructor")) {
246 return;
247 }
248 dfs.setSymbol(DecimalFormatSymbols::kCurrencySymbol, u"IU$", status);
249 dfs.setSymbol(DecimalFormatSymbols::kIntlCurrencySymbol, u"ICU", status);
250 CurrencySymbols currencySymbols({u"ICU", status}, locale, dfs, status);
251
252 AffixTokenMatcherSetupData affixSetupData = {
253 currencySymbols, {"en", status}, ignorables, "en", 0};
254 AffixTokenMatcherWarehouse warehouse(&affixSetupData);
255 NumberParseMatcher& matcher = warehouse.currency(status);
256 affixSetupData.parseFlags = PARSE_FLAG_NO_FOREIGN_CURRENCY;
257 AffixTokenMatcherWarehouse warehouseNoForeign(&affixSetupData);
258 NumberParseMatcher& matcherNoForeign = warehouseNoForeign.currency(status);
259
260 static const struct TestCase {
261 const char16_t* input;
262 const char16_t* expectedCurrencyCode;
263 const char16_t* expectedNoForeignCurrencyCode;
264 } cases[]{{u"", u"", u""},
265 {u"FOO", u"", u""},
266 {u"USD", u"USD", u""},
267 {u"$", u"USD", u""},
268 {u"US dollars", u"USD", u""},
269 {u"eu", u"", u""},
270 {u"euros", u"EUR", u""},
271 {u"ICU", u"ICU", u"ICU"},
272 {u"IU$", u"ICU", u"ICU"}};
273 for (auto& cas : cases) {
274 UnicodeString input(cas.input);
275
276 {
277 StringSegment segment(input, false);
278 ParsedNumber result;
279 matcher.match(segment, result, status);
280 assertEquals(
281 "Parsing " + input,
282 cas.expectedCurrencyCode,
283 result.currencyCode);
284 assertEquals(
285 "Whole string on " + input,
286 cas.expectedCurrencyCode[0] == 0 ? 0 : input.length(),
287 result.charEnd);
288 }
289 {
290 StringSegment segment(input, false);
291 ParsedNumber result;
292 matcherNoForeign.match(segment, result, status);
293 assertEquals(
294 "[no foreign] Parsing " + input,
295 cas.expectedNoForeignCurrencyCode,
296 result.currencyCode);
297 assertEquals(
298 "[no foreign] Whole string on " + input,
299 cas.expectedNoForeignCurrencyCode[0] == 0 ? 0 : input.length(),
300 result.charEnd);
301 }
302 }
303 }
304
305 void NumberParserTest::testAffixPatternMatcher() {
306 IcuTestErrorCode status(*this, "testAffixPatternMatcher");
307 Locale locale = Locale::getEnglish();
308 IgnorablesMatcher ignorables(unisets::DEFAULT_IGNORABLES);
309
310 DecimalFormatSymbols dfs(locale, status);
311 dfs.setSymbol(DecimalFormatSymbols::kCurrencySymbol, u"IU$", status);
312 dfs.setSymbol(DecimalFormatSymbols::kIntlCurrencySymbol, u"ICU", status);
313 CurrencySymbols currencySymbols({u"ICU", status}, locale, dfs, status);
314
315 AffixTokenMatcherSetupData affixSetupData = {
316 currencySymbols, {"en", status}, ignorables, "en", 0};
317 AffixTokenMatcherWarehouse warehouse(&affixSetupData);
318
319 static const struct TestCase {
320 bool exactMatch;
321 const char16_t* affixPattern;
322 int32_t expectedMatcherLength;
323 const char16_t* sampleParseableString;
324 } cases[] = {{false, u"-", 1, u"-"},
325 {false, u"+-%", 5, u"+-%"},
326 {true, u"+-%", 3, u"+-%"},
327 {false, u"ab c", 5, u"a bc"},
328 {true, u"abc", 3, u"abc"},
329 {false, u"hello-to+this%veryยคlongโ€ฐstring", 59, u"hello-to+this%very USD longโ€ฐstring"}};
330
331 for (auto& cas : cases) {
332 UnicodeString affixPattern(cas.affixPattern);
333 UnicodeString sampleParseableString(cas.sampleParseableString);
334 int parseFlags = cas.exactMatch ? PARSE_FLAG_EXACT_AFFIX : 0;
335
336 bool success;
337 AffixPatternMatcher matcher = AffixPatternMatcher::fromAffixPattern(
338 affixPattern, warehouse, parseFlags, &success, status);
339 if (!status.errDataIfFailureAndReset("Creation should be successful")) {
340
341 // Check that the matcher has the expected number of children
342 assertEquals(affixPattern + " " + cas.exactMatch, cas.expectedMatcherLength, matcher.length());
343
344 // Check that the matcher works on a sample string
345 StringSegment segment(sampleParseableString, false);
346 ParsedNumber result;
347 matcher.match(segment, result, status);
348 assertEquals(affixPattern + " " + cas.exactMatch, sampleParseableString.length(), result.charEnd);
349 }
350 }
351 }
352
353
354 #endif