1 // © 2018 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 #include "unicode/utypes.h"
6 #if !UCONFIG_NO_FORMATTING
8 #include "numbertest.h"
9 #include "numparse_impl.h"
10 #include "static_unicode_sets.h"
11 #include "unicode/dcfmtsym.h"
12 #include "unicode/testlog.h"
15 #include <numparse_affixes.h>
17 using icu::unisets::get
;
// Test-suite entry point for NumberParserTest: logs the suite banner and passes each
// of the four test methods in this file to TESTCASE_AUTO.
// NOTE(review): the closing lines of this function are not visible in this excerpt —
// confirm against the complete file.
19 void NumberParserTest::runIndexedTest(int32_t index
, UBool exec
, const char*& name
, char*) {
21 logln("TestSuite NumberParserTest: ");
// One TESTCASE_AUTO entry per test method defined below.
24 TESTCASE_AUTO(testBasic
);
25 TESTCASE_AUTO(testSeriesMatcher
);
26 TESTCASE_AUTO(testCombinedCurrencyMatcher
);
27 TESTCASE_AUTO(testAffixPatternMatcher
);
// Table-driven test of NumberParserImpl::createSimpleParser() + parse().
// For every row a parser is built for Locale("en") from the row's pattern, and the input
// is parsed in up to three modes selected by bits of cas.flags:
//   0x01 - greedy parse (second argument of parse() is true)
//   0x02 - slow, non-greedy parse (second argument of parse() is false)
//   0x04 - greedy parse with PARSE_FLAG_STRICT_GROUPING_SIZE added to the parser flags
// In each mode the test checks resultObject.success(), resultObject.charEnd against
// expectedCharsConsumed, and resultObject.getDouble() against expectedResultDouble.
31 void NumberParserTest::testBasic() {
32 IcuTestErrorCode
status(*this, "testBasic");
// One row per scenario. The leading integer of each initializer (1, 2, 3 or 7) is
// presumably the `flags` bitmask read below as cas.flags; its field declaration is not
// visible in this excerpt — TODO confirm.
34 static const struct TestCase
{
36 const char16_t* inputString
;
37 const char16_t* patternString
;
38 int32_t expectedCharsConsumed
;
39 double expectedResultDouble
;
40 } cases
[] = {{3, u
"51423", u
"0", 5, 51423.},
41 {3, u
"51423x", u
"0", 5, 51423.},
42 {3, u
" 51423", u
"0", 6, 51423.},
43 {3, u
"51423 ", u
"0", 5, 51423.},
44 {3, u
"๐ฑ๐ญ๐ฐ๐ฎ๐ฏ", u
"0", 10, 51423.},
45 {3, u
"๐ฑ๐ญ๐ฐ๐ฎ๐ฏx", u
"0", 10, 51423.},
46 {3, u
" ๐ฑ๐ญ๐ฐ๐ฎ๐ฏ", u
"0", 11, 51423.},
47 {3, u
"๐ฑ๐ญ๐ฐ๐ฎ๐ฏ ", u
"0", 10, 51423.},
// Rows using the grouping pattern "#,##,##0"; most of them carry the value 7, which has
// the 0x04 bit set and therefore also runs the strict-grouping mode below.
48 {7, u
"51,423", u
"#,##,##0", 6, 51423.},
49 {7, u
" 51,423", u
"#,##,##0", 7, 51423.},
50 {7, u
"51,423 ", u
"#,##,##0", 6, 51423.},
51 {7, u
"51,423,", u
"#,##,##0", 6, 51423.},
52 {7, u
"51,423,,", u
"#,##,##0", 6, 51423.},
53 {7, u
"51,423.5", u
"#,##,##0", 8, 51423.5},
54 {7, u
"51,423.5,", u
"#,##,##0", 8, 51423.5},
55 {7, u
"51,423.5,,", u
"#,##,##0", 8, 51423.5},
56 {7, u
"51,423.5.", u
"#,##,##0", 8, 51423.5},
57 {7, u
"51,423.5..", u
"#,##,##0", 8, 51423.5},
58 {7, u
"๐ฑ๐ญ,๐ฐ๐ฎ๐ฏ", u
"#,##,##0", 11, 51423.},
59 {7, u
"๐ณ,๐ด๐ต,๐ฑ๐ญ,๐ฐ๐ฎ๐ฏ", u
"#,##,##0", 19, 78951423.},
60 {7, u
"๐ณ๐ด,๐ต๐ฑ๐ญ.๐ฐ๐ฎ๐ฏ", u
"#,##,##0", 18, 78951.423},
61 {7, u
"๐ณ๐ด,๐ฌ๐ฌ๐ฌ", u
"#,##,##0", 11, 78000.},
62 {7, u
"๐ณ๐ด,๐ฌ๐ฌ๐ฌ.๐ฌ๐ฌ๐ฌ", u
"#,##,##0", 18, 78000.},
63 {7, u
"๐ณ๐ด,๐ฌ๐ฌ๐ฌ.๐ฌ๐ฎ๐ฏ", u
"#,##,##0", 18, 78000.023},
64 {7, u
"๐ณ๐ด.๐ฌ๐ฌ๐ฌ.๐ฌ๐ฎ๐ฏ", u
"#,##,##0", 11, 78.},
65 {7, u
"1,", u
"#,##,##0", 1, 1.},
66 {7, u
"1,,", u
"#,##,##0", 1, 1.},
67 {7, u
"1.,", u
"#,##,##0", 2, 1.},
68 {3, u
"1,.", u
"#,##,##0", 3, 1.},
69 {7, u
"1..", u
"#,##,##0", 2, 1.},
70 {3, u
",1", u
"#,##,##0", 2, 1.},
71 {3, u
"1,1", u
"#,##,##0", 1, 1.},
72 {3, u
"1,1,", u
"#,##,##0", 1, 1.},
73 {3, u
"1,1,,", u
"#,##,##0", 1, 1.},
74 {3, u
"1,1,1", u
"#,##,##0", 1, 1.},
75 {3, u
"1,1,1,", u
"#,##,##0", 1, 1.},
76 {3, u
"1,1,1,,", u
"#,##,##0", 1, 1.},
// Sign, percent and permille handling around the digits.
77 {3, u
"-51423", u
"0", 6, -51423.},
78 {3, u
"51423-", u
"0", 5, 51423.}, // plus and minus sign by default do NOT match after
79 {3, u
"+51423", u
"0", 6, 51423.},
80 {3, u
"51423+", u
"0", 5, 51423.}, // plus and minus sign by default do NOT match after
81 {3, u
"%51423", u
"0", 6, 51423.},
82 {3, u
"51423%", u
"0", 6, 51423.},
83 {3, u
"51423%%", u
"0", 6, 51423.},
84 {3, u
"โฐ51423", u
"0", 6, 51423.},
85 {3, u
"51423โฐ", u
"0", 6, 51423.},
86 {3, u
"51423โฐโฐ", u
"0", 6, 51423.},
// Rows whose expected value is +/-INFINITY.
87 {3, u
"โ", u
"0", 1, INFINITY
},
88 {3, u
"-โ", u
"0", 2, -INFINITY
},
89 {3, u
"@@@123 @@", u
"0", 6, 123.}, // TODO: Should padding be strong instead of weak?
90 {3, u
"@@@123@@ ", u
"0", 6, 123.}, // TODO: Should padding be strong instead of weak?
// Patterns with literal affixes and currency placeholders.
91 {3, u
"a51423US dollars", u
"a0ยคยคยค", 16, 51423.},
92 {3, u
"a 51423 US dollars", u
"a0ยคยคยค", 18, 51423.},
93 {3, u
"514.23 USD", u
"ยค0", 10, 514.23},
94 {3, u
"514.23 GBP", u
"ยค0", 10, 514.23},
95 {3, u
"a ๐ฑ๐ญ๐ฐ๐ฎ๐ฏ b", u
"a0b", 14, 51423.},
96 {3, u
"-a ๐ฑ๐ญ๐ฐ๐ฎ๐ฏ b", u
"a0b", 15, -51423.},
97 {3, u
"a -๐ฑ๐ญ๐ฐ๐ฎ๐ฏ b", u
"a0b", 15, -51423.},
// Patterns with separate positive and negative sub-patterns; the parser is created with
// PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES, and these rows expect a lone prefix or suffix to
// be consumed as well.
98 {3, u
"๐ฑ๐ญ๐ฐ๐ฎ๐ฏ", u
"[0];(0)", 10, 51423.},
99 {3, u
"[๐ฑ๐ญ๐ฐ๐ฎ๐ฏ", u
"[0];(0)", 11, 51423.},
100 {3, u
"๐ฑ๐ญ๐ฐ๐ฎ๐ฏ]", u
"[0];(0)", 11, 51423.},
101 {3, u
"[๐ฑ๐ญ๐ฐ๐ฎ๐ฏ]", u
"[0];(0)", 12, 51423.},
102 {3, u
"(๐ฑ๐ญ๐ฐ๐ฎ๐ฏ", u
"[0];(0)", 11, -51423.},
103 {3, u
"๐ฑ๐ญ๐ฐ๐ฎ๐ฏ)", u
"[0];(0)", 11, -51423.},
104 {3, u
"(๐ฑ๐ญ๐ฐ๐ฎ๐ฏ)", u
"[0];(0)", 12, -51423.},
105 {3, u
"๐ฑ๐ญ๐ฐ๐ฎ๐ฏ", u
"{0};{0}", 10, 51423.},
106 {3, u
"{๐ฑ๐ญ๐ฐ๐ฎ๐ฏ", u
"{0};{0}", 11, 51423.},
107 {3, u
"๐ฑ๐ญ๐ฐ๐ฎ๐ฏ}", u
"{0};{0}", 11, 51423.},
108 {3, u
"{๐ฑ๐ญ๐ฐ๐ฎ๐ฏ}", u
"{0};{0}", 12, 51423.},
// The next two rows use the same input and pattern but different mode bits (1 = greedy
// only, 2 = slow only) because the two code paths are expected to disagree.
109 {1, u
"a40b", u
"a0'0b'", 3, 40.}, // greedy code path thinks "40" is the number
110 {2, u
"a40b", u
"a0'0b'", 4, 4.}, // slow code path finds the suffix "0b"
111 {3, u
"๐ฑ.๐ญ๐ฐ๐ฎE๐ฏ", u
"0", 12, 5142.},
112 {3, u
"๐ฑ.๐ญ๐ฐ๐ฎE-๐ฏ", u
"0", 13, 0.005142},
113 {3, u
"๐ฑ.๐ญ๐ฐ๐ฎe-๐ฏ", u
"0", 13, 0.005142},
114 {7, u
"5,142.50 Canadian dollars", u
"#,##,##0 ยคยคยค", 25, 5142.5},
115 {3, u
"a$ b5", u
"a ยค b0", 5, 5.0},
116 {3, u
"๐บ1.23", u
"๐บ0;๐ป0", 6, 1.23},
117 {3, u
"๐ป1.23", u
"๐บ0;๐ป0", 6, -1.23},
118 {3, u
".00", u
"0", 3, 0.0},
119 {3, u
" 1,234", u
"a0", 35, 1234.}, // should not hang
120 {3, u
"NaN", u
"0", 3, NAN
},
121 {3, u
"NaN E5", u
"0", 6, NAN
},
122 {3, u
"0", u
"0", 1, 0.0}};
// Base parser flags shared by all rows: case-insensitive matching plus acceptance of
// unpaired affixes.
124 parse_flags_t parseFlags
= PARSE_FLAG_IGNORE_CASE
| PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES
;
125 for (auto& cas
: cases
) {
126 UnicodeString
inputString(cas
.inputString
);
127 UnicodeString
patternString(cas
.patternString
);
// The parser is owned by a LocalPointer for the duration of this iteration.
128 LocalPointer
<const NumberParserImpl
> parser(
129 NumberParserImpl::createSimpleParser(
130 Locale("en"), patternString
, parseFlags
, status
));
// NOTE(review): the body of this failure branch is not visible in this excerpt.
131 if (status
.errDataIfFailureAndReset("createSimpleParser() failed")) {
// Shared suffix for all assertion messages of this row: the input and the parser's
// toString() description.
134 UnicodeString message
=
135 UnicodeString("Input <") + inputString
+ UnicodeString("> Parser ") + parser
->toString();
137 if (0 != (cas
.flags
& 0x01)) {
138 // Test greedy code path
139 ParsedNumber resultObject
;
140 parser
->parse(inputString
, true, resultObject
, status
);
141 assertTrue("Greedy Parse failed: " + message
, resultObject
.success());
// Arguments of two comparisons follow (consumed length, then numeric value); the lines
// opening those calls are not visible in this excerpt.
143 "Greedy Parse failed: " + message
, cas
.expectedCharsConsumed
, resultObject
.charEnd
);
145 "Greedy Parse failed: " + message
, cas
.expectedResultDouble
, resultObject
.getDouble());
148 if (0 != (cas
.flags
& 0x02)) {
149 // Test slow code path
150 ParsedNumber resultObject
;
151 parser
->parse(inputString
, false, resultObject
, status
);
152 assertTrue("Non-Greedy Parse failed: " + message
, resultObject
.success());
154 "Non-Greedy Parse failed: " + message
,
155 cas
.expectedCharsConsumed
,
156 resultObject
.charEnd
);
158 "Non-Greedy Parse failed: " + message
,
159 cas
.expectedResultDouble
,
160 resultObject
.getDouble());
163 if (0 != (cas
.flags
& 0x04)) {
164 // Test with strict separators
// A second createSimpleParser() call adds PARSE_FLAG_STRICT_GROUPING_SIZE to the base
// flags; the remaining arguments and the receiving variable are not visible here.
166 NumberParserImpl::createSimpleParser(
169 parseFlags
| PARSE_FLAG_STRICT_GROUPING_SIZE
,
171 ParsedNumber resultObject
;
172 parser
->parse(inputString
, true, resultObject
, status
);
173 assertTrue("Strict Parse failed: " + message
, resultObject
.success());
175 "Strict Parse failed: " + message
, cas
.expectedCharsConsumed
, resultObject
.charEnd
);
177 "Strict Parse failed: " + message
, cas
.expectedResultDouble
, resultObject
.getDouble());
// Exercises ArraySeriesMatcher: a series of five matchers is built, smoke-tested, and
// then run against a table of inputs, checking the segment offset after match() and the
// boolean that match() returns.
182 void NumberParserTest::testSeriesMatcher() {
183 IcuTestErrorCode
status(*this, "testSeriesMatcher");
185 DecimalFormatSymbols
symbols("en", status
);
// NOTE(review): the body of this failure branch is not visible in this excerpt.
186 if (status
.errDataIfFailureAndReset("Failure in DecimalFormtSymbols constructor")) {
// Five component matchers, declared in the order plus sign, minus sign, ignorables,
// percent, ignorables.
189 PlusSignMatcher
m0(symbols
, false);
190 MinusSignMatcher
m1(symbols
, false);
191 IgnorablesMatcher
m2(unisets::DEFAULT_IGNORABLES
);
192 PercentMatcher
m3(symbols
);
193 IgnorablesMatcher
m4(unisets::DEFAULT_IGNORABLES
);
// Array with room for five matchers. The statements that fill it are not visible in
// this excerpt; presumably m0..m4 are stored in declaration order — TODO confirm.
195 ArraySeriesMatcher::MatcherArray
matchers(5);
201 ArraySeriesMatcher
series(matchers
, 5);
// smokeTest() is expected to reject segments starting with "x" or "-" and to accept a
// segment starting with "+".
203 assertFalse("", series
.smokeTest(StringSegment(u
"x", false)));
204 assertFalse("", series
.smokeTest(StringSegment(u
"-", false)));
205 assertTrue("", series
.smokeTest(StringSegment(u
"+", false)));
// Each row: input text, expected segment offset after match(), and the expected return
// value of match().
207 static const struct TestCase
{
208 const char16_t* input
;
209 int32_t expectedOffset
;
210 bool expectedMaybeMore
;
211 } cases
[] = {{u
"", 0, true},
221 {u
" +- % ", 0, false},
223 {u
"+-%$", 3, false}};
225 for (auto& cas
: cases
) {
226 UnicodeString
input(cas
.input
);
228 StringSegment
segment(input
, false);
// `result` is declared on a line that is not visible in this excerpt.
230 bool actualMaybeMore
= series
.match(segment
, result
, status
);
231 int actualOffset
= segment
.getOffset();
// Both checks use the quoted input as the assertion message.
233 assertEquals("'" + input
+ "'", cas
.expectedOffset
, actualOffset
);
234 assertEquals("'" + input
+ "'", cas
.expectedMaybeMore
, actualMaybeMore
);
// Exercises the currency matcher returned by AffixTokenMatcherWarehouse::currency() in
// two configurations: parse flags 0, and PARSE_FLAG_NO_FOREIGN_CURRENCY. For each input
// the test compares result.currencyCode with the expected code for that configuration.
238 void NumberParserTest::testCombinedCurrencyMatcher() {
239 IcuTestErrorCode
status(*this, "testCombinedCurrencyMatcher");
241 IgnorablesMatcher
ignorables(unisets::DEFAULT_IGNORABLES
);
242 Locale locale
= Locale::getEnglish();
244 DecimalFormatSymbols
dfs(locale
, status
);
// NOTE(review): the body of this failure branch is not visible in this excerpt.
245 if (status
.errDataIfFailureAndReset("Failure in DecimalFormtSymbols constructor")) {
// Override the currency symbols with the custom strings "IU$" and "ICU" that the last
// two table rows use as input.
248 dfs
.setSymbol(DecimalFormatSymbols::kCurrencySymbol
, u
"IU$", status
);
249 dfs
.setSymbol(DecimalFormatSymbols::kIntlCurrencySymbol
, u
"ICU", status
);
250 CurrencySymbols
currencySymbols({u
"ICU", status
}, locale
, dfs
, status
);
// Setup data for the first warehouse; the trailing 0 is presumably the parseFlags
// member that is reassigned further down — TODO confirm field order.
252 AffixTokenMatcherSetupData affixSetupData
= {
253 currencySymbols
, {"en", status
}, ignorables
, "en", 0};
254 AffixTokenMatcherWarehouse
warehouse(&affixSetupData
);
255 NumberParseMatcher
& matcher
= warehouse
.currency(status
);
// Same setup data with PARSE_FLAG_NO_FOREIGN_CURRENCY, used for a second warehouse and
// matcher.
256 affixSetupData
.parseFlags
= PARSE_FLAG_NO_FOREIGN_CURRENCY
;
257 AffixTokenMatcherWarehouse
warehouseNoForeign(&affixSetupData
);
258 NumberParseMatcher
& matcherNoForeign
= warehouseNoForeign
.currency(status
);
// Each row: input text, expected currency code from `matcher`, expected currency code
// from `matcherNoForeign`. An empty expected code goes together with an expected
// offset of 0 in the checks below.
260 static const struct TestCase
{
261 const char16_t* input
;
262 const char16_t* expectedCurrencyCode
;
263 const char16_t* expectedNoForeignCurrencyCode
;
264 } cases
[]{{u
"", u
"", u
""},
266 {u
"USD", u
"USD", u
""},
268 {u
"US dollars", u
"USD", u
""},
270 {u
"euros", u
"EUR", u
""},
271 {u
"ICU", u
"ICU", u
"ICU"},
272 {u
"IU$", u
"ICU", u
"ICU"}};
273 for (auto& cas
: cases
) {
274 UnicodeString
input(cas
.input
);
// First pass: matcher created with parse flags 0. `result` is declared on a line that
// is not visible in this excerpt.
277 StringSegment
segment(input
, false);
279 matcher
.match(segment
, result
, status
);
// Arguments of the currency-code comparison; the line opening the call is not visible.
282 cas
.expectedCurrencyCode
,
283 result
.currencyCode
);
// Expected value is the full input length when a currency code is expected, otherwise
// 0; the actual-value argument is not visible in this excerpt.
285 "Whole string on " + input
,
286 cas
.expectedCurrencyCode
[0] == 0 ? 0 : input
.length(),
// Second pass: the same checks against the no-foreign-currency matcher.
290 StringSegment
segment(input
, false);
292 matcherNoForeign
.match(segment
, result
, status
);
294 "[no foreign] Parsing " + input
,
295 cas
.expectedNoForeignCurrencyCode
,
296 result
.currencyCode
);
298 "[no foreign] Whole string on " + input
,
299 cas
.expectedNoForeignCurrencyCode
[0] == 0 ? 0 : input
.length(),
// Exercises AffixPatternMatcher::fromAffixPattern(): for each affix pattern the test
// builds a matcher (with or without PARSE_FLAG_EXACT_AFFIX), checks matcher.length()
// against the expected value, and checks that matching a sample string leaves
// result.charEnd equal to the sample's length.
// NOTE(review): the end of this function is not visible in this excerpt.
305 void NumberParserTest::testAffixPatternMatcher() {
306 IcuTestErrorCode
status(*this, "testAffixPatternMatcher");
307 Locale locale
= Locale::getEnglish();
308 IgnorablesMatcher
ignorables(unisets::DEFAULT_IGNORABLES
);
310 DecimalFormatSymbols
dfs(locale
, status
);
// Custom currency symbols "IU$" and "ICU" are set on the symbols object.
311 dfs
.setSymbol(DecimalFormatSymbols::kCurrencySymbol
, u
"IU$", status
);
312 dfs
.setSymbol(DecimalFormatSymbols::kIntlCurrencySymbol
, u
"ICU", status
);
313 CurrencySymbols
currencySymbols({u
"ICU", status
}, locale
, dfs
, status
);
315 AffixTokenMatcherSetupData affixSetupData
= {
316 currencySymbols
, {"en", status
}, ignorables
, "en", 0};
317 AffixTokenMatcherWarehouse
warehouse(&affixSetupData
);
// Each row: a leading bool (presumably the `exactMatch` field read below; its
// declaration is not visible in this excerpt — TODO confirm), the affix pattern, the
// expected matcher length, and a sample string the matcher should consume completely.
319 static const struct TestCase
{
321 const char16_t* affixPattern
;
322 int32_t expectedMatcherLength
;
323 const char16_t* sampleParseableString
;
324 } cases
[] = {{false, u
"-", 1, u
"-"},
325 {false, u
"+-%", 5, u
"+-%"},
326 {true, u
"+-%", 3, u
"+-%"},
327 {false, u
"ab c", 5, u
"a bc"},
328 {true, u
"abc", 3, u
"abc"},
329 {false, u
"hello-to+this%veryยคlongโฐstring", 59, u
"hello-to+this%very USD longโฐstring"}};
331 for (auto& cas
: cases
) {
332 UnicodeString
affixPattern(cas
.affixPattern
);
333 UnicodeString
sampleParseableString(cas
.sampleParseableString
);
// Exact-match rows request PARSE_FLAG_EXACT_AFFIX; all other rows use no flags.
334 int parseFlags
= cas
.exactMatch
? PARSE_FLAG_EXACT_AFFIX
: 0;
// `success` is declared on a line that is not visible in this excerpt.
337 AffixPatternMatcher matcher
= AffixPatternMatcher::fromAffixPattern(
338 affixPattern
, warehouse
, parseFlags
, &success
, status
);
// The checks below are entered only when errDataIfFailureAndReset() returns false.
339 if (!status
.errDataIfFailureAndReset("Creation should be successful")) {
341 // Check that the matcher has the expected number of children
342 assertEquals(affixPattern
+ " " + cas
.exactMatch
, cas
.expectedMatcherLength
, matcher
.length());
344 // Check that the matcher works on a sample string
345 StringSegment
segment(sampleParseableString
, false);
// `result` is declared on a line that is not visible in this excerpt.
347 matcher
.match(segment
, result
, status
);
348 assertEquals(affixPattern
+ " " + cas
.exactMatch
, sampleParseableString
.length(), result
.charEnd
);