]>
Commit | Line | Data |
---|---|---|
0f5d89e8 A |
1 | // © 2018 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
3 | ||
4 | #include "unicode/utypes.h" | |
5 | ||
6 | #if !UCONFIG_NO_FORMATTING | |
7 | ||
8 | // Allow implicit conversion from char16_t* to UnicodeString for this file: | |
9 | // Helpful in toString methods and elsewhere. | |
10 | #define UNISTR_FROM_STRING_EXPLICIT | |
11 | ||
12 | #include "numparse_types.h" | |
13 | #include "numparse_scientific.h" | |
14 | #include "static_unicode_sets.h" | |
15 | ||
16 | using namespace icu; | |
17 | using namespace icu::numparse; | |
18 | using namespace icu::numparse::impl; | |
19 | ||
20 | ||
21 | namespace { | |
22 | ||
23 | inline const UnicodeSet& minusSignSet() { | |
24 | return *unisets::get(unisets::MINUS_SIGN); | |
25 | } | |
26 | ||
27 | inline const UnicodeSet& plusSignSet() { | |
28 | return *unisets::get(unisets::PLUS_SIGN); | |
29 | } | |
30 | ||
3d1f044b A |
31 | inline const UnicodeSet& ignorablesSet() { // <rdar://problem/39156484> |
32 | return *unisets::get(unisets::STRICT_IGNORABLES); | |
33 | } | |
34 | ||
0f5d89e8 A |
35 | } // namespace |
36 | ||
37 | ||
38 | ScientificMatcher::ScientificMatcher(const DecimalFormatSymbols& dfs, const Grouper& grouper) | |
39 | : fExponentSeparatorString(dfs.getConstSymbol(DecimalFormatSymbols::kExponentialSymbol)), | |
40 | fExponentMatcher(dfs, grouper, PARSE_FLAG_INTEGER_ONLY | PARSE_FLAG_GROUPING_DISABLED) { | |
41 | ||
42 | const UnicodeString& minusSign = dfs.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol); | |
43 | if (minusSignSet().contains(minusSign)) { | |
44 | fCustomMinusSign.setToBogus(); | |
45 | } else { | |
46 | fCustomMinusSign = minusSign; | |
47 | } | |
48 | ||
49 | const UnicodeString& plusSign = dfs.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol); | |
50 | if (plusSignSet().contains(plusSign)) { | |
51 | fCustomPlusSign.setToBogus(); | |
52 | } else { | |
53 | fCustomPlusSign = plusSign; | |
54 | } | |
55 | } | |
56 | ||
57 | bool ScientificMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const { | |
58 | // Only accept scientific notation after the mantissa. | |
59 | if (!result.seenNumber()) { | |
60 | return false; | |
61 | } | |
62 | ||
3d1f044b A |
63 | // Only accept one exponent per string. |
64 | if (0 != (result.flags & FLAG_HAS_EXPONENT)) { | |
65 | return false; | |
66 | } | |
67 | ||
0f5d89e8 A |
68 | // First match the scientific separator, and then match another number after it. |
69 | // NOTE: This is guarded by the smoke test; no need to check fExponentSeparatorString length again. | |
70 | int overlap1 = segment.getCommonPrefixLength(fExponentSeparatorString); | |
71 | if (overlap1 == fExponentSeparatorString.length()) { | |
72 | // Full exponent separator match. | |
3d1f044b | 73 | int32_t exponentStart = segment.getOffset(); // <rdar://problem/39156484> |
0f5d89e8 A |
74 | |
75 | // First attempt to get a code point, returning true if we can't get one. | |
76 | if (segment.length() == overlap1) { | |
77 | return true; | |
78 | } | |
79 | segment.adjustOffset(overlap1); | |
80 | ||
81 | // Allow a sign, and then try to match digits. | |
3d1f044b A |
82 | while (segment.length() > 0 && segment.startsWith(ignorablesSet())) { // <rdar://problem/39156484> |
83 | segment.adjustOffsetByCodePoint(); | |
84 | } | |
0f5d89e8 A |
85 | int8_t exponentSign = 1; |
86 | if (segment.startsWith(minusSignSet())) { | |
87 | exponentSign = -1; | |
88 | segment.adjustOffsetByCodePoint(); | |
89 | } else if (segment.startsWith(plusSignSet())) { | |
90 | segment.adjustOffsetByCodePoint(); | |
91 | } else if (segment.startsWith(fCustomMinusSign)) { | |
92 | // Note: call site is guarded with startsWith, which returns false on empty string | |
93 | int32_t overlap2 = segment.getCommonPrefixLength(fCustomMinusSign); | |
94 | if (overlap2 != fCustomMinusSign.length()) { | |
95 | // Partial custom sign match; un-match the exponent separator. | |
3d1f044b | 96 | segment.setOffset(exponentStart); |
0f5d89e8 A |
97 | return true; |
98 | } | |
99 | exponentSign = -1; | |
100 | segment.adjustOffset(overlap2); | |
101 | } else if (segment.startsWith(fCustomPlusSign)) { | |
102 | // Note: call site is guarded with startsWith, which returns false on empty string | |
103 | int32_t overlap2 = segment.getCommonPrefixLength(fCustomPlusSign); | |
104 | if (overlap2 != fCustomPlusSign.length()) { | |
105 | // Partial custom sign match; un-match the exponent separator. | |
3d1f044b | 106 | segment.setOffset(exponentStart); |
0f5d89e8 A |
107 | return true; |
108 | } | |
109 | segment.adjustOffset(overlap2); | |
110 | } | |
3d1f044b A |
111 | while (segment.length() > 0 && segment.startsWith(ignorablesSet())) { // <rdar://problem/39156484> |
112 | segment.adjustOffsetByCodePoint(); | |
113 | } | |
0f5d89e8 A |
114 | |
115 | // We are supposed to accept E0 after NaN, so we need to make sure result.quantity is available. | |
116 | bool wasBogus = result.quantity.bogus; | |
117 | result.quantity.bogus = false; | |
118 | int digitsOffset = segment.getOffset(); | |
119 | bool digitsReturnValue = fExponentMatcher.match(segment, result, exponentSign, status); | |
120 | result.quantity.bogus = wasBogus; | |
121 | ||
122 | if (segment.getOffset() != digitsOffset) { | |
123 | // At least one exponent digit was matched. | |
124 | result.flags |= FLAG_HAS_EXPONENT; | |
125 | } else { | |
126 | // No exponent digits were matched; un-match the exponent separator. | |
3d1f044b | 127 | segment.setOffset(exponentStart); |
0f5d89e8 A |
128 | } |
129 | return digitsReturnValue; | |
130 | ||
131 | } else if (overlap1 == segment.length()) { | |
132 | // Partial exponent separator match | |
133 | return true; | |
134 | } | |
135 | ||
136 | // No match | |
137 | return false; | |
138 | } | |
139 | ||
140 | bool ScientificMatcher::smokeTest(const StringSegment& segment) const { | |
141 | return segment.startsWith(fExponentSeparatorString); | |
142 | } | |
143 | ||
144 | UnicodeString ScientificMatcher::toString() const { | |
145 | return u"<Scientific>"; | |
146 | } | |
147 | ||
148 | ||
149 | #endif /* #if !UCONFIG_NO_FORMATTING */ |