1 // © 2018 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 #include "unicode/utypes.h"
6 #if !UCONFIG_NO_FORMATTING
8 // Allow implicit conversion from char16_t* to UnicodeString for this file:
9 // Helpful in toString methods and elsewhere.
10 #define UNISTR_FROM_STRING_EXPLICIT
12 #include "numparse_types.h"
13 #include "numparse_scientific.h"
14 #include "static_unicode_sets.h"
15 #include "string_segment.h"
18 using namespace icu::numparse
;
19 using namespace icu::numparse::impl
;
24 inline const UnicodeSet
& minusSignSet() {
25 return *unisets::get(unisets::MINUS_SIGN
);
28 inline const UnicodeSet
& plusSignSet() {
29 return *unisets::get(unisets::PLUS_SIGN
);
35 ScientificMatcher::ScientificMatcher(const DecimalFormatSymbols
& dfs
, const Grouper
& grouper
)
36 : fExponentSeparatorString(dfs
.getConstSymbol(DecimalFormatSymbols::kExponentialSymbol
)),
37 fExponentMatcher(dfs
, grouper
, PARSE_FLAG_INTEGER_ONLY
| PARSE_FLAG_GROUPING_DISABLED
),
38 fIgnorablesMatcher(PARSE_FLAG_STRICT_IGNORABLES
) {
40 const UnicodeString
& minusSign
= dfs
.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol
);
41 if (minusSignSet().contains(minusSign
)) {
42 fCustomMinusSign
.setToBogus();
44 fCustomMinusSign
= minusSign
;
47 const UnicodeString
& plusSign
= dfs
.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol
);
48 if (plusSignSet().contains(plusSign
)) {
49 fCustomPlusSign
.setToBogus();
51 fCustomPlusSign
= plusSign
;
55 bool ScientificMatcher::match(StringSegment
& segment
, ParsedNumber
& result
, UErrorCode
& status
) const {
56 // Only accept scientific notation after the mantissa.
57 if (!result
.seenNumber()) {
61 // Only accept one exponent per string.
62 if (0 != (result
.flags
& FLAG_HAS_EXPONENT
)) {
66 // First match the scientific separator, and then match another number after it.
67 // NOTE: This is guarded by the smoke test; no need to check fExponentSeparatorString length again.
68 int32_t initialOffset
= segment
.getOffset();
69 int32_t overlap
= segment
.getCommonPrefixLength(fExponentSeparatorString
);
70 if (overlap
== fExponentSeparatorString
.length()) {
71 // Full exponent separator match.
73 // First attempt to get a code point, returning true if we can't get one.
74 if (segment
.length() == overlap
) {
77 segment
.adjustOffset(overlap
);
79 // Allow ignorables before the sign.
80 // Note: call site is guarded by the segment.length() check above.
81 // Note: the ignorables matcher should not touch the result.
82 fIgnorablesMatcher
.match(segment
, result
, status
);
83 if (segment
.length() == 0) {
84 segment
.setOffset(initialOffset
);
88 // Allow a sign, and then try to match digits.
89 int8_t exponentSign
= 1;
90 if (segment
.startsWith(minusSignSet())) {
92 segment
.adjustOffsetByCodePoint();
93 } else if (segment
.startsWith(plusSignSet())) {
94 segment
.adjustOffsetByCodePoint();
95 } else if (segment
.startsWith(fCustomMinusSign
)) {
96 overlap
= segment
.getCommonPrefixLength(fCustomMinusSign
);
97 if (overlap
!= fCustomMinusSign
.length()) {
98 // Partial custom sign match
99 segment
.setOffset(initialOffset
);
103 segment
.adjustOffset(overlap
);
104 } else if (segment
.startsWith(fCustomPlusSign
)) {
105 overlap
= segment
.getCommonPrefixLength(fCustomPlusSign
);
106 if (overlap
!= fCustomPlusSign
.length()) {
107 // Partial custom sign match
108 segment
.setOffset(initialOffset
);
111 segment
.adjustOffset(overlap
);
114 // Return true if the segment is empty.
115 if (segment
.length() == 0) {
116 segment
.setOffset(initialOffset
);
120 // Allow ignorables after the sign.
121 // Note: call site is guarded by the segment.length() check above.
122 // Note: the ignorables matcher should not touch the result.
123 fIgnorablesMatcher
.match(segment
, result
, status
);
124 if (segment
.length() == 0) {
125 segment
.setOffset(initialOffset
);
129 // We are supposed to accept E0 after NaN, so we need to make sure result.quantity is available.
130 bool wasBogus
= result
.quantity
.bogus
;
131 result
.quantity
.bogus
= false;
132 int digitsOffset
= segment
.getOffset();
133 bool digitsReturnValue
= fExponentMatcher
.match(segment
, result
, exponentSign
, status
);
134 result
.quantity
.bogus
= wasBogus
;
136 if (segment
.getOffset() != digitsOffset
) {
137 // At least one exponent digit was matched.
138 result
.flags
|= FLAG_HAS_EXPONENT
;
140 // No exponent digits were matched
141 segment
.setOffset(initialOffset
);
143 return digitsReturnValue
;
145 } else if (overlap
== segment
.length()) {
146 // Partial exponent separator match
154 bool ScientificMatcher::smokeTest(const StringSegment
& segment
) const {
155 return segment
.startsWith(fExponentSeparatorString
);
158 UnicodeString
ScientificMatcher::toString() const {
159 return u
"<Scientific>";
163 #endif /* #if !UCONFIG_NO_FORMATTING */