1 // © 2018 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 #include "unicode/utypes.h"
6 #if !UCONFIG_NO_FORMATTING
8 // Allow implicit conversion from char16_t* to UnicodeString for this file:
9 // Helpful in toString methods and elsewhere.
10 #define UNISTR_FROM_STRING_EXPLICIT
12 #include "numparse_types.h"
13 #include "numparse_scientific.h"
14 #include "static_unicode_sets.h"
17 using namespace icu::numparse
;
18 using namespace icu::numparse::impl
;
// Returns the UnicodeSet that unisets::get() yields for the MINUS_SIGN key.
// The returned pointer is dereferenced without a null check; presumably get()
// never returns null for this key -- confirm against static_unicode_sets.h.
23 inline const UnicodeSet
& minusSignSet() {
24 return *unisets::get(unisets::MINUS_SIGN
);
// Returns the UnicodeSet that unisets::get() yields for the PLUS_SIGN key.
// The returned pointer is dereferenced without a null check; presumably get()
// never returns null for this key -- confirm against static_unicode_sets.h.
27 inline const UnicodeSet
& plusSignSet() {
28 return *unisets::get(unisets::PLUS_SIGN
);
// Returns the UnicodeSet that unisets::get() yields for the STRICT_IGNORABLES
// key. match() uses it to skip code points around the exponent sign.
// The returned pointer is dereferenced without a null check; presumably get()
// never returns null for this key -- confirm against static_unicode_sets.h.
31 inline const UnicodeSet
& ignorablesSet() { // <rdar://problem/39156484>
32 return *unisets::get(unisets::STRICT_IGNORABLES
);
// Builds a scientific-notation matcher from the symbols in `dfs`.
//
//  - fExponentSeparatorString is initialized from the kExponentialSymbol symbol.
//  - fExponentMatcher is constructed from `dfs` and `grouper` with the flags
//    PARSE_FLAG_INTEGER_ONLY | PARSE_FLAG_GROUPING_DISABLED.
//  - The minus-sign and plus-sign symbols are each tested against
//    minusSignSet() / plusSignSet(); the visible statements either mark the
//    matching fCustom*Sign member bogus or assign the symbol to it.
//
// NOTE(review): the lines separating the "set to bogus" statement from the
// "assign symbol" statement are not visible in this copy of the file.
// Presumably the member is bogus when the set already contains the symbol and
// holds the custom symbol otherwise -- confirm against the complete source.
38 ScientificMatcher::ScientificMatcher(const DecimalFormatSymbols
& dfs
, const Grouper
& grouper
)
39 : fExponentSeparatorString(dfs
.getConstSymbol(DecimalFormatSymbols::kExponentialSymbol
)),
40 fExponentMatcher(dfs
, grouper
, PARSE_FLAG_INTEGER_ONLY
| PARSE_FLAG_GROUPING_DISABLED
) {
// Minus sign: compare the symbol from `dfs` against minusSignSet().
42 const UnicodeString
& minusSign
= dfs
.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol
);
43 if (minusSignSet().contains(minusSign
)) {
44 fCustomMinusSign
.setToBogus();
46 fCustomMinusSign
= minusSign
;
// Plus sign: same treatment, using plusSignSet() and fCustomPlusSign.
49 const UnicodeString
& plusSign
= dfs
.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol
);
50 if (plusSignSet().contains(plusSign
)) {
51 fCustomPlusSign
.setToBogus();
53 fCustomPlusSign
= plusSign
;
// Tries to consume an exponent at the start of `segment`: the exponent
// separator string, an optional sign (optionally surrounded by ignorable code
// points), and then digits matched by fExponentMatcher.
//
// segment  Input being parsed. Its offset is advanced over matched text and is
//          reset to the position recorded before the separator when a custom
//          sign matches only partially or when no exponent digit is consumed.
// result   Checked with seenNumber() and for FLAG_HAS_EXPONENT before any
//          matching is attempted. FLAG_HAS_EXPONENT is set on it once at least
//          one exponent digit has been consumed.
// status   Passed through to fExponentMatcher.match().
//
// Returns the value produced by fExponentMatcher.match() when the full
// separator was matched and digit matching was attempted.
// NOTE(review): the statements for the remaining exit paths are not visible in
// this copy of the file. Going by the surrounding comments, the partial-match
// cases presumably return true and the guard failures / no-match case return
// false -- confirm against the complete source.
57 bool ScientificMatcher::match(StringSegment
& segment
, ParsedNumber
& result
, UErrorCode
& status
) const {
58 // Only accept scientific notation after the mantissa.
59 if (!result
.seenNumber()) {
63 // Only accept one exponent per string.
64 if (0 != (result
.flags
& FLAG_HAS_EXPONENT
)) {
68 // First match the scientific separator, and then match another number after it.
69 // NOTE: This is guarded by the smoke test; no need to check fExponentSeparatorString length again.
// overlap1 is the common-prefix length between the segment and the separator,
// as reported by getCommonPrefixLength().
70 int overlap1
= segment
.getCommonPrefixLength(fExponentSeparatorString
);
71 if (overlap1
== fExponentSeparatorString
.length()) {
72 // Full exponent separator match.
// Remember the offset before the separator so the match can be undone below.
73 int32_t exponentStart
= segment
.getOffset(); // <rdar://problem/39156484>
75 // First attempt to get a code point, returning true if we can't get one.
76 if (segment
.length() == overlap1
) {
79 segment
.adjustOffset(overlap1
);
81 // Allow a sign, and then try to match digits.
// Skip ignorable code points that precede the sign.
82 while (segment
.length() > 0 && segment
.startsWith(ignorablesSet())) { // <rdar://problem/39156484>
83 segment
.adjustOffsetByCodePoint();
// exponentSign starts at +1 and is later handed to fExponentMatcher.match().
// NOTE(review): no assignment of a negative value is visible in the minus-sign
// branches of this copy; presumably those lines were lost -- confirm.
85 int8_t exponentSign
= 1;
// Sign detection order: standard minus set, standard plus set, then the
// custom minus and custom plus strings.
86 if (segment
.startsWith(minusSignSet())) {
88 segment
.adjustOffsetByCodePoint();
89 } else if (segment
.startsWith(plusSignSet())) {
90 segment
.adjustOffsetByCodePoint();
91 } else if (segment
.startsWith(fCustomMinusSign
)) {
92 // Note: call site is guarded with startsWith, which returns false on empty string
93 int32_t overlap2
= segment
.getCommonPrefixLength(fCustomMinusSign
);
94 if (overlap2
!= fCustomMinusSign
.length()) {
95 // Partial custom sign match; un-match the exponent separator.
96 segment
.setOffset(exponentStart
);
// Full custom minus sign: step over it.
100 segment
.adjustOffset(overlap2
);
101 } else if (segment
.startsWith(fCustomPlusSign
)) {
102 // Note: call site is guarded with startsWith, which returns false on empty string
103 int32_t overlap2
= segment
.getCommonPrefixLength(fCustomPlusSign
);
104 if (overlap2
!= fCustomPlusSign
.length()) {
105 // Partial custom sign match; un-match the exponent separator.
106 segment
.setOffset(exponentStart
);
// Full custom plus sign: step over it.
109 segment
.adjustOffset(overlap2
);
// Skip ignorable code points between the sign and the digits.
111 while (segment
.length() > 0 && segment
.startsWith(ignorablesSet())) { // <rdar://problem/39156484>
112 segment
.adjustOffsetByCodePoint();
115 // We are supposed to accept E0 after NaN, so we need to make sure result.quantity is available.
// The bogus flag is cleared only for the duration of the digit match and is
// restored to its previous value right afterwards.
116 bool wasBogus
= result
.quantity
.bogus
;
117 result
.quantity
.bogus
= false;
// Offset before digit matching; compared afterwards to detect whether the
// exponent matcher consumed anything.
118 int digitsOffset
= segment
.getOffset();
119 bool digitsReturnValue
= fExponentMatcher
.match(segment
, result
, exponentSign
, status
);
120 result
.quantity
.bogus
= wasBogus
;
122 if (segment
.getOffset() != digitsOffset
) {
123 // At least one exponent digit was matched.
124 result
.flags
|= FLAG_HAS_EXPONENT
;
126 // No exponent digits were matched; un-match the exponent separator.
127 segment
.setOffset(exponentStart
);
129 return digitsReturnValue
;
// The whole remaining segment is a prefix of the separator.
131 } else if (overlap1
== segment
.length()) {
132 // Partial exponent separator match
// Returns whether `segment` starts with fExponentSeparatorString.
// match() relies on this check as its guard (see the NOTE inside match()), so
// it does not re-check the separator length itself.
140 bool ScientificMatcher::smokeTest(const StringSegment
& segment
) const {
141 return segment
.startsWith(fExponentSeparatorString
);
// Returns the fixed text "<Scientific>" as a UnicodeString.
// Relies on the implicit char16_t* -> UnicodeString conversion that this file
// enables through UNISTR_FROM_STRING_EXPLICIT near the top.
144 UnicodeString
ScientificMatcher::toString() const {
145 return u
"<Scientific>";
149 #endif /* #if !UCONFIG_NO_FORMATTING */