git.saurik.com Git - apple/icu.git/blobdiff - icuSources/i18n/numparse_scientific.cpp
ICU-66108.tar.gz
[apple/icu.git] / icuSources / i18n / numparse_scientific.cpp
index e4285d6271a5044d7a114679acd2e22163326263..4b88cd998fee09a8d10680144db204bc5effe46f 100644 (file)
@@ -12,6 +12,7 @@
 #include "numparse_types.h"
 #include "numparse_scientific.h"
 #include "static_unicode_sets.h"
+#include "string_segment.h"
 
 using namespace icu;
 using namespace icu::numparse;
@@ -28,16 +29,13 @@ inline const UnicodeSet& plusSignSet() {
     return *unisets::get(unisets::PLUS_SIGN);
 }
 
-inline const UnicodeSet& ignorablesSet() { // <rdar://problem/39156484>
-    return *unisets::get(unisets::STRICT_IGNORABLES);
-}
-
 } // namespace
 
 
 ScientificMatcher::ScientificMatcher(const DecimalFormatSymbols& dfs, const Grouper& grouper)
         : fExponentSeparatorString(dfs.getConstSymbol(DecimalFormatSymbols::kExponentialSymbol)),
-          fExponentMatcher(dfs, grouper, PARSE_FLAG_INTEGER_ONLY | PARSE_FLAG_GROUPING_DISABLED) {
+          fExponentMatcher(dfs, grouper, PARSE_FLAG_INTEGER_ONLY | PARSE_FLAG_GROUPING_DISABLED),
+          fIgnorablesMatcher(PARSE_FLAG_STRICT_IGNORABLES) {
 
     const UnicodeString& minusSign = dfs.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol);
     if (minusSignSet().contains(minusSign)) {
@@ -67,21 +65,27 @@ bool ScientificMatcher::match(StringSegment& segment, ParsedNumber& result, UErr
 
     // First match the scientific separator, and then match another number after it.
     // NOTE: This is guarded by the smoke test; no need to check fExponentSeparatorString length again.
-    int overlap1 = segment.getCommonPrefixLength(fExponentSeparatorString);
-    if (overlap1 == fExponentSeparatorString.length()) {
+    int32_t initialOffset = segment.getOffset();
+    int32_t overlap = segment.getCommonPrefixLength(fExponentSeparatorString);
+    if (overlap == fExponentSeparatorString.length()) {
         // Full exponent separator match.
-        int32_t exponentStart = segment.getOffset(); // <rdar://problem/39156484>
 
         // First attempt to get a code point, returning true if we can't get one.
-        if (segment.length() == overlap1) {
+        if (segment.length() == overlap) {
+            return true;
+        }
+        segment.adjustOffset(overlap);
+
+        // Allow ignorables before the sign.
+        // Note: call site is guarded by the segment.length() check above.
+        // Note: the ignorables matcher should not touch the result.
+        fIgnorablesMatcher.match(segment, result, status);
+        if (segment.length() == 0) {
+            segment.setOffset(initialOffset);
             return true;
         }
-        segment.adjustOffset(overlap1);
 
         // Allow a sign, and then try to match digits.
-        while (segment.length() > 0 && segment.startsWith(ignorablesSet())) { // <rdar://problem/39156484>
-            segment.adjustOffsetByCodePoint();
-        }
         int8_t exponentSign = 1;
         if (segment.startsWith(minusSignSet())) {
             exponentSign = -1;
@@ -89,27 +93,37 @@ bool ScientificMatcher::match(StringSegment& segment, ParsedNumber& result, UErr
         } else if (segment.startsWith(plusSignSet())) {
             segment.adjustOffsetByCodePoint();
         } else if (segment.startsWith(fCustomMinusSign)) {
-            // Note: call site is guarded with startsWith, which returns false on empty string
-            int32_t overlap2 = segment.getCommonPrefixLength(fCustomMinusSign);
-            if (overlap2 != fCustomMinusSign.length()) {
-                // Partial custom sign match; un-match the exponent separator.
-                segment.setOffset(exponentStart);
+            overlap = segment.getCommonPrefixLength(fCustomMinusSign);
+            if (overlap != fCustomMinusSign.length()) {
+                // Partial custom sign match
+                segment.setOffset(initialOffset);
                 return true;
             }
             exponentSign = -1;
-            segment.adjustOffset(overlap2);
+            segment.adjustOffset(overlap);
         } else if (segment.startsWith(fCustomPlusSign)) {
-            // Note: call site is guarded with startsWith, which returns false on empty string
-            int32_t overlap2 = segment.getCommonPrefixLength(fCustomPlusSign);
-            if (overlap2 != fCustomPlusSign.length()) {
-                // Partial custom sign match; un-match the exponent separator.
-                segment.setOffset(exponentStart);
+            overlap = segment.getCommonPrefixLength(fCustomPlusSign);
+            if (overlap != fCustomPlusSign.length()) {
+                // Partial custom sign match
+                segment.setOffset(initialOffset);
                 return true;
             }
-            segment.adjustOffset(overlap2);
+            segment.adjustOffset(overlap);
         }
-        while (segment.length() > 0 && segment.startsWith(ignorablesSet())) { // <rdar://problem/39156484>
-            segment.adjustOffsetByCodePoint();
+
+        // Return true if the segment is empty.
+        if (segment.length() == 0) {
+            segment.setOffset(initialOffset);
+            return true;
+        }
+
+        // Allow ignorables after the sign.
+        // Note: call site is guarded by the segment.length() check above.
+        // Note: the ignorables matcher should not touch the result.
+        fIgnorablesMatcher.match(segment, result, status);
+        if (segment.length() == 0) {
+            segment.setOffset(initialOffset);
+            return true;
         }
 
         // We are supposed to accept E0 after NaN, so we need to make sure result.quantity is available.
@@ -123,12 +137,12 @@ bool ScientificMatcher::match(StringSegment& segment, ParsedNumber& result, UErr
             // At least one exponent digit was matched.
             result.flags |= FLAG_HAS_EXPONENT;
         } else {
-            // No exponent digits were matched; un-match the exponent separator.
-            segment.setOffset(exponentStart);
+            // No exponent digits were matched
+            segment.setOffset(initialOffset);
         }
         return digitsReturnValue;
 
-    } else if (overlap1 == segment.length()) {
+    } else if (overlap == segment.length()) {
         // Partial exponent separator match
         return true;
     }