[apple/icu.git] / icuSources / i18n / numparse_scientific.cpp

// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html

#include "unicode/utypes.h"

#if !UCONFIG_NO_FORMATTING

// Allow implicit conversion from char16_t* to UnicodeString for this file:
// Helpful in toString methods and elsewhere.
#define UNISTR_FROM_STRING_EXPLICIT

#include "numparse_types.h"
#include "numparse_scientific.h"
#include "static_unicode_sets.h"

using namespace icu;
using namespace icu::numparse;
using namespace icu::numparse::impl;


namespace {

/**
 * Returns the set that unisets::get() hands back for unisets::MINUS_SIGN.
 * NOTE(review): the pointer is dereferenced unconditionally, so this assumes
 * unisets::get() never yields null for this key -- confirm against static_unicode_sets.h.
 */
inline const UnicodeSet& minusSignSet() {
    const UnicodeSet* const minusSigns = unisets::get(unisets::MINUS_SIGN);
    return *minusSigns;
}

/**
 * Returns the set that unisets::get() hands back for unisets::PLUS_SIGN.
 * NOTE(review): same non-null assumption as minusSignSet().
 */
inline const UnicodeSet& plusSignSet() {
    const UnicodeSet* const plusSigns = unisets::get(unisets::PLUS_SIGN);
    return *plusSigns;
}

} // namespace


/**
 * Sets up a matcher for the exponent portion of a number.
 *
 * The exponent separator is initialized from the symbols' kExponentialSymbol, and the nested
 * exponent matcher is constructed with the integer-only and grouping-disabled parse flags.
 *
 * For each of the minus and plus signs: if the corresponding built-in sign set already
 * contains the symbol from dfs, the "custom" sign field is marked bogus; otherwise the
 * symbol is stored so that match() can test for it explicitly.
 *
 * @param dfs     Symbols supplying the exponent separator and the sign strings.
 * @param grouper Forwarded unchanged to the nested exponent matcher.
 */
ScientificMatcher::ScientificMatcher(const DecimalFormatSymbols& dfs, const Grouper& grouper)
        : fExponentSeparatorString(dfs.getConstSymbol(DecimalFormatSymbols::kExponentialSymbol)),
          fExponentMatcher(dfs, grouper, PARSE_FLAG_INTEGER_ONLY | PARSE_FLAG_GROUPING_DISABLED) {

    // Minus sign: keep a custom copy only when the built-in set does not cover it.
    const UnicodeString& symbolsMinus = dfs.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol);
    if (!minusSignSet().contains(symbolsMinus)) {
        fCustomMinusSign = symbolsMinus;
    } else {
        fCustomMinusSign.setToBogus();
    }

    // Plus sign: same treatment.
    const UnicodeString& symbolsPlus = dfs.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol);
    if (!plusSignSet().contains(symbolsPlus)) {
        fCustomPlusSign = symbolsPlus;
    } else {
        fCustomPlusSign.setToBogus();
    }
}

/**
 * Tries to consume an exponent -- separator, optional sign, then digits -- from the front
 * of the segment.
 *
 * If at least one exponent digit is consumed, FLAG_HAS_EXPONENT is set on the result and the
 * segment is left positioned after the digits. On every other path the segment offset is
 * restored to exactly where it was on entry, including when a sign was consumed but no
 * digits followed it.
 *
 * @param segment The remaining input; its offset is advanced only on a successful match.
 * @param result  The number parsed so far. Must already have seen a number, otherwise
 *                nothing is matched. Its flags may gain FLAG_HAS_EXPONENT.
 * @param status  Passed through to the nested exponent digit matcher.
 * @return false if no number has been seen yet or the separator does not match at all;
 *         true if the separator (or a custom sign) matched only partially, or the separator
 *         is the last thing in the segment; otherwise whatever the exponent digit matcher
 *         returned.
 */
bool ScientificMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const {
    // Only accept scientific notation after the mantissa.
    if (!result.seenNumber()) {
        return false;
    }

    // Remember the entry position so that every bail-out below can restore it exactly,
    // regardless of how much (separator and/or sign) has been consumed by then.
    const int32_t initialOffset = segment.getOffset();

    // First match the scientific separator, and then match another number after it.
    // NOTE: This is guarded by the smoke test; no need to check fExponentSeparatorString length again.
    const int32_t separatorOverlap = segment.getCommonPrefixLength(fExponentSeparatorString);
    if (separatorOverlap == fExponentSeparatorString.length()) {
        // Full exponent separator match.

        // Nothing follows the separator: report true without consuming anything.
        if (segment.length() == separatorOverlap) {
            return true;
        }
        segment.adjustOffset(separatorOverlap);

        // Allow a sign, and then try to match digits.
        int8_t exponentSign = 1;
        if (segment.startsWith(minusSignSet())) {
            exponentSign = -1;
            segment.adjustOffsetByCodePoint();
        } else if (segment.startsWith(plusSignSet())) {
            segment.adjustOffsetByCodePoint();
        } else if (segment.startsWith(fCustomMinusSign)) {
            // Note: call site is guarded with startsWith, which returns false on empty string
            const int32_t signOverlap = segment.getCommonPrefixLength(fCustomMinusSign);
            if (signOverlap != fCustomMinusSign.length()) {
                // Partial custom sign match; un-match everything consumed so far.
                segment.adjustOffset(initialOffset - segment.getOffset());
                return true;
            }
            exponentSign = -1;
            segment.adjustOffset(signOverlap);
        } else if (segment.startsWith(fCustomPlusSign)) {
            // Note: call site is guarded with startsWith, which returns false on empty string
            const int32_t signOverlap = segment.getCommonPrefixLength(fCustomPlusSign);
            if (signOverlap != fCustomPlusSign.length()) {
                // Partial custom sign match; un-match everything consumed so far.
                segment.adjustOffset(initialOffset - segment.getOffset());
                return true;
            }
            segment.adjustOffset(signOverlap);
        }

        // We are supposed to accept E0 after NaN, so we need to make sure result.quantity is available.
        // The bogus flag is cleared only for the duration of the nested match and then put back.
        const bool wasBogus = result.quantity.bogus;
        result.quantity.bogus = false;
        const int32_t digitsOffset = segment.getOffset();
        const bool digitsReturnValue = fExponentMatcher.match(segment, result, exponentSign, status);
        result.quantity.bogus = wasBogus;

        if (segment.getOffset() != digitsOffset) {
            // At least one exponent digit was matched.
            result.flags |= FLAG_HAS_EXPONENT;
        } else {
            // No exponent digits were matched. Rewind to the entry offset rather than by the
            // separator length alone: a sign may also have been consumed above, and backing up
            // only over the separator would leave the segment starting at the sign with the
            // separator silently skipped.
            segment.adjustOffset(initialOffset - segment.getOffset());
        }
        return digitsReturnValue;

    } else if (separatorOverlap == segment.length()) {
        // Partial exponent separator match: the whole remaining segment is a prefix of the separator.
        return true;
    }

    // No match
    return false;
}

/**
 * Cheap pre-check for match(): reports whether the segment starts with the exponent separator
 * according to StringSegment::startsWith().
 * NOTE(review): match() still handles partial separator overlap itself, so startsWith() is
 * presumably a looser test than a full prefix comparison -- confirm in StringSegment.
 */
bool ScientificMatcher::smokeTest(const StringSegment& segment) const {
    const bool beginsWithSeparator = segment.startsWith(fExponentSeparatorString);
    return beginsWithSeparator;
}

/** Returns the fixed label "<Scientific>" identifying this matcher (presumably for debug output). */
UnicodeString ScientificMatcher::toString() const {
    return UnicodeString(u"<Scientific>");
}


#endif /* #if !UCONFIG_NO_FORMATTING */
Commit	Line	Data
0f5d89e8 A	1	// © 2018 and later: Unicode, Inc. and others.
	2	// License & terms of use: http://www.unicode.org/copyright.html
	3
	4	#include "unicode/utypes.h"
	5
	6	#if !UCONFIG_NO_FORMATTING
	7
	8	// Allow implicit conversion from char16_t* to UnicodeString for this file:
	9	// Helpful in toString methods and elsewhere.
	10	#define UNISTR_FROM_STRING_EXPLICIT
	11
	12	#include "numparse_types.h"
	13	#include "numparse_scientific.h"
	14	#include "static_unicode_sets.h"
	15
	16	using namespace icu;
	17	using namespace icu::numparse;
	18	using namespace icu::numparse::impl;
	19
	20
	21	namespace {
	22
	23	inline const UnicodeSet& minusSignSet() {
	24	return *unisets::get(unisets::MINUS_SIGN);
	25	}
	26
	27	inline const UnicodeSet& plusSignSet() {
	28	return *unisets::get(unisets::PLUS_SIGN);
	29	}
	30
	31	} // namespace
	32
	33
	34	ScientificMatcher::ScientificMatcher(const DecimalFormatSymbols& dfs, const Grouper& grouper)
	35	: fExponentSeparatorString(dfs.getConstSymbol(DecimalFormatSymbols::kExponentialSymbol)),
	36	fExponentMatcher(dfs, grouper, PARSE_FLAG_INTEGER_ONLY \| PARSE_FLAG_GROUPING_DISABLED) {
	37
	38	const UnicodeString& minusSign = dfs.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol);
	39	if (minusSignSet().contains(minusSign)) {
	40	fCustomMinusSign.setToBogus();
	41	} else {
	42	fCustomMinusSign = minusSign;
	43	}
	44
	45	const UnicodeString& plusSign = dfs.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol);
	46	if (plusSignSet().contains(plusSign)) {
	47	fCustomPlusSign.setToBogus();
	48	} else {
	49	fCustomPlusSign = plusSign;
	50	}
	51	}
	52
	53	bool ScientificMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const {
	54	// Only accept scientific notation after the mantissa.
	55	if (!result.seenNumber()) {
	56	return false;
	57	}
	58
	59	// First match the scientific separator, and then match another number after it.
	60	// NOTE: This is guarded by the smoke test; no need to check fExponentSeparatorString length again.
	61	int overlap1 = segment.getCommonPrefixLength(fExponentSeparatorString);
	62	if (overlap1 == fExponentSeparatorString.length()) {
	63	// Full exponent separator match.
	64
65	// First attempt to get a code point, returning true if we can't get one.
66	if (segment.length() == overlap1) {
67	return true;
68	}
69	segment.adjustOffset(overlap1);
70
71	// Allow a sign, and then try to match digits.
72	int8_t exponentSign = 1;
73	if (segment.startsWith(minusSignSet())) {
74	exponentSign = -1;
75	segment.adjustOffsetByCodePoint();
76	} else if (segment.startsWith(plusSignSet())) {
77	segment.adjustOffsetByCodePoint();
78	} else if (segment.startsWith(fCustomMinusSign)) {
79	// Note: call site is guarded with startsWith, which returns false on empty string
80	int32_t overlap2 = segment.getCommonPrefixLength(fCustomMinusSign);
81	if (overlap2 != fCustomMinusSign.length()) {
82	// Partial custom sign match; un-match the exponent separator.
83	segment.adjustOffset(-overlap1);
84	return true;
85	}
86	exponentSign = -1;
87	segment.adjustOffset(overlap2);
88	} else if (segment.startsWith(fCustomPlusSign)) {
89	// Note: call site is guarded with startsWith, which returns false on empty string
90	int32_t overlap2 = segment.getCommonPrefixLength(fCustomPlusSign);
91	if (overlap2 != fCustomPlusSign.length()) {
92	// Partial custom sign match; un-match the exponent separator.
93	segment.adjustOffset(-overlap1);
94	return true;
95	}
96	segment.adjustOffset(overlap2);
97	}
98
99	// We are supposed to accept E0 after NaN, so we need to make sure result.quantity is available.
100	bool wasBogus = result.quantity.bogus;
101	result.quantity.bogus = false;
102	int digitsOffset = segment.getOffset();
103	bool digitsReturnValue = fExponentMatcher.match(segment, result, exponentSign, status);
104	result.quantity.bogus = wasBogus;
105
106	if (segment.getOffset() != digitsOffset) {
107	// At least one exponent digit was matched.
108	result.flags \|= FLAG_HAS_EXPONENT;
109	} else {
110	// No exponent digits were matched; un-match the exponent separator.
111	segment.adjustOffset(-overlap1);
112	}
113	return digitsReturnValue;
114
115	} else if (overlap1 == segment.length()) {
116	// Partial exponent separator match
117	return true;
118	}
119
120	// No match
121	return false;
122	}
123
124	bool ScientificMatcher::smokeTest(const StringSegment& segment) const {
125	return segment.startsWith(fExponentSeparatorString);
126	}
127
128	UnicodeString ScientificMatcher::toString() const {
129	return u"<Scientific>";
130	}
131
132
133	#endif /* #if !UCONFIG_NO_FORMATTING */