[apple/icu.git] / icuSources / i18n / numparse_decimal.cpp

// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html

#include "unicode/utypes.h"

#if !UCONFIG_NO_FORMATTING

// Allow implicit conversion from char16_t* to UnicodeString for this file:
// Helpful in toString methods and elsewhere.
#define UNISTR_FROM_STRING_EXPLICIT

#include "numparse_types.h"
#include "numparse_decimal.h"
#include "static_unicode_sets.h"
#include "numparse_utils.h"
#include "unicode/uchar.h"
#include "putilimp.h"
#include "number_decimalquantity.h"

using namespace icu;
using namespace icu::numparse;
using namespace icu::numparse::impl;


// Constructs a matcher for decimal numbers.
//
// symbols:    supplies the grouping/decimal separator strings (the monetary variants are used
//             when PARSE_FLAG_MONETARY_SEPARATORS is set), the zero code point, and the
//             per-digit symbol strings.
// grouper:    supplies the primary and secondary grouping sizes.
// parseFlags: bit set of PARSE_FLAG_* values; the flags read here are MONETARY_SEPARATORS,
//             STRICT_SEPARATORS, STRICT_GROUPING_SIZE, GROUPING_DISABLED and INTEGER_ONLY.
//
// Sets obtained from unisets::get() are stored as plain pointers; sets allocated here are
// additionally adopted by the fLocal* members.
DecimalMatcher::DecimalMatcher(const DecimalFormatSymbols& symbols, const Grouper& grouper,
                               parse_flags_t parseFlags) {
    if (0 != (parseFlags & PARSE_FLAG_MONETARY_SEPARATORS)) {
        groupingSeparator = symbols.getConstSymbol(DecimalFormatSymbols::kMonetaryGroupingSeparatorSymbol);
        decimalSeparator = symbols.getConstSymbol(DecimalFormatSymbols::kMonetarySeparatorSymbol);
    } else {
        groupingSeparator = symbols.getConstSymbol(DecimalFormatSymbols::kGroupingSeparatorSymbol);
        decimalSeparator = symbols.getConstSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol);
    }
    bool strictSeparators = 0 != (parseFlags & PARSE_FLAG_STRICT_SEPARATORS);
    unisets::Key groupingKey = strictSeparators ? unisets::STRICT_ALL_SEPARATORS
                                                : unisets::ALL_SEPARATORS;

    // Attempt to find separators in the static cache

    groupingUniSet = unisets::get(groupingKey);
    unisets::Key decimalKey = unisets::chooseFrom(
            decimalSeparator,
            strictSeparators ? unisets::STRICT_COMMA : unisets::COMMA,
            strictSeparators ? unisets::STRICT_PERIOD : unisets::PERIOD);
    if (decimalKey >= 0) {
        // The decimal separator belongs to one of the two candidate static sets.
        decimalUniSet = unisets::get(decimalKey);
    } else if (!decimalSeparator.isEmpty()) {
        // No static set applies: build a one-element set from the first code point of the
        // decimal separator string.
        auto* set = new UnicodeSet();
        set->add(decimalSeparator.char32At(0));
        set->freeze();
        decimalUniSet = set;
        fLocalDecimalUniSet.adoptInstead(set);
    } else {
        decimalUniSet = unisets::get(unisets::EMPTY);
    }

    if (groupingKey >= 0 && decimalKey >= 0) {
        // Everything is available in the static cache
        separatorSet = groupingUniSet;
        // leadSet is what smokeTest() checks the first code point against, so it has to be
        // "digits or separatorSet". separatorSet is STRICT_ALL_SEPARATORS in strict mode and
        // ALL_SEPARATORS otherwise, so the strict flag must select the STRICT variant here.
        // (The two arms used to be swapped, which made the lenient smoke test use the strict
        // separator set.)
        leadSet = unisets::get(
                strictSeparators ? unisets::DIGITS_OR_STRICT_ALL_SEPARATORS
                                 : unisets::DIGITS_OR_ALL_SEPARATORS);
    } else {
        // Build the union of the grouping and decimal sets locally; smokeTest() falls back
        // to separatorSet when leadSet is null.
        auto* set = new UnicodeSet();
        set->addAll(*groupingUniSet);
        set->addAll(*decimalUniSet);
        set->freeze();
        separatorSet = set;
        fLocalSeparatorSet.adoptInstead(set);
        leadSet = nullptr;
    }

    UChar32 cpZero = symbols.getCodePointZero();
    if (cpZero == -1 || !u_isdigit(cpZero) || u_digit(cpZero, 10) != 0) {
        // Uncommon case: okay to allocate.
        // The locale's zero is not a code point with digit value 0, so digits cannot be
        // recognized through u_digit() alone; keep the ten digit strings for literal matching.
        auto digitStrings = new UnicodeString[10];
        fLocalDigitStrings.adoptInstead(digitStrings);
        for (int32_t i = 0; i <= 9; i++) {
            digitStrings[i] = symbols.getConstDigitSymbol(i);
        }
    }

    requireGroupingMatch = 0 != (parseFlags & PARSE_FLAG_STRICT_GROUPING_SIZE);
    groupingDisabled = 0 != (parseFlags & PARSE_FLAG_GROUPING_DISABLED);
    integerOnly = 0 != (parseFlags & PARSE_FLAG_INTEGER_ONLY);
    grouping1 = grouper.getPrimary();
    grouping2 = grouper.getSecondary();

    // Fraction grouping parsing is disabled for now but could be enabled later.
    // See http://bugs.icu-project.org/trac/ticket/10794
    // fractionGrouping = 0 != (parseFlags & PARSE_FLAG_FRACTION_GROUPING_ENABLED);
}

// Matches a plain (non-exponent) number by delegating to the four-argument overload with
// an exponent sign of zero.
bool DecimalMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const {
    const int8_t noExponentSign = 0;
    return match(segment, result, noExponentSign, status);
}

// Consumes digits, grouping separators and at most one decimal separator from the front of
// `segment`.
//
// segment:      the input; its offset is advanced past whatever is accepted, or restored to
//               its value on entry when nothing is accepted.
// result:       receives the parsed quantity (exponentSign == 0), or has the magnitude of its
//               existing quantity adjusted by the parsed integer (exponentSign != 0).
// exponentSign: 0 when parsing the number itself; otherwise the sign multiplied into the
//               parsed exponent. On exponent overflow, -1 clears the quantity to zero and any
//               other nonzero value flags the result as infinity.
//
// Returns whether the caller should ask for more characters: true when the whole segment was
// consumed or when a partial match against a multi-character token was seen.
bool DecimalMatcher::match(StringSegment& segment, ParsedNumber& result, int8_t exponentSign,
                           UErrorCode&) const {
    if (result.seenNumber() && exponentSign == 0) {
        // A number has already been consumed.
        return false;
    } else if (exponentSign != 0) {
        // scientific notation always comes after the number
        U_ASSERT(!result.quantity.bogus);
    }

    // Initial offset before any character consumption.
    int32_t initialOffset = segment.getOffset();

    // Return value: whether to ask for more characters.
    bool maybeMore = false;

    // All digits consumed so far.
    number::impl::DecimalQuantity digitsConsumed;
    digitsConsumed.bogus = true;

    // The total number of digits after the decimal place, used for scaling the result.
    int32_t digitsAfterDecimalPlace = 0;

    // The actual grouping and decimal separators used in the string.
    // If non-bogus, we have seen that token.
    UnicodeString actualGroupingString;
    UnicodeString actualDecimalString;
    actualGroupingString.setToBogus();
    actualDecimalString.setToBogus();

    // Information for two groups: the previous group and the current group.
    //
    // Each group has three pieces of information:
    //
    // Offset: the string position of the beginning of the group, including a leading separator
    // if there was a leading separator. This is needed in case we need to rewind the parse to
    // that position.
    //
    // Separator type:
    // 0 => beginning of string
    // 1 => lead separator is a grouping separator
    // 2 => lead separator is a decimal separator
    //
    // Count: the number of digits in the group. If -1, the group has been validated.
    //
    // NOTE(review): currGroupOffset starts at 0 rather than initialOffset; confirm that this is
    // intended for segments whose offset is nonzero on entry.
    int32_t currGroupOffset = 0;
    int32_t currGroupSepType = 0;
    int32_t currGroupCount = 0;
    int32_t prevGroupOffset = -1;
    int32_t prevGroupSepType = -1;
    int32_t prevGroupCount = -1;

    while (segment.length() > 0) {
        maybeMore = false;

        // Attempt to match a digit.
        int8_t digit = -1;

        // Try by code point digit value.
        UChar32 cp = segment.getCodePoint();
        if (u_isdigit(cp)) {
            segment.adjustOffset(U16_LENGTH(cp));
            digit = static_cast<int8_t>(u_digit(cp, 10));
        }

        // Try by digit string.
        if (digit == -1 && !fLocalDigitStrings.isNull()) {
            // The following test is Apple-specific, for <rdar://7632623>;
            // if \u3007 is treated as 0 for parsing, \u96F6 should be too.
            // The override applies only to digit 0 and only when the zero digit string is
            // exactly \u3007 (the same condition smokeTest() uses), so that \u96F6 can never
            // be reported as a match for a different digit string.
            const UnicodeString& zeroStr = fLocalDigitStrings[0];
            const bool altZero = zeroStr.length() == 1 && zeroStr.charAt(0) == 0x3007 &&
                                 segment.startsWith(0x96F6);
            for (int32_t i = 0; i < 10; i++) {
                const UnicodeString& str = fLocalDigitStrings[i];
                if (str.isEmpty()) {
                    continue;
                }
                int32_t overlap = (i == 0 && altZero) ? 1 : segment.getCommonPrefixLength(str);
                if (overlap == str.length()) {
                    segment.adjustOffset(overlap);
                    digit = static_cast<int8_t>(i);
                    break;
                }
                // The rest of the segment is a proper prefix of this digit string.
                maybeMore = maybeMore || (overlap == segment.length());
            }
        }

        if (digit >= 0) {
            // Digit was found.
            if (digitsConsumed.bogus) {
                digitsConsumed.bogus = false;
                digitsConsumed.clear();
            }
            digitsConsumed.appendDigit(digit, 0, true);
            currGroupCount++;
            if (!actualDecimalString.isBogus()) {
                digitsAfterDecimalPlace++;
            }
            continue;
        }

        // Attempt to match a literal grouping or decimal separator.
        bool isDecimal = false;
        bool isGrouping = false;

        // 1) Attempt the decimal separator string literal.
        // if (we have not seen a decimal separator yet) { ... }
        if (actualDecimalString.isBogus() && !decimalSeparator.isEmpty()) {
            int32_t overlap = segment.getCommonPrefixLength(decimalSeparator);
            maybeMore = maybeMore || (overlap == segment.length());
            if (overlap == decimalSeparator.length()) {
                isDecimal = true;
                actualDecimalString = decimalSeparator;
            }
        }

        // 2) Attempt to match the actual grouping string literal.
        if (!actualGroupingString.isBogus()) {
            int32_t overlap = segment.getCommonPrefixLength(actualGroupingString);
            maybeMore = maybeMore || (overlap == segment.length());
            if (overlap == actualGroupingString.length()) {
                isGrouping = true;
            }
        }

        // 2.5) Attempt to match a new grouping separator string literal.
        // if (we have not seen a grouping or decimal separator yet) { ... }
        if (!groupingDisabled && actualGroupingString.isBogus() && actualDecimalString.isBogus() &&
            !groupingSeparator.isEmpty()) {
            int32_t overlap = segment.getCommonPrefixLength(groupingSeparator);
            maybeMore = maybeMore || (overlap == segment.length());
            if (overlap == groupingSeparator.length()) {
                isGrouping = true;
                actualGroupingString = groupingSeparator;
            }
        }

        // 3) Attempt to match a decimal separator from the equivalence set.
        // if (we have not seen a decimal separator yet) { ... }
        // The !isGrouping is to confirm that we haven't yet matched the current character.
        if (!isGrouping && actualDecimalString.isBogus()) {
            if (decimalUniSet->contains(cp)) {
                isDecimal = true;
                actualDecimalString = UnicodeString(cp);
            }
        }

        // 4) Attempt to match a grouping separator from the equivalence set.
        // if (we have not seen a grouping or decimal separator yet) { ... }
        if (!groupingDisabled && actualGroupingString.isBogus() && actualDecimalString.isBogus()) {
            if (groupingUniSet->contains(cp)) {
                isGrouping = true;
                actualGroupingString = UnicodeString(cp);
            }
        }

        // Leave if we failed to match this as a separator.
        if (!isDecimal && !isGrouping) {
            break;
        }

        // Check for conditions when we don't want to accept the separator.
        if (isDecimal && integerOnly) {
            break;
        } else if (currGroupSepType == 2 && isGrouping) {
            // Fraction grouping
            break;
        }

        // Validate intermediate grouping sizes.
        bool prevValidSecondary = validateGroup(prevGroupSepType, prevGroupCount, false);
        bool currValidPrimary = validateGroup(currGroupSepType, currGroupCount, true);
        if (!prevValidSecondary || (isDecimal && !currValidPrimary)) {
            // Invalid grouping sizes.
            if (isGrouping && currGroupCount == 0) {
                // Trailing grouping separators: these are taken care of below
                U_ASSERT(currGroupSepType == 1);
            } else if (requireGroupingMatch) {
                // Strict mode: reject the parse
                digitsConsumed.clear();
                digitsConsumed.bogus = true;
            }
            break;
        } else if (requireGroupingMatch && currGroupCount == 0 && currGroupSepType == 1) {
            // Strict mode: a separator directly after a grouping separator ends the match.
            break;
        } else {
            // Grouping sizes OK so far.
            prevGroupOffset = currGroupOffset;
            prevGroupCount = currGroupCount;
            if (isDecimal) {
                // Do not validate this group any more.
                prevGroupSepType = -1;
            } else {
                prevGroupSepType = currGroupSepType;
            }
        }

        // OK to accept the separator.
        // Special case: don't update currGroup if it is empty; this allows two grouping
        // separators in a row in lenient mode.
        if (currGroupCount != 0) {
            currGroupOffset = segment.getOffset();
        }
        currGroupSepType = isGrouping ? 1 : 2;
        currGroupCount = 0;
        if (isGrouping) {
            segment.adjustOffset(actualGroupingString.length());
        } else {
            segment.adjustOffset(actualDecimalString.length());
        }
    }

    // End of main loop.
    // Back up if there was a trailing grouping separator.
    // Shift prev -> curr so we can check it as a final group.
    if (currGroupSepType != 2 && currGroupCount == 0) {
        maybeMore = true;
        segment.setOffset(currGroupOffset);
        currGroupOffset = prevGroupOffset;
        currGroupSepType = prevGroupSepType;
        currGroupCount = prevGroupCount;
        prevGroupOffset = -1;
        prevGroupSepType = 0;
        prevGroupCount = 1;
    }

    // Validate final grouping sizes.
    bool prevValidSecondary = validateGroup(prevGroupSepType, prevGroupCount, false);
    bool currValidPrimary = validateGroup(currGroupSepType, currGroupCount, true);
    if (!requireGroupingMatch) {
        // The cases we need to handle here are lone digits.
        // Examples: "1,1"  "1,1,"  "1,1,1"  "1,1,1,"  ",1" (all parse as 1)
        // See more examples in numberformattestspecification.txt
        int32_t digitsToRemove = 0;
        if (!prevValidSecondary) {
            // Rewind to the start of the previous group and drop both groups' digits.
            segment.setOffset(prevGroupOffset);
            digitsToRemove += prevGroupCount;
            digitsToRemove += currGroupCount;
        } else if (!currValidPrimary && (prevGroupSepType != 0 || prevGroupCount != 0)) {
            // Rewind to the start of the current group and drop only its digits.
            maybeMore = true;
            segment.setOffset(currGroupOffset);
            digitsToRemove += currGroupCount;
        }
        if (digitsToRemove != 0) {
            digitsConsumed.adjustMagnitude(-digitsToRemove);
            digitsConsumed.truncate();
        }
        // Lenient mode never fails on grouping sizes after the adjustment above.
        prevValidSecondary = true;
        currValidPrimary = true;
    }
    if (currGroupSepType != 2 && (!prevValidSecondary || !currValidPrimary)) {
        // Grouping failure.
        digitsConsumed.bogus = true;
    }

    // Strings that start with a separator but have no digits,
    // or strings that failed a grouping size check.
    if (digitsConsumed.bogus) {
        maybeMore = maybeMore || (segment.length() == 0);
        segment.setOffset(initialOffset);
        return maybeMore;
    }

    // We passed all inspections. Start post-processing.

    // Adjust for fraction part.
    digitsConsumed.adjustMagnitude(-digitsAfterDecimalPlace);

    // Set the digits, either normal or exponent.
    if (exponentSign != 0 && segment.getOffset() != initialOffset) {
        // Exponent mode: the parsed digits scale the quantity already stored in result.
        bool overflow = false;
        if (digitsConsumed.fitsInLong()) {
            int64_t exponentLong = digitsConsumed.toLong(false);
            U_ASSERT(exponentLong >= 0);
            if (exponentLong <= INT32_MAX) {
                auto exponentInt = static_cast<int32_t>(exponentLong);
                if (result.quantity.adjustMagnitude(exponentSign * exponentInt)) {
                    overflow = true;
                }
            } else {
                overflow = true;
            }
        } else {
            overflow = true;
        }
        if (overflow) {
            if (exponentSign == -1) {
                // Set to zero
                result.quantity.clear();
            } else {
                // Set to infinity
                result.quantity.bogus = true;
                result.flags |= FLAG_INFINITY;
            }
        }
    } else {
        result.quantity = digitsConsumed;
    }

    // Set other information into the result and return.
    if (!actualDecimalString.isBogus()) {
        result.flags |= FLAG_HAS_DECIMAL_SEPARATOR;
    }
    result.setCharsConsumed(segment);
    return segment.length() == 0 || maybeMore;
}

// Decides whether a group of `count` digits is acceptable.
//
// sepType:   -1 => no such group, 0 => group at the beginning of the string,
//            1 => group introduced by a grouping separator,
//            2 => group introduced by a decimal separator.
// isPrimary: true to check against the primary grouping size (grouping1), false to check
//            against the secondary grouping size (grouping2).
bool DecimalMatcher::validateGroup(int32_t sepType, int32_t count, bool isPrimary) const {
    if (!requireGroupingMatch) {
        // Lenient mode. #11230: don't accept middle groups with only 1 digit.
        return !(sepType == 1 && count == 1);
    }

    // Strict mode.
    switch (sepType) {
        case -1:
            // No such group (prevGroup before first shift).
            return true;

        case 0:
            // First group. As the primary group, no grouping separators is OK.
            // As the secondary group, upstream required `count != 0 && count <= grouping2`;
            // Apple <rdar://problem/38565910> allows an initial secondary group of 0.
            return isPrimary || count <= grouping2;

        case 1:
            // Middle group: must match the relevant grouping size exactly.
            return isPrimary ? (count == grouping1) : (count == grouping2);

        default:
            // After the decimal separator.
            U_ASSERT(sepType == 2);
            return true;
    }
}

bool DecimalMatcher::smokeTest(const StringSegment& segment) const {
    // The common case uses a static leadSet for efficiency.
    if (fLocalDigitStrings.isNull() && leadSet != nullptr) {
        return segment.startsWith(*leadSet);
    }
    if (segment.startsWith(*separatorSet) || u_isdigit(segment.getCodePoint())) {
        return true;
    }
    if (fLocalDigitStrings.isNull()) {
        return false;
    }
    // The following test is Apple-specific, for <rdar://7632623>;
    // if \u3007 is treated as 0 for parsing, \u96F6 should be too.
    if (segment.startsWith(0x96F6) && fLocalDigitStrings[0].length()==1 && fLocalDigitStrings[0].charAt(0)==0x3007) {
        return true;
    }
    for (int32_t i = 0; i < 10; i++) {
        if (segment.startsWith(fLocalDigitStrings[i])) {
            return true;
        }
    }
    return false;
}

// Returns a fixed label identifying this matcher.
UnicodeString DecimalMatcher::toString() const {
    return UnicodeString(u"<Decimal>");
}


#endif /* #if !UCONFIG_NO_FORMATTING */
Commit	Line	Data
0f5d89e8 A	1	// © 2018 and later: Unicode, Inc. and others.
	2	// License & terms of use: http://www.unicode.org/copyright.html
	3
	4	#include "unicode/utypes.h"
	5
	6	#if !UCONFIG_NO_FORMATTING
	7
	8	// Allow implicit conversion from char16_t* to UnicodeString for this file:
	9	// Helpful in toString methods and elsewhere.
	10	#define UNISTR_FROM_STRING_EXPLICIT
	11
	12	#include "numparse_types.h"
	13	#include "numparse_decimal.h"
	14	#include "static_unicode_sets.h"
	15	#include "numparse_utils.h"
	16	#include "unicode/uchar.h"
	17	#include "putilimp.h"
	18	#include "number_decimalquantity.h"
	19
	20	using namespace icu;
	21	using namespace icu::numparse;
	22	using namespace icu::numparse::impl;
	23
	24
	25	DecimalMatcher::DecimalMatcher(const DecimalFormatSymbols& symbols, const Grouper& grouper,
	26	parse_flags_t parseFlags) {
	27	if (0 != (parseFlags & PARSE_FLAG_MONETARY_SEPARATORS)) {
	28	groupingSeparator = symbols.getConstSymbol(DecimalFormatSymbols::kMonetaryGroupingSeparatorSymbol);
	29	decimalSeparator = symbols.getConstSymbol(DecimalFormatSymbols::kMonetarySeparatorSymbol);
	30	} else {
	31	groupingSeparator = symbols.getConstSymbol(DecimalFormatSymbols::kGroupingSeparatorSymbol);
	32	decimalSeparator = symbols.getConstSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol);
	33	}
	34	bool strictSeparators = 0 != (parseFlags & PARSE_FLAG_STRICT_SEPARATORS);
	35	unisets::Key groupingKey = strictSeparators ? unisets::STRICT_ALL_SEPARATORS
	36	: unisets::ALL_SEPARATORS;
	37
	38	// Attempt to find separators in the static cache
	39
	40	groupingUniSet = unisets::get(groupingKey);
	41	unisets::Key decimalKey = unisets::chooseFrom(
	42	decimalSeparator,
	43	strictSeparators ? unisets::STRICT_COMMA : unisets::COMMA,
	44	strictSeparators ? unisets::STRICT_PERIOD : unisets::PERIOD);
	45	if (decimalKey >= 0) {
	46	decimalUniSet = unisets::get(decimalKey);
	47	} else if (!decimalSeparator.isEmpty()) {
	48	auto* set = new UnicodeSet();
	49	set->add(decimalSeparator.char32At(0));
	50	set->freeze();
	51	decimalUniSet = set;
	52	fLocalDecimalUniSet.adoptInstead(set);
	53	} else {
	54	decimalUniSet = unisets::get(unisets::EMPTY);
	55	}
	56
	57	if (groupingKey >= 0 && decimalKey >= 0) {
	58	// Everything is available in the static cache
	59	separatorSet = groupingUniSet;
	60	leadSet = unisets::get(
	61	strictSeparators ? unisets::DIGITS_OR_ALL_SEPARATORS
	62	: unisets::DIGITS_OR_STRICT_ALL_SEPARATORS);
	63	} else {
	64	auto* set = new UnicodeSet();
65	set->addAll(*groupingUniSet);
66	set->addAll(*decimalUniSet);
67	set->freeze();
68	separatorSet = set;
69	fLocalSeparatorSet.adoptInstead(set);
70	leadSet = nullptr;
71	}
72
73	UChar32 cpZero = symbols.getCodePointZero();
74	if (cpZero == -1 \|\| !u_isdigit(cpZero) \|\| u_digit(cpZero, 10) != 0) {
75	// Uncommon case: okay to allocate.
76	auto digitStrings = new UnicodeString[10];
77	fLocalDigitStrings.adoptInstead(digitStrings);
78	for (int32_t i = 0; i <= 9; i++) {
79	digitStrings[i] = symbols.getConstDigitSymbol(i);
80	}
81	}
82
83	requireGroupingMatch = 0 != (parseFlags & PARSE_FLAG_STRICT_GROUPING_SIZE);
84	groupingDisabled = 0 != (parseFlags & PARSE_FLAG_GROUPING_DISABLED);
85	integerOnly = 0 != (parseFlags & PARSE_FLAG_INTEGER_ONLY);
86	grouping1 = grouper.getPrimary();
87	grouping2 = grouper.getSecondary();
88
89	// Fraction grouping parsing is disabled for now but could be enabled later.
90	// See http://bugs.icu-project.org/trac/ticket/10794
91	// fractionGrouping = 0 != (parseFlags & PARSE_FLAG_FRACTION_GROUPING_ENABLED);
92	}
93
94	bool DecimalMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const {
95	return match(segment, result, 0, status);
96	}
97
98	bool DecimalMatcher::match(StringSegment& segment, ParsedNumber& result, int8_t exponentSign,
99	UErrorCode&) const {
100	if (result.seenNumber() && exponentSign == 0) {
101	// A number has already been consumed.
102	return false;
103	} else if (exponentSign != 0) {
104	// scientific notation always comes after the number
105	U_ASSERT(!result.quantity.bogus);
106	}
107
108	// Initial offset before any character consumption.
109	int32_t initialOffset = segment.getOffset();
110
111	// Return value: whether to ask for more characters.
112	bool maybeMore = false;
113
114	// All digits consumed so far.
115	number::impl::DecimalQuantity digitsConsumed;
116	digitsConsumed.bogus = true;
117
118	// The total number of digits after the decimal place, used for scaling the result.
119	int32_t digitsAfterDecimalPlace = 0;
120
121	// The actual grouping and decimal separators used in the string.
122	// If non-null, we have seen that token.
123	UnicodeString actualGroupingString;
124	UnicodeString actualDecimalString;
125	actualGroupingString.setToBogus();
126	actualDecimalString.setToBogus();
127
128	// Information for two groups: the previous group and the current group.
129	//
130	// Each group has three pieces of information:
131	//
132	// Offset: the string position of the beginning of the group, including a leading separator
133	// if there was a leading separator. This is needed in case we need to rewind the parse to
134	// that position.
135	//
136	// Separator type:
137	// 0 => beginning of string
138	// 1 => lead separator is a grouping separator
139	// 2 => lead separator is a decimal separator
140	//
141	// Count: the number of digits in the group. If -1, the group has been validated.
142	int32_t currGroupOffset = 0;
143	int32_t currGroupSepType = 0;
144	int32_t currGroupCount = 0;
145	int32_t prevGroupOffset = -1;
146	int32_t prevGroupSepType = -1;
147	int32_t prevGroupCount = -1;
148
149	while (segment.length() > 0) {
150	maybeMore = false;
151
152	// Attempt to match a digit.
153	int8_t digit = -1;
154
155	// Try by code point digit value.
156	UChar32 cp = segment.getCodePoint();
157	if (u_isdigit(cp)) {
158	segment.adjustOffset(U16_LENGTH(cp));
159	digit = static_cast<int8_t>(u_digit(cp, 10));
160	}
161
162	// Try by digit string.
163	if (digit == -1 && !fLocalDigitStrings.isNull()) {
164	for (int32_t i = 0; i < 10; i++) {
165	const UnicodeString& str = fLocalDigitStrings[i];
166	if (str.isEmpty()) {
167	continue;
168	}
3d1f044b A	169	// The following test is Apple-specific, for <rdar://7632623>;
	170	// if \u3007 is treated as 0 for parsing, \u96F6 should be too.
	171	int32_t overlap = (segment.startsWith(0x96F6) && fLocalDigitStrings[0].charAt(0)==0x3007)?
	172	1: segment.getCommonPrefixLength(str);
0f5d89e8 A	173	if (overlap == str.length()) {
	174	segment.adjustOffset(overlap);
	175	digit = static_cast<int8_t>(i);
	176	break;
	177	}
	178	maybeMore = maybeMore \|\| (overlap == segment.length());
	179	}
	180	}
	181
	182	if (digit >= 0) {
	183	// Digit was found.
	184	if (digitsConsumed.bogus) {
	185	digitsConsumed.bogus = false;
	186	digitsConsumed.clear();
	187	}
	188	digitsConsumed.appendDigit(digit, 0, true);
	189	currGroupCount++;
	190	if (!actualDecimalString.isBogus()) {
	191	digitsAfterDecimalPlace++;
	192	}
	193	continue;
	194	}
	195
	196	// Attempt to match a literal grouping or decimal separator.
	197	bool isDecimal = false;
	198	bool isGrouping = false;
	199
	200	// 1) Attempt the decimal separator string literal.
	201	// if (we have not seen a decimal separator yet) { ... }
	202	if (actualDecimalString.isBogus() && !decimalSeparator.isEmpty()) {
	203	int32_t overlap = segment.getCommonPrefixLength(decimalSeparator);
	204	maybeMore = maybeMore \|\| (overlap == segment.length());
	205	if (overlap == decimalSeparator.length()) {
	206	isDecimal = true;
	207	actualDecimalString = decimalSeparator;
	208	}
	209	}
	210
	211	// 2) Attempt to match the actual grouping string literal.
	212	if (!actualGroupingString.isBogus()) {
	213	int32_t overlap = segment.getCommonPrefixLength(actualGroupingString);
	214	maybeMore = maybeMore \|\| (overlap == segment.length());
	215	if (overlap == actualGroupingString.length()) {
	216	isGrouping = true;
	217	}
	218	}
	219
	220	// 2.5) Attempt to match a new the grouping separator string literal.
	221	// if (we have not seen a grouping or decimal separator yet) { ... }
	222	if (!groupingDisabled && actualGroupingString.isBogus() && actualDecimalString.isBogus() &&
	223	!groupingSeparator.isEmpty()) {
	224	int32_t overlap = segment.getCommonPrefixLength(groupingSeparator);
	225	maybeMore = maybeMore \|\| (overlap == segment.length());
	226	if (overlap == groupingSeparator.length()) {
	227	isGrouping = true;
	228	actualGroupingString = groupingSeparator;
	229	}
	230	}
	231
	232	// 3) Attempt to match a decimal separator from the equivalence set.
	233	// if (we have not seen a decimal separator yet) { ... }
	234	// The !isGrouping is to confirm that we haven't yet matched the current character.
	235	if (!isGrouping && actualDecimalString.isBogus()) {
	236	if (decimalUniSet->contains(cp)) {
237	isDecimal = true;
238	actualDecimalString = UnicodeString(cp);
239	}
240	}
241
242	// 4) Attempt to match a grouping separator from the equivalence set.
243	// if (we have not seen a grouping or decimal separator yet) { ... }
244	if (!groupingDisabled && actualGroupingString.isBogus() && actualDecimalString.isBogus()) {
245	if (groupingUniSet->contains(cp)) {
246	isGrouping = true;
247	actualGroupingString = UnicodeString(cp);
248	}
249	}
250
251	// Leave if we failed to match this as a separator.
252	if (!isDecimal && !isGrouping) {
253	break;
254	}
255
256	// Check for conditions when we don't want to accept the separator.
257	if (isDecimal && integerOnly) {
258	break;
259	} else if (currGroupSepType == 2 && isGrouping) {
260	// Fraction grouping
261	break;
262	}
263
264	// Validate intermediate grouping sizes.
265	bool prevValidSecondary = validateGroup(prevGroupSepType, prevGroupCount, false);
266	bool currValidPrimary = validateGroup(currGroupSepType, currGroupCount, true);
267	if (!prevValidSecondary \|\| (isDecimal && !currValidPrimary)) {
268	// Invalid grouping sizes.
269	if (isGrouping && currGroupCount == 0) {
270	// Trailing grouping separators: these are taken care of below
271	U_ASSERT(currGroupSepType == 1);
272	} else if (requireGroupingMatch) {
273	// Strict mode: reject the parse
274	digitsConsumed.clear();
275	digitsConsumed.bogus = true;
276	}
277	break;
278	} else if (requireGroupingMatch && currGroupCount == 0 && currGroupSepType == 1) {
279	break;
280	} else {
281	// Grouping sizes OK so far.
282	prevGroupOffset = currGroupOffset;
283	prevGroupCount = currGroupCount;
284	if (isDecimal) {
285	// Do not validate this group any more.
286	prevGroupSepType = -1;
287	} else {
288	prevGroupSepType = currGroupSepType;
289	}
290	}
291
292	// OK to accept the separator.
293	// Special case: don't update currGroup if it is empty; this allows two grouping
294	// separators in a row in lenient mode.
295	if (currGroupCount != 0) {
296	currGroupOffset = segment.getOffset();
297	}
298	currGroupSepType = isGrouping ? 1 : 2;
299	currGroupCount = 0;
300	if (isGrouping) {
301	segment.adjustOffset(actualGroupingString.length());
302	} else {
303	segment.adjustOffset(actualDecimalString.length());
304	}
305	}
306
307	// End of main loop.
308	// Back up if there was a trailing grouping separator.
309	// Shift prev -> curr so we can check it as a final group.
310	if (currGroupSepType != 2 && currGroupCount == 0) {
311	maybeMore = true;
312	segment.setOffset(currGroupOffset);
313	currGroupOffset = prevGroupOffset;
314	currGroupSepType = prevGroupSepType;
315	currGroupCount = prevGroupCount;
316	prevGroupOffset = -1;
317	prevGroupSepType = 0;
318	prevGroupCount = 1;
319	}
320
321	// Validate final grouping sizes.
322	bool prevValidSecondary = validateGroup(prevGroupSepType, prevGroupCount, false);
323	bool currValidPrimary = validateGroup(currGroupSepType, currGroupCount, true);
324	if (!requireGroupingMatch) {
325	// The cases we need to handle here are lone digits.
326	// Examples: "1,1" "1,1," "1,1,1" "1,1,1," ",1" (all parse as 1)
327	// See more examples in numberformattestspecification.txt
328	int32_t digitsToRemove = 0;
329	if (!prevValidSecondary) {
330	segment.setOffset(prevGroupOffset);
331	digitsToRemove += prevGroupCount;
332	digitsToRemove += currGroupCount;
333	} else if (!currValidPrimary && (prevGroupSepType != 0 \|\| prevGroupCount != 0)) {
334	maybeMore = true;
335	segment.setOffset(currGroupOffset);
336	digitsToRemove += currGroupCount;
337	}
338	if (digitsToRemove != 0) {
339	digitsConsumed.adjustMagnitude(-digitsToRemove);
340	digitsConsumed.truncate();
341	}
342	prevValidSecondary = true;
343	currValidPrimary = true;
344	}
345	if (currGroupSepType != 2 && (!prevValidSecondary \|\| !currValidPrimary)) {
346	// Grouping failure.
347	digitsConsumed.bogus = true;
348	}
349
350	// Strings that start with a separator but have no digits,
351	// or strings that failed a grouping size check.
352	if (digitsConsumed.bogus) {
353	maybeMore = maybeMore \|\| (segment.length() == 0);
354	segment.setOffset(initialOffset);
355	return maybeMore;
356	}
357
358	// We passed all inspections. Start post-processing.
359
360	// Adjust for fraction part.
361	digitsConsumed.adjustMagnitude(-digitsAfterDecimalPlace);
362
363	// Set the digits, either normal or exponent.
364	if (exponentSign != 0 && segment.getOffset() != initialOffset) {
365	bool overflow = false;
366	if (digitsConsumed.fitsInLong()) {
367	int64_t exponentLong = digitsConsumed.toLong(false);
368	U_ASSERT(exponentLong >= 0);
369	if (exponentLong <= INT32_MAX) {
370	auto exponentInt = static_cast<int32_t>(exponentLong);
371	if (result.quantity.adjustMagnitude(exponentSign * exponentInt)) {
372	overflow = true;
373	}
374	} else {
375	overflow = true;
376	}
377	} else {
378	overflow = true;
379	}
380	if (overflow) {
381	if (exponentSign == -1) {
382	// Set to zero
383	result.quantity.clear();
384	} else {
385	// Set to infinity
386	result.quantity.bogus = true;
387	result.flags \|= FLAG_INFINITY;
388	}
389	}
390	} else {
391	result.quantity = digitsConsumed;
392	}
393
394	// Set other information into the result and return.
395	if (!actualDecimalString.isBogus()) {
396	result.flags \|= FLAG_HAS_DECIMAL_SEPARATOR;
397	}
398	result.setCharsConsumed(segment);
399	return segment.length() == 0 \|\| maybeMore;
400	}
401
402	bool DecimalMatcher::validateGroup(int32_t sepType, int32_t count, bool isPrimary) const {
403	if (requireGroupingMatch) {
404	if (sepType == -1) {
405	// No such group (prevGroup before first shift).
406	return true;
407	} else if (sepType == 0) {
408	// First group.
409	if (isPrimary) {
410	// No grouping separators is OK.
411	return true;
412	} else {
3d1f044b A	413	// return count != 0 && count <= grouping2;
3d1f044b A	414	return count <= grouping2; // Apple <rdar://problem/38565910>, allow initial secondary group of 0
0f5d89e8 A	415	}
	416	} else if (sepType == 1) {
	417	// Middle group.
	418	if (isPrimary) {
	419	return count == grouping1;
	420	} else {
	421	return count == grouping2;
	422	}
	423	} else {
	424	U_ASSERT(sepType == 2);
	425	// After the decimal separator.
	426	return true;
	427	}
	428	} else {
	429	if (sepType == 1) {
	430	// #11230: don't accept middle groups with only 1 digit.
	431	return count != 1;
	432	} else {
	433	return true;
	434	}
	435	}
	436	}
	437
	438	bool DecimalMatcher::smokeTest(const StringSegment& segment) const {
	439	// The common case uses a static leadSet for efficiency.
	440	if (fLocalDigitStrings.isNull() && leadSet != nullptr) {
	441	return segment.startsWith(*leadSet);
	442	}
	443	if (segment.startsWith(*separatorSet) \|\| u_isdigit(segment.getCodePoint())) {
	444	return true;
	445	}
	446	if (fLocalDigitStrings.isNull()) {
	447	return false;
	448	}
3d1f044b A	449	// The following test is Apple-specific, for <rdar://7632623>;
	450	// if \u3007 is treated as 0 for parsing, \u96F6 should be too.
	451	if (segment.startsWith(0x96F6) && fLocalDigitStrings[0].length()==1 && fLocalDigitStrings[0].charAt(0)==0x3007) {
	452	return true;
	453	}
0f5d89e8 A	454	for (int32_t i = 0; i < 10; i++) {
	455	if (segment.startsWith(fLocalDigitStrings[i])) {
	456	return true;
	457	}
	458	}
	459	return false;
	460	}
	461
	462	UnicodeString DecimalMatcher::toString() const {
	463	return u"<Decimal>";
	464	}
	465
	466
	467	#endif /* #if !UCONFIG_NO_FORMATTING */