[apple/icu.git] / icuSources / i18n / numparse_types.h

// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html

#include "unicode/utypes.h"

#if !UCONFIG_NO_FORMATTING
#ifndef __NUMPARSE_TYPES_H__
#define __NUMPARSE_TYPES_H__

#include "unicode/uobject.h"
#include "number_decimalquantity.h"

U_NAMESPACE_BEGIN namespace numparse {
namespace impl {

// Forward-declarations
class StringSegment;
class ParsedNumber;

// Integer carriers for OR-ed flag bits: result_flags_t holds ResultFlags values,
// parse_flags_t holds ParseFlags values.
using result_flags_t = int32_t;
using parse_flags_t = int32_t;

/**
 * Bit values stored in a result_flags_t.
 *
 * Every enumerator occupies exactly one bit. Bit 4 is deliberately left unassigned so that
 * the remaining enumerators keep their historical values.
 */
enum ResultFlags {
    FLAG_NEGATIVE              = 1 << 0,
    FLAG_PERCENT               = 1 << 1,
    FLAG_PERMILLE              = 1 << 2,
    FLAG_HAS_EXPONENT          = 1 << 3,
    // bit 4 (0x0010) was FLAG_HAS_DEFAULT_CURRENCY; no longer used
    FLAG_HAS_DECIMAL_SEPARATOR = 1 << 5,
    FLAG_NAN                   = 1 << 6,
    FLAG_INFINITY              = 1 << 7,
    FLAG_FAIL                  = 1 << 8,
};

/**
 * Bit values stored in a parse_flags_t.
 *
 * Enumerators are listed in ascending bit order. Bits whose flag has been retired are kept
 * as comments so that the numeric values of the remaining flags stay stable.
 */
enum ParseFlags {
    PARSE_FLAG_IGNORE_CASE              = 1 << 0,
    PARSE_FLAG_MONETARY_SEPARATORS      = 1 << 1,
    PARSE_FLAG_STRICT_SEPARATORS        = 1 << 2,
    PARSE_FLAG_STRICT_GROUPING_SIZE     = 1 << 3,
    PARSE_FLAG_INTEGER_ONLY             = 1 << 4,
    PARSE_FLAG_GROUPING_DISABLED        = 1 << 5,
    // bit 6 (0x0040) was PARSE_FLAG_FRACTION_GROUPING_ENABLED; see #10794
    PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES = 1 << 7,
    PARSE_FLAG_USE_FULL_AFFIXES         = 1 << 8,
    PARSE_FLAG_EXACT_AFFIX              = 1 << 9,
    PARSE_FLAG_PLUS_SIGN_ALLOWED        = 1 << 10,
    // bit 11 (0x0800) was PARSE_FLAG_OPTIMIZE (no longer used); the value is now taken by
    // the Apple-specific flag below.
    PARSE_FLAG_HAS_TRAIL_CURRENCY       = 1 << 11, // Apple <rdar://problem/51938595>
    // bit 12 (0x1000) is PARSE_FLAG_FORCE_BIG_DECIMAL; not used in ICU4C
    PARSE_FLAG_NO_FOREIGN_CURRENCY      = 1 << 13,
    PARSE_FLAG_ALLOW_INFINITE_RECURSION = 1 << 14,
};


// TODO: Is this class worthwhile?
/**
 * A NUL-terminated UChar string held in a MaybeStackArray with stackCapacity inline elements.
 *
 * The contents are fixed at construction time. Use toAliasedUnicodeString() to view them as a
 * UnicodeString without copying.
 */
template<int32_t stackCapacity>
class CompactUnicodeString {
    // Checked at class scope so that every instantiation is covered, not only those that
    // happen to use the default constructor. At least one element is needed for the NUL.
    static_assert(stackCapacity > 0, "cannot have zero space on stack");

  public:
    /** Creates an empty string: just the terminating NUL. */
    CompactUnicodeString() {
        fBuffer[0] = 0;
    }

    /**
     * Copies the contents of text and appends a terminating NUL.
     *
     * If text has no readable buffer (getBuffer() returned null), or if the buffer could not be
     * sized to hold text plus the terminator, the result is the empty string. This signature has
     * no way to report an error, and writing past the end of the buffer must be avoided.
     *
     * @param text The string to copy.
     */
    CompactUnicodeString(const UnicodeString& text)
            : fBuffer(text.length() + 1) {
        int32_t length = text.length();
        const char16_t* source = text.getBuffer();
        // The sizing constructor of MaybeStackArray takes no error code, so confirm that there
        // is really room for `length` code units plus the NUL before touching the buffer.
        if (source == nullptr || fBuffer.getCapacity() <= length) {
            length = 0;
        }
        // Never hand a null or zero-length source to memcpy.
        if (length > 0) {
            uprv_memcpy(fBuffer.getAlias(), source, sizeof(UChar) * length);
        }
        fBuffer[length] = 0;
    }

    /**
     * Returns a UnicodeString built with the read-only aliasing constructor over this object's
     * NUL-terminated buffer (length -1 = determine the length from the terminator).
     *
     * The returned string refers to this object's storage, so it should not be used after this
     * object has been destroyed.
     */
    inline UnicodeString toAliasedUnicodeString() const {
        return UnicodeString(TRUE, fBuffer.getAlias(), -1);
    }

    /** Compares the contents of two strings with the same stack capacity. */
    bool operator==(const CompactUnicodeString& other) const {
        // Use the alias-only constructor and then call UnicodeString operator==
        return toAliasedUnicodeString() == other.toAliasedUnicodeString();
    }

  private:
    // Holds the code units followed by a terminating NUL.
    MaybeStackArray<UChar, stackCapacity> fBuffer;
};


/**
 * Struct-like class to hold the results of a parsing routine.
 *
 * All data members are public. The member functions are only declared here; except for the
 * defaulted copy operations, their definitions are out of line.
 *
 * @author sffc
 */
// Exported as U_I18N_API for tests
class U_I18N_API ParsedNumber {
  public:

    /**
     * The numerical value that was parsed.
     */
    ::icu::number::impl::DecimalQuantity quantity;

    /**
     * The index of the last char consumed during parsing. If parsing started at index 0, this is equal
     * to the number of chars consumed. This is NOT necessarily the same as the StringSegment offset;
     * "weak" chars, like whitespace, change the offset, but charEnd is not touched until a
     * "strong" char is encountered (see setCharsConsumed).
     */
    int32_t charEnd;

    /**
     * Boolean flags: a combination of ResultFlags bits (see the constants above).
     */
    result_flags_t flags;

    /**
     * The pattern string corresponding to the prefix that got consumed.
     */
    UnicodeString prefix;

    /**
     * The pattern string corresponding to the suffix that got consumed.
     */
    UnicodeString suffix;

    /**
     * The currency that got consumed.
     * NOTE(review): four UChars, presumably a 3-letter currency code plus a terminating NUL — confirm.
     */
    UChar currencyCode[4];

    /** Default constructor; defined out of line. */
    ParsedNumber();

    /** Member-wise copy. */
    ParsedNumber(const ParsedNumber& other) = default;

    /** Member-wise copy assignment. */
    ParsedNumber& operator=(const ParsedNumber& other) = default;

    /**
     * Defined out of line.
     * NOTE(review): presumably resets the fields to their initial state — confirm against the
     * implementation.
     */
    void clear();

    /**
     * Call this method to register that a "strong" char was consumed. This should be done after calling
     * {@link StringSegment#setOffset} or {@link StringSegment#adjustOffset} except when the char is
     * "weak", like whitespace.
     *
     * <p>
     * <strong>What is a strong versus weak char?</strong> The behavior of number parsing is to "stop"
     * after reading the number, even if there is other content following the number. For example, after
     * parsing the string "123 " (123 followed by a space), the cursor should be set to 3, not 4, even
     * though there are matchers that accept whitespace. In this example, the digits are strong, whereas
     * the whitespace is weak. Grouping separators are weak, whereas decimal separators are strong. Most
     * other chars are strong.
     *
     * @param segment
     *            The current StringSegment, usually immediately following a call to setOffset.
     */
    void setCharsConsumed(const StringSegment& segment);

    /** Apply certain number-related flags to the DecimalQuantity. */
    void postProcess();

    /**
     * Returns whether the parse was successful. To be successful, at least one char must have been
     * consumed, and the failure flag must not be set.
     */
    bool success() const;

    /**
     * Defined out of line.
     * NOTE(review): presumably reports whether a numeric value has been recorded in this result —
     * confirm against the implementation.
     */
    bool seenNumber() const;

    /**
     * Defined out of line.
     * NOTE(review): presumably converts the parsed value to a double — confirm.
     *
     * @param status
     *            ICU error code (in/out).
     */
    double getDouble(UErrorCode& status) const;

    /**
     * Defined out of line.
     * NOTE(review): presumably stores the parsed value into output — confirm.
     *
     * @param output
     *            The Formattable to write to.
     * @param parseFlags
     *            A combination of ParseFlags bits.
     */
    void populateFormattable(Formattable& output, parse_flags_t parseFlags) const;

    /**
     * Defined out of line. Note that this member function is not declared const.
     * NOTE(review): the ranking criteria are not visible in this header — see the implementation.
     */
    bool isBetterThan(const ParsedNumber& other);
};


/**
 * A mutable class allowing for a String with a variable offset and length. The charAt and length
 * methods operate relative to the current offset into the String. (The original comment also
 * listed a subSequence method; no such method is declared in this class.)
 *
 * The member functions are only declared here; their definitions are out of line.
 *
 * @author sffc
 */
// Exported as U_I18N_API for tests
class U_I18N_API StringSegment : public UMemory {
  public:
    /**
     * @param str
     *            The string to operate on. The class stores a reference (see fStr), not a copy.
     * @param ignoreCase
     *            Whether case folding is enabled for this segment.
     */
    StringSegment(const UnicodeString& str, bool ignoreCase);

    int32_t getOffset() const;

    void setOffset(int32_t start);

    /**
     * Equivalent to <code>setOffset(getOffset()+delta)</code>.
     *
     * <p>
     * This method is usually called by a Matcher to register that a char was consumed. If the char is
     * strong (it usually is, except for things like whitespace), follow this with a call to
     * {@link ParsedNumber#setCharsConsumed}. For more information on strong chars, see that method.
     */
    void adjustOffset(int32_t delta);

    /**
     * Adjusts the offset by the width of the current code point, either 1 or 2 chars.
     */
    void adjustOffsetByCodePoint();

    void setLength(int32_t length);

    void resetLength();

    int32_t length() const;

    char16_t charAt(int32_t index) const;

    UChar32 codePointAt(int32_t index) const;

    UnicodeString toUnicodeString() const;

    // NOTE(review): the name suggests the result is only valid temporarily (possibly aliasing
    // the underlying string) — confirm against the implementation before storing the result.
    const UnicodeString toTempUnicodeString() const;

    /**
     * Returns the first code point in the string segment, or -1 if the string starts with an invalid
     * code point.
     *
     * <p>
     * <strong>Important:</strong> Most of the time, you should use {@link #startsWith}, which handles
     * case folding logic, instead of this method.
     */
    UChar32 getCodePoint() const;

    /**
     * Returns true if the first code point of this StringSegment equals the given code point.
     *
     * <p>
     * This method will perform case folding if case folding is enabled for the parser.
     */
    bool startsWith(UChar32 otherCp) const;

    /**
     * Returns true if the first code point of this StringSegment is in the given UnicodeSet.
     */
    bool startsWith(const UnicodeSet& uniset) const;

    /**
     * Returns true if there is at least one code point of overlap between this StringSegment and the
     * given UnicodeString.
     */
    bool startsWith(const UnicodeString& other) const;

    /**
     * Returns the length of the prefix shared by this StringSegment and the given UnicodeString. For
     * example, if this string segment is "aab", and the given string is "aac", this method returns 2,
     * since the first 2 characters are the same.
     *
     * <p>
     * This method only returns offsets along code point boundaries.
     *
     * <p>
     * This method will perform case folding if case folding was enabled in the constructor.
     *
     * <p>
     * IMPORTANT: The given UnicodeString must not be empty! It is the caller's responsibility to check.
     */
    int32_t getCommonPrefixLength(const UnicodeString& other);

    /**
     * Like {@link #getCommonPrefixLength}, but never performs case folding, even if case folding is
     * enabled for the parser.
     */
    int32_t getCaseSensitivePrefixLength(const UnicodeString& other);

    bool operator==(const UnicodeString& other) const;

  private:
    // Reference member: this class does not own the string, so the referenced UnicodeString
    // must outlive the StringSegment.
    const UnicodeString& fStr;
    int32_t fStart;
    int32_t fEnd;
    bool fFoldCase;

    // NOTE(review): presumably the shared implementation behind getCommonPrefixLength and
    // getCaseSensitivePrefixLength, selected by foldCase — confirm.
    int32_t getPrefixLengthInternal(const UnicodeString& other, bool foldCase);

    static bool codePointsEqual(UChar32 cp1, UChar32 cp2, bool foldCase);
};


/**
 * The core interface implemented by all matchers used for number parsing.
 *
 * Given a string, there should NOT be more than one way to consume the string with the same matcher
 * applied multiple times. If there is, the non-greedy parsing algorithm will be unhappy and may enter an
 * exponential-time loop. For example, consider the "A Matcher" that accepts "any number of As". Given
 * the string "AAAA" (N = 4), there are 2^(N-1) = 8 ways to apply the A Matcher to this string: you could
 * have the A Matcher apply 4 times to each character; you could have it apply just once to all the
 * characters; you could have it apply to the first 2 characters and the second 2 characters; and so on.
 * A better version of the "A Matcher" would be for it to accept exactly one A, and allow the algorithm
 * to run it repeatedly to consume a string of multiple As. The A Matcher can override isFlexible()
 * below to signal that it can be applied multiple times in a row.
 *
 * @author sffc
 */
// Exported as U_I18N_API for tests
class U_I18N_API NumberParseMatcher {
  public:
    // Virtual destructor, defined out of line.
    virtual ~NumberParseMatcher();

    /**
     * Matchers can override this method to return true to indicate that they are optional and can be run
     * repeatedly. Used by SeriesMatcher, primarily in the context of IgnorablesMatcher.
     *
     * @return false in this default implementation.
     */
    virtual bool isFlexible() const {
        return false;
    }

    /**
     * Runs this matcher starting at the beginning of the given StringSegment. If this matcher finds
     * something interesting in the StringSegment, it should update the offset of the StringSegment
     * corresponding to how many chars were matched.
     *
     * This method is thread-safe.
     *
     * @param segment
     *            The StringSegment to match against. Matches always start at the beginning of the
     *            segment. The segment is guaranteed to contain at least one char.
     * @param result
     *            The data structure to store results if the match succeeds.
     * @param status
     *            ICU error code (in/out).
     * @return Whether this matcher thinks there may be more interesting chars beyond the end of the
     *         string segment.
     */
    virtual bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const = 0;

    /**
     * Performs a fast "smoke check" for whether or not this matcher could possibly match against the
     * given string segment. The test should be as fast as possible but also as restrictive as possible.
     * For example, matchers can maintain a UnicodeSet of all code points that could possibly start a
     * match. Matchers should use the {@link StringSegment#startsWith} method in order to correctly
     * handle case folding.
     *
     * @param segment
     *            The segment to check against.
     * @return true if the matcher might be able to match against this segment; false if it definitely
     *         will not be able to match.
     */
    virtual bool smokeTest(const StringSegment& segment) const = 0;

    /**
     * Method called at the end of a parse, after all matchers have failed to consume any more chars.
     * Allows a matcher to make final modifications to the result given the knowledge that no more
     * matches are possible.
     *
     * @param result
     *            The data structure to store results. (The parameter is unnamed here because the
     *            default implementation does not use it.)
     */
    virtual void postProcess(ParsedNumber&) const {
        // Default implementation: no-op
    }

    // String for debugging
    virtual UnicodeString toString() const = 0;

  protected:
    // No construction except by subclasses!
    NumberParseMatcher() = default;
};


/**
 * Interface for use in arguments: an abstract sink to which matchers can be added through
 * addMatcher().
 */
// Exported as U_I18N_API for tests
class U_I18N_API MutableMatcherCollection {
  public:
    virtual ~MutableMatcherCollection() = default;

    /**
     * Adds a matcher to this collection.
     *
     * @param matcher
     *            The matcher to add, passed by non-const reference.
     *            NOTE(review): ownership and lifetime expectations are not visible in this header;
     *            presumably the caller must keep the matcher alive while the collection uses it —
     *            confirm against the implementations.
     */
    virtual void addMatcher(NumberParseMatcher& matcher) = 0;
};


} // namespace impl
} // namespace numparse
U_NAMESPACE_END

#endif //__NUMPARSE_TYPES_H__
#endif /* #if !UCONFIG_NO_FORMATTING */
Commit	Line	Data
0f5d89e8 A	1	// © 2018 and later: Unicode, Inc. and others.
	2	// License & terms of use: http://www.unicode.org/copyright.html
	3
	4	#include "unicode/utypes.h"
	5
	6	#if !UCONFIG_NO_FORMATTING
	7	#ifndef __NUMPARSE_TYPES_H__
	8	#define __NUMPARSE_TYPES_H__
	9
	10	#include "unicode/uobject.h"
	11	#include "number_decimalquantity.h"
	12
	13	U_NAMESPACE_BEGIN namespace numparse {
	14	namespace impl {
	15
	16	// Forward-declarations
	17	class StringSegment;
	18	class ParsedNumber;
	19
	20	typedef int32_t result_flags_t;
	21	typedef int32_t parse_flags_t;
	22
	23	/** Flags for the type result_flags_t */
	24	enum ResultFlags {
	25	FLAG_NEGATIVE = 0x0001,
	26	FLAG_PERCENT = 0x0002,
	27	FLAG_PERMILLE = 0x0004,
	28	FLAG_HAS_EXPONENT = 0x0008,
	29	// FLAG_HAS_DEFAULT_CURRENCY = 0x0010, // no longer used
	30	FLAG_HAS_DECIMAL_SEPARATOR = 0x0020,
	31	FLAG_NAN = 0x0040,
	32	FLAG_INFINITY = 0x0080,
	33	FLAG_FAIL = 0x0100,
	34	};
	35
	36	/** Flags for the type parse_flags_t */
	37	enum ParseFlags {
	38	PARSE_FLAG_IGNORE_CASE = 0x0001,
	39	PARSE_FLAG_MONETARY_SEPARATORS = 0x0002,
	40	PARSE_FLAG_STRICT_SEPARATORS = 0x0004,
	41	PARSE_FLAG_STRICT_GROUPING_SIZE = 0x0008,
	42	PARSE_FLAG_INTEGER_ONLY = 0x0010,
	43	PARSE_FLAG_GROUPING_DISABLED = 0x0020,
	44	// PARSE_FLAG_FRACTION_GROUPING_ENABLED = 0x0040, // see #10794
	45	PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES = 0x0080,
	46	PARSE_FLAG_USE_FULL_AFFIXES = 0x0100,
	47	PARSE_FLAG_EXACT_AFFIX = 0x0200,
	48	PARSE_FLAG_PLUS_SIGN_ALLOWED = 0x0400,
	49	// PARSE_FLAG_OPTIMIZE = 0x0800, // no longer used
	50	// PARSE_FLAG_FORCE_BIG_DECIMAL = 0x1000, // not used in ICU4C
	51	PARSE_FLAG_NO_FOREIGN_CURRENCY = 0x2000,
3d1f044b	52	PARSE_FLAG_ALLOW_INFINITE_RECURSION = 0x4000,
c5116b9f	53	PARSE_FLAG_HAS_TRAIL_CURRENCY = 0x0800, // Apple <rdar://problem/51938595>
0f5d89e8 A	54	};
	55
	56
	57	// TODO: Is this class worthwhile?
	58	template<int32_t stackCapacity>
	59	class CompactUnicodeString {
	60	public:
	61	CompactUnicodeString() {
	62	static_assert(stackCapacity > 0, "cannot have zero space on stack");
	63	fBuffer[0] = 0;
	64	}
	65
	66	CompactUnicodeString(const UnicodeString& text)
	67	: fBuffer(text.length() + 1) {
3d1f044b	68	uprv_memcpy(fBuffer.getAlias(), text.getBuffer(), sizeof(UChar) * text.length());
0f5d89e8 A	69	fBuffer[text.length()] = 0;
	70	}
	71
	72	inline UnicodeString toAliasedUnicodeString() const {
	73	return UnicodeString(TRUE, fBuffer.getAlias(), -1);
	74	}
	75
	76	bool operator==(const CompactUnicodeString& other) const {
	77	// Use the alias-only constructor and then call UnicodeString operator==
	78	return toAliasedUnicodeString() == other.toAliasedUnicodeString();
	79	}
	80
	81	private:
	82	MaybeStackArray<UChar, stackCapacity> fBuffer;
	83	};
	84
	85
	86	/**
	87	* Struct-like class to hold the results of a parsing routine.
	88	*
	89	* @author sffc
	90	*/
	91	// Exported as U_I18N_API for tests
	92	class U_I18N_API ParsedNumber {
	93	public:
	94
	95	/**
	96	* The numerical value that was parsed.
	97	*/
	98	::icu::number::impl::DecimalQuantity quantity;
	99
	100	/**
	101	* The index of the last char consumed during parsing. If parsing started at index 0, this is equal
	102	* to the number of chars consumed. This is NOT necessarily the same as the StringSegment offset;
	103	* "weak" chars, like whitespace, change the offset, but the charsConsumed is not touched until a
	104	* "strong" char is encountered.
	105	*/
	106	int32_t charEnd;
	107
	108	/**
	109	* Boolean flags (see constants above).
	110	*/
	111	result_flags_t flags;
	112
	113	/**
	114	* The pattern string corresponding to the prefix that got consumed.
	115	*/
	116	UnicodeString prefix;
	117
	118	/**
	119	* The pattern string corresponding to the suffix that got consumed.
	120	*/
	121	UnicodeString suffix;
	122
	123	/**
	124	* The currency that got consumed.
	125	*/
	126	UChar currencyCode[4];
	127
	128	ParsedNumber();
	129
	130	ParsedNumber(const ParsedNumber& other) = default;
	131
	132	ParsedNumber& operator=(const ParsedNumber& other) = default;
133
134	void clear();
135
136	/**
137	* Call this method to register that a "strong" char was consumed. This should be done after calling
138	* {@link StringSegment#setOffset} or {@link StringSegment#adjustOffset} except when the char is
139	* "weak", like whitespace.
140	*
141	* <p>
142	* <strong>What is a strong versus weak char?</strong> The behavior of number parsing is to "stop"
143	* after reading the number, even if there is other content following the number. For example, after
144	* parsing the string "123 " (123 followed by a space), the cursor should be set to 3, not 4, even
145	* though there are matchers that accept whitespace. In this example, the digits are strong, whereas
146	* the whitespace is weak. Grouping separators are weak, whereas decimal separators are strong. Most
147	* other chars are strong.
148	*
149	* @param segment
150	* The current StringSegment, usually immediately following a call to setOffset.
151	*/
152	void setCharsConsumed(const StringSegment& segment);
153
154	/** Apply certain number-related flags to the DecimalQuantity. */
155	void postProcess();
156
157	/**
158	* Returns whether this the parse was successful. To be successful, at least one char must have been
159	* consumed, and the failure flag must not be set.
160	*/
161	bool success() const;
162
163	bool seenNumber() const;
164
3d1f044b	165	double getDouble(UErrorCode& status) const;
0f5d89e8 A	166
	167	void populateFormattable(Formattable& output, parse_flags_t parseFlags) const;
	168
	169	bool isBetterThan(const ParsedNumber& other);
	170	};
	171
	172
	173	/**
	174	* A mutable class allowing for a String with a variable offset and length. The charAt, length, and
	175	* subSequence methods all operate relative to the fixed offset into the String.
	176	*
	177	* @author sffc
	178	*/
	179	// Exported as U_I18N_API for tests
	180	class U_I18N_API StringSegment : public UMemory {
	181	public:
	182	StringSegment(const UnicodeString& str, bool ignoreCase);
	183
	184	int32_t getOffset() const;
	185
	186	void setOffset(int32_t start);
	187
	188	/**
	189	* Equivalent to <code>setOffset(getOffset()+delta)</code>.
	190	*
	191	* <p>
	192	* This method is usually called by a Matcher to register that a char was consumed. If the char is
	193	* strong (it usually is, except for things like whitespace), follow this with a call to
	194	* {@link ParsedNumber#setCharsConsumed}. For more information on strong chars, see that method.
	195	*/
	196	void adjustOffset(int32_t delta);
	197
	198	/**
	199	* Adjusts the offset by the width of the current code point, either 1 or 2 chars.
	200	*/
	201	void adjustOffsetByCodePoint();
	202
	203	void setLength(int32_t length);
	204
	205	void resetLength();
	206
	207	int32_t length() const;
	208
	209	char16_t charAt(int32_t index) const;
	210
	211	UChar32 codePointAt(int32_t index) const;
	212
	213	UnicodeString toUnicodeString() const;
	214
	215	const UnicodeString toTempUnicodeString() const;
	216
	217	/**
	218	* Returns the first code point in the string segment, or -1 if the string starts with an invalid
	219	* code point.
	220	*
	221	* <p>
	222	* <strong>Important:</strong> Most of the time, you should use {@link #matches}, which handles case
	223	* folding logic, instead of this method.
	224	*/
	225	UChar32 getCodePoint() const;
	226
	227	/**
	228	* Returns true if the first code point of this StringSegment equals the given code point.
	229	*
230	* <p>
231	* This method will perform case folding if case folding is enabled for the parser.
232	*/
233	bool startsWith(UChar32 otherCp) const;
234
235	/**
236	* Returns true if the first code point of this StringSegment is in the given UnicodeSet.
237	*/
238	bool startsWith(const UnicodeSet& uniset) const;
239
240	/**
241	* Returns true if there is at least one code point of overlap between this StringSegment and the
242	* given UnicodeString.
243	*/
244	bool startsWith(const UnicodeString& other) const;
245
246	/**
247	* Returns the length of the prefix shared by this StringSegment and the given CharSequence. For
248	* example, if this string segment is "aab", and the char sequence is "aac", this method returns 2,
249	* since the first 2 characters are the same.
250	*
251	* <p>
252	* This method only returns offsets along code point boundaries.
253	*
254	* <p>
255	* This method will perform case folding if case folding was enabled in the constructor.
256	*
257	* <p>
258	* IMPORTANT: The given UnicodeString must not be empty! It is the caller's responsibility to check.
259	*/
260	int32_t getCommonPrefixLength(const UnicodeString& other);
261
262	/**
263	* Like {@link #getCommonPrefixLength}, but never performs case folding, even if case folding is
264	* enabled for the parser.
265	*/
266	int32_t getCaseSensitivePrefixLength(const UnicodeString& other);
267
268	bool operator==(const UnicodeString& other) const;
269
270	private:
3d1f044b	271	const UnicodeString& fStr;
0f5d89e8 A	272	int32_t fStart;
	273	int32_t fEnd;
	274	bool fFoldCase;
	275
	276	int32_t getPrefixLengthInternal(const UnicodeString& other, bool foldCase);
	277
	278	static bool codePointsEqual(UChar32 cp1, UChar32 cp2, bool foldCase);
	279	};
	280
	281
	282	/**
	283	* The core interface implemented by all matchers used for number parsing.
	284	*
	285	* Given a string, there should NOT be more than one way to consume the string with the same matcher
	286	* applied multiple times. If there is, the non-greedy parsing algorithm will be unhappy and may enter an
	287	* exponential-time loop. For example, consider the "A Matcher" that accepts "any number of As". Given
	288	* the string "AAAA", there are 2^N = 8 ways to apply the A Matcher to this string: you could have the A
	289	* Matcher apply 4 times to each character; you could have it apply just once to all the characters; you
	290	* could have it apply to the first 2 characters and the second 2 characters; and so on. A better version
	291	* of the "A Matcher" would be for it to accept exactly one A, and allow the algorithm to run it
	292	* repeatedly to consume a string of multiple As. The A Matcher can implement the Flexible interface
	293	* below to signal that it can be applied multiple times in a row.
	294	*
	295	* @author sffc
	296	*/
	297	// Exported as U_I18N_API for tests
	298	class U_I18N_API NumberParseMatcher {
	299	public:
	300	virtual ~NumberParseMatcher();
	301
	302	/**
	303	* Matchers can override this method to return true to indicate that they are optional and can be run
	304	* repeatedly. Used by SeriesMatcher, primarily in the context of IgnorablesMatcher.
	305	*/
	306	virtual bool isFlexible() const {
	307	return false;
	308	}
	309
	310	/**
	311	* Runs this matcher starting at the beginning of the given StringSegment. If this matcher finds
	312	* something interesting in the StringSegment, it should update the offset of the StringSegment
	313	* corresponding to how many chars were matched.
	314	*
	315	* This method is thread-safe.
	316	*
	317	* @param segment
	318	* The StringSegment to match against. Matches always start at the beginning of the
	319	* segment. The segment is guaranteed to contain at least one char.
	320	* @param result
	321	* The data structure to store results if the match succeeds.
	322	* @return Whether this matcher thinks there may be more interesting chars beyond the end of the
	323	* string segment.
	324	*/
	325	virtual bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const = 0;
	326
	327	/**
	328	* Performs a fast "smoke check" for whether or not this matcher could possibly match against the
	329	* given string segment. The test should be as fast as possible but also as restrictive as possible.
	330	* For example, matchers can maintain a UnicodeSet of all code points that count possibly start a
	331	* match. Matchers should use the {@link StringSegment#startsWith} method in order to correctly
	332	* handle case folding.
	333	*
	334	* @param segment
	335	* The segment to check against.
336	* @return true if the matcher might be able to match against this segment; false if it definitely
337	* will not be able to match.
338	*/
339	virtual bool smokeTest(const StringSegment& segment) const = 0;
340
341	/**
342	* Method called at the end of a parse, after all matchers have failed to consume any more chars.
343	* Allows a matcher to make final modifications to the result given the knowledge that no more
344	* matches are possible.
345	*
346	* @param result
347	* The data structure to store results.
348	*/
349	virtual void postProcess(ParsedNumber&) const {
350	// Default implementation: no-op
3d1f044b	351	}
0f5d89e8 A	352
	353	// String for debugging
	354	virtual UnicodeString toString() const = 0;
	355
	356	protected:
	357	// No construction except by subclasses!
	358	NumberParseMatcher() = default;
	359	};
	360
	361
	362	/**
	363	* Interface for use in arguments.
	364	*/
	365	// Exported as U_I18N_API for tests
	366	class U_I18N_API MutableMatcherCollection {
	367	public:
	368	virtual ~MutableMatcherCollection() = default;
	369
	370	virtual void addMatcher(NumberParseMatcher& matcher) = 0;
	371	};
	372
	373
	374	} // namespace impl
	375	} // namespace numparse
	376	U_NAMESPACE_END
	377
	378	#endif //__NUMPARSE_TYPES_H__
	379	#endif /* #if !UCONFIG_NO_FORMATTING */