]>
Commit | Line | Data |
---|---|---|
b75a7d8f | 1 | /* |
73c04bcf | 2 | * Copyright (C) 2001-2005, International Business Machines Corporation and others. All Rights Reserved. |
b75a7d8f A |
3 | ********************************************************************** |
4 | * Date Name Description | |
5 | * 07/18/01 aliu Creation. | |
6 | ********************************************************************** | |
7 | */ | |
8 | #ifndef UNIMATCH_H | |
9 | #define UNIMATCH_H | |
10 | ||
11 | #include "unicode/utypes.h" | |
12 | ||
73c04bcf A |
13 | /** |
14 | * \file | |
15 | * \brief C++ API: Unicode Matcher | |
16 | */ | |
17 | ||
18 | ||
b75a7d8f A |
19 | U_NAMESPACE_BEGIN |
20 | ||
21 | class Replaceable; | |
22 | class UnicodeString; | |
23 | class UnicodeSet; | |
24 | ||
25 | /** | |
26 | * Constants returned by <code>UnicodeMatcher::matches()</code> | |
27 | * indicating the degree of match: a mismatch, a partial match, or a complete match. | |
374ca955 | 28 | * @stable ICU 2.4 |
b75a7d8f A |
29 | */ |
30 | enum UMatchDegree { | |
31 | /** | |
32 | * Constant returned by <code>matches()</code> indicating a | |
33 | * mismatch between the text and this matcher. The text contains | |
34 | * a character which does not match, or the text does not contain | |
35 | * all desired characters for a non-incremental match. | |
374ca955 | 36 | * @stable ICU 2.4 |
b75a7d8f A |
37 | */ |
38 | U_MISMATCH, | |
39 | ||
40 | /** | |
41 | * Constant returned by <code>matches()</code> indicating a | |
42 | * partial match between the text and this matcher. This value is | |
43 | * only returned for incremental match operations. All characters | |
44 | * of the text match, but more characters are required for a | |
45 | * complete match. Alternatively, for variable-length matchers, | |
46 | * all characters of the text match, and if more characters were | |
47 | * supplied at limit, they might also match. | |
374ca955 | 48 | * @stable ICU 2.4 |
b75a7d8f A |
49 | */ |
50 | U_PARTIAL_MATCH, | |
51 | ||
52 | /** | |
53 | * Constant returned by <code>matches()</code> indicating a | |
54 | * complete match between the text and this matcher. For an | |
55 | * incremental variable-length match, this value is returned if | |
56 | * the given text matches, and it is known that additional | |
57 | * characters would not alter the extent of the match. | |
374ca955 | 58 | * @stable ICU 2.4 |
b75a7d8f A |
59 | */ |
60 | U_MATCH | |
61 | }; | |
62 | ||
63 | /** | |
64 | * <code>UnicodeMatcher</code> defines a protocol for objects that can | |
65 | * match a range of characters in a Replaceable string. Every member function other than the destructor is declared pure virtual. | |
374ca955 | 66 | * @stable ICU 2.4 |
b75a7d8f A |
67 | */ |
68 | class U_COMMON_API UnicodeMatcher /* not : public UObject because this is an interface/mixin class */ { | |
69 | ||
70 | public: | |
71 | /** | |
72 | * Destructor. | |
374ca955 | 73 | * @stable ICU 2.4 |
b75a7d8f | 74 | */ |
374ca955 | 75 | virtual ~UnicodeMatcher(); |
b75a7d8f A |
76 | |
77 | /** | |
78 | * Return a UMatchDegree value indicating the degree of match for | |
79 | * the given text at the given offset. Zero, one, or more | |
80 | * characters may be matched. | |
81 | * | |
82 | * Matching in the forward direction is indicated by limit > | |
83 | * offset. Characters from offset forwards to limit-1 will be | |
84 | * considered for matching. | |
85 | * | |
86 | * Matching in the reverse direction is indicated by limit < | |
87 | * offset. Characters from offset backwards to limit+1 will be | |
88 | * considered for matching. | |
89 | * | |
90 | * If limit == offset then the only match possible is a zero | |
91 | * character match (which subclasses may implement if desired). | |
92 | * | |
93 | * As a side effect, advance the offset parameter to the limit of | |
94 | * the matched substring. In the forward direction, this will be | |
95 | * the index of the last matched character plus one. In the | |
96 | * reverse direction, this will be the index of the last matched | |
97 | * character minus one. | |
98 | * | |
99 | * <p>Note: This method is not const because some classes may | |
100 | * modify their state as the result of a match. | |
101 | * | |
102 | * @param text the text to be matched | |
103 | * @param offset on input, the index into text at which to begin | |
104 | * matching. On output, the limit of the matched text. The | |
105 | * number of matched characters is the output value of offset | |
106 | * minus the input value. Offset should always point to the | |
107 | * HIGH SURROGATE (leading code unit) of a pair of surrogates, | |
108 | * both on entry and upon return. | |
109 | * @param limit the limit index of text to be matched. Greater | |
110 | * than offset for a forward direction match, less than offset for | |
111 | * a backward direction match. The last character to be | |
112 | * considered for matching will be text.charAt(limit-1) in the | |
113 | * forward direction or text.charAt(limit+1) in the backward | |
114 | * direction. | |
115 | * @param incremental if TRUE, then assume further characters may | |
116 | * be inserted at limit and check for partial matching. Otherwise | |
117 | * assume the text as given is complete. | |
118 | * @return a match degree value indicating a full match, a partial | |
119 | * match, or a mismatch. If incremental is FALSE then | |
120 | * U_PARTIAL_MATCH should never be returned. | |
374ca955 | 121 | * @stable ICU 2.4 |
b75a7d8f A |
122 | */ |
123 | virtual UMatchDegree matches(const Replaceable& text, | |
124 | int32_t& offset, | |
125 | int32_t limit, | |
126 | UBool incremental) = 0; | |
127 | ||
128 | /** | |
129 | * Returns a string representation of this matcher. If the result of | |
130 | * calling this function is passed to the appropriate parser, it | |
131 | * will produce another matcher that is equal to this one. | |
132 | * @param result the string to receive the pattern. Previous | |
133 | * contents will be deleted. | |
134 | * @param escapeUnprintable if TRUE then convert unprintable | |
374ca955 A |
135 | * characters to their hex escape representations, \\uxxxx or |
136 | * \\Uxxxxxxxx. Unprintable characters are those other than | |
b75a7d8f | 137 | * U+000A, U+0020..U+007E. |
374ca955 | 138 | * @stable ICU 2.4 |
b75a7d8f A |
139 | */ |
140 | virtual UnicodeString& toPattern(UnicodeString& result, | |
141 | UBool escapeUnprintable = FALSE) const = 0; | |
142 | ||
143 | /** | |
144 | * Returns TRUE if this matcher will match a character c, where (c | |
145 | * & 0xFF) == v, at offset, in the forward direction (with limit > | |
146 | * offset). This is used by <tt>RuleBasedTransliterator</tt> for | |
147 | * indexing. | |
374ca955 | 148 | * @stable ICU 2.4 |
b75a7d8f A |
149 | */ |
150 | virtual UBool matchesIndexValue(uint8_t v) const = 0; | |
151 | ||
152 | /** | |
153 | * Union the set of all characters that may be matched by this object | |
154 | * into the given set. | |
155 | * @param toUnionTo the set into which to union the source characters | |
374ca955 | 156 | * @stable ICU 2.4 |
b75a7d8f A |
157 | */ |
158 | virtual void addMatchSetTo(UnicodeSet& toUnionTo) const = 0; | |
159 | }; | |
160 | ||
161 | U_NAMESPACE_END | |
162 | ||
163 | #endif |