]>
git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/strmatch.cpp
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 **********************************************************************
5 * Copyright (c) 2001-2012, International Business Machines Corporation
6 * and others. All Rights Reserved.
7 **********************************************************************
8 * Date Name Description
9 * 07/23/01 aliu Creation.
10 **********************************************************************
13 #include "unicode/utypes.h"
15 #if !UCONFIG_NO_TRANSLITERATION
20 #include "unicode/uniset.h"
21 #include "unicode/utf16.h"
// Invokes the UOBJECT_DEFINE_RTTI_IMPLEMENTATION macro with StringMatcher as its argument.
// NOTE(review): the macro is defined outside this view; presumably it emits the class's
// run-time type identification boilerplate -- confirm against its definition.
25 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringMatcher
)
// Main constructor. The lines visible here show that:
//   - segmentNumber is initialised from segmentNum, and
//   - theString.extractBetween(start, limit, pattern) is called, with the member `pattern`
//     passed as the last argument.
// NOTE(review): the declarations of start, limit and segmentNum, the remaining initializers
// (including whatever is done with theData) and the braces are missing from this view --
// confirm against the complete source.
27 StringMatcher::StringMatcher(const UnicodeString
& theString
,
31 const TransliterationRuleData
& theData
) :
33 segmentNumber(segmentNum
),
37 theString
.extractBetween(start
, limit
, pattern
);
// Copy constructor. The visible initializers copy segmentNumber, matchStart and matchLimit
// from the corresponding members of o.
// NOTE(review): other initializers (for example for pattern and data) and the constructor body
// are not visible in this view -- confirm against the complete source.
40 StringMatcher::StringMatcher(const StringMatcher
& o
) :
46 segmentNumber(o
.segmentNumber
),
47 matchStart(o
.matchStart
),
48 matchLimit(o
.matchLimit
)
// Destructor. NOTE(review): its body and closing brace are not visible in this view.
55 StringMatcher::~StringMatcher() {
59 * Implement UnicodeFunctor
// Returns a pointer to a new StringMatcher that is copy-constructed from *this.
// NOTE(review): the object is allocated with a plain `new`; presumably the caller takes
// ownership of it -- confirm against the UnicodeFunctor contract.
61 UnicodeFunctor
* StringMatcher::clone() const {
62 return new StringMatcher(*this);
66 * UnicodeFunctor API. Cast 'this' to a UnicodeMatcher* pointer
67 * and return the pointer.
// Produces a non-const UnicodeMatcher* view of this object in two steps: const_cast removes
// the constness of `this`, then static_cast converts the result to the UnicodeMatcher base.
// NOTE(review): the return statement is not visible in this view; presumably nonconst_base
// is what gets returned.
69 UnicodeMatcher
* StringMatcher::toMatcher() const {
70 StringMatcher
*nonconst_this
= const_cast<StringMatcher
*>(this);
71 UnicodeMatcher
*nonconst_base
= static_cast<UnicodeMatcher
*>(nonconst_this
);
77 * UnicodeFunctor API. Cast 'this' to a UnicodeReplacer* pointer
78 * and return the pointer.
// Produces a non-const UnicodeReplacer* view of this object in two steps: const_cast removes
// the constness of `this`, then static_cast converts the result to the UnicodeReplacer base.
// NOTE(review): the return statement is not visible in this view; presumably nonconst_base
// is what gets returned.
80 UnicodeReplacer
* StringMatcher::toReplacer() const {
81 StringMatcher
*nonconst_this
= const_cast<StringMatcher
*>(this);
82 UnicodeReplacer
*nonconst_base
= static_cast<UnicodeReplacer
*>(nonconst_this
);
88 * Implement UnicodeMatcher
// Matches `pattern` against `text`, starting from a local cursor that is initialised to offset.
// Two loops are visible:
//   - a reverse loop that walks pattern from its last character down to index 0, and
//   - a forward loop that walks pattern from index 0 up to its length.
// In both loops each pattern character (keyChar) is passed to data->lookupMatcher(). The visible
// code then either compares keyChar literally with text.charAt(cursor) or delegates to
// subm->matches(text, cursor, limit, incremental).
// NOTE(review): a large part of this function is missing from this view -- the rest of the
// parameter list, the condition that selects the reverse or forward loop, the test that chooses
// between the literal comparison and the sub-matcher, all cursor updates, the handling of the
// sub-matcher's result, and every return except U_PARTIAL_MATCH. Do not rely on this fragment
// for the complete control flow.
90 UMatchDegree
StringMatcher::matches(const Replaceable
& text
,
95 int32_t cursor
= offset
;
97 // Match in the reverse direction
98 for (i
=pattern
.length()-1; i
>=0; --i
) {
99 UChar keyChar
= pattern
.charAt(i
);
100 UnicodeMatcher
* subm
= data
->lookupMatcher(keyChar
);
// Reverse direction: the literal comparison is only made while cursor is still above limit.
102 if (cursor
> limit
&&
103 keyChar
== text
.charAt(cursor
)) {
110 subm
->matches(text
, cursor
, limit
, incremental
);
116 // Record the match position, but adjust for a normal
117 // forward start, limit, and only if a prior match does not
118 // exist -- we want the rightmost match.
// matchStart < 0 means no match has been recorded yet (resetMatch() stores -1 in both fields).
119 if (matchStart
< 0) {
120 matchStart
= cursor
+1;
121 matchLimit
= offset
+1;
// Forward direction: walk pattern from its first character to its last.
124 for (i
=0; i
<pattern
.length(); ++i
) {
125 if (incremental
&& cursor
== limit
) {
126 // We've reached the context limit without a mismatch and
127 // without completing our match.
128 return U_PARTIAL_MATCH
;
130 UChar keyChar
= pattern
.charAt(i
);
131 UnicodeMatcher
* subm
= data
->lookupMatcher(keyChar
);
133 // Don't need the cursor < limit check if
134 // incremental is TRUE (because it's done above); do need
136 if (cursor
< limit
&&
137 keyChar
== text
.charAt(cursor
)) {
144 subm
->matches(text
, cursor
, limit
, incremental
);
150 // Record the match position
160 * Implement UnicodeMatcher
// Appends a rule-pattern rendering of this matcher to `result`.
// Visible steps, in order:
//   1. If segmentNumber > 0, append '(' (code 40).
//   2. For each UChar in pattern, look it up with data->lookupMatcher() and append either the
//      character itself or the pattern produced by m->toPattern(str, escapeUnprintable), in both
//      cases through ICU_Utility::appendToRule with quoteBuf as the quoting buffer.
//   3. If segmentNumber > 0, append ')' (code 41).
//   4. Call appendToRule once more with -1 as the character to flush quoteBuf into result.
// NOTE(review): the test that chooses between the literal and the sub-matcher branch
// (presumably whether m is null), the opening brace and the return statement are not visible
// in this view.
162 UnicodeString
& StringMatcher::toPattern(UnicodeString
& result
,
163 UBool escapeUnprintable
) const
166 UnicodeString str
, quoteBuf
;
167 if (segmentNumber
> 0) {
168 result
.append((UChar
)40); /*(*/
170 for (int32_t i
=0; i
<pattern
.length(); ++i
) {
171 UChar keyChar
= pattern
.charAt(i
);
172 const UnicodeMatcher
* m
= data
->lookupMatcher(keyChar
);
174 ICU_Utility::appendToRule(result
, keyChar
, FALSE
, escapeUnprintable
, quoteBuf
);
176 ICU_Utility::appendToRule(result
, m
->toPattern(str
, escapeUnprintable
),
177 TRUE
, escapeUnprintable
, quoteBuf
);
180 if (segmentNumber
> 0) {
181 result
.append((UChar
)41); /*)*/
183 // Flush quoteBuf out to result
184 ICU_Utility::appendToRule(result
, -1,
185 TRUE
, escapeUnprintable
, quoteBuf
);
190 * Implement UnicodeMatcher
// Decides whether this matcher is compatible with the index value v by looking only at the
// first code point of pattern.
// The code point c at index 0 is passed to data->lookupMatcher(). When no matcher is found
// (m == 0) the result is whether the low 8 bits of c equal v; otherwise the question is
// forwarded to m->matchesIndexValue(v).
// NOTE(review): the value returned for an empty pattern is not visible in this view.
192 UBool
StringMatcher::matchesIndexValue(uint8_t v
) const {
193 if (pattern
.length() == 0) {
196 UChar32 c
= pattern
.char32At(0);
197 const UnicodeMatcher
*m
= data
->lookupMatcher(c
);
198 return (m
== 0) ? ((c
& 0xFF) == v
) : m
->matchesIndexValue(v
);
202 * Implement UnicodeMatcher
// Walks pattern one code point at a time: ch is read with char32At(i) and the index advances
// by U16_LENGTH(ch). Each code point is looked up with data->lookupMatcher(); the visible
// non-NULL path forwards to matcher->addMatchSetTo(toUnionTo).
// NOTE(review): the declaration of ch and the body of the `matcher == NULL` branch are not
// visible in this view; presumably that branch adds ch itself to toUnionTo -- confirm.
204 void StringMatcher::addMatchSetTo(UnicodeSet
& toUnionTo
) const {
206 for (int32_t i
=0; i
<pattern
.length(); i
+=U16_LENGTH(ch
)) {
207 ch
= pattern
.char32At(i
);
208 const UnicodeMatcher
* matcher
= data
->lookupMatcher(ch
);
209 if (matcher
== NULL
) {
212 matcher
->addMatchSetTo(toUnionTo
);
218 * UnicodeReplacer API
// Replaces the range [start, limit) of text with the text of the most recently recorded match.
// Visible steps: when a match is recorded (matchStart >= 0) and is non-empty
// (matchStart != matchLimit), text.copy(matchStart, matchLimit, dest) is called with
// dest == limit, and outLen is set to the match length. Afterwards the original range
// [start, limit) is deleted via handleReplaceBetween with an empty string.
// The cursor parameter is unnamed (its name is commented out), so it is not used.
// NOTE(review): the declarations of start, limit and outLen, and the return statement, are not
// visible in this view; presumably outLen is returned -- confirm.
220 int32_t StringMatcher::replace(Replaceable
& text
,
223 int32_t& /*cursor*/) {
227 // Copy segment with out-of-band data
228 int32_t dest
= limit
;
229 // If there was no match, that means that a quantifier
230 // matched zero-length. E.g., x (a)* y matched "xy".
231 if (matchStart
>= 0) {
232 if (matchStart
!= matchLimit
) {
233 text
.copy(matchStart
, matchLimit
, dest
);
234 outLen
= matchLimit
- matchStart
;
238 text
.handleReplaceBetween(start
, limit
, UnicodeString()); // delete original text
244 * UnicodeReplacer API
// Appends the replacer form of this segment to `rule`: a '$' (U+0024) followed by
// segmentNumber, written by ICU_Utility::appendNumber(rule, segmentNumber, 10, 1).
// The UBool parameter is unnamed (its name is commented out), so it is not used.
// NOTE(review): the arguments 10 and 1 are presumably the radix and the minimum digit count --
// confirm against ICU_Utility::appendNumber. The return statement is not visible in this view.
246 UnicodeString
& StringMatcher::toReplacerPattern(UnicodeString
& rule
,
247 UBool
/*escapeUnprintable*/) const {
248 // assert(segmentNumber > 0);
250 rule
.append((UChar
)0x0024 /*$*/);
251 ICU_Utility::appendNumber(rule
, segmentNumber
, 10, 1);
256 * Remove any match info. This must be called before performing a
257 * set of matches with this segment.
// Clears any recorded match by setting both matchStart and matchLimit to -1. Other code in
// this file tests matchStart < 0 / matchStart >= 0 to tell whether a match has been recorded.
259 void StringMatcher::resetMatch() {
260 matchStart
= matchLimit
= -1;
264 * Union the set of all characters that may be output by this object
265 * into the given set.
266 * @param toUnionTo the set into which to union the output characters
// The UnicodeSet parameter is unnamed (its name is commented out), so the set passed in is
// not used by name inside this function.
// NOTE(review): only comment lines of the body are visible in this view, and the last comment
// sentence is cut off; presumably the body does nothing -- confirm against the complete source.
268 void StringMatcher::addReplacementSetTo(UnicodeSet
& /*toUnionTo*/) const {
269 // The output of this replacer varies; it is the source text between
270 // matchStart and matchLimit. Since this varies depending on the
271 // input text, we can't compute it here. We can either do nothing
272 // or we can add ALL characters to the set. It's probably more useful
277 * Implement UnicodeFunctor
// Takes a TransliterationRuleData pointer d and then walks pattern: for each code point c read
// with char32At(i), the corresponding UnicodeFunctor is fetched with data->lookup(c).
// NOTE(review): the assignment of d to the data member, the declaration of i, what is done
// with f, and the advance of i are not visible in this view; presumably the new data is
// propagated to each functor found -- confirm against the complete source.
279 void StringMatcher::setData(const TransliterationRuleData
* d
) {
282 while (i
<pattern
.length()) {
283 UChar32 c
= pattern
.char32At(i
);
284 UnicodeFunctor
* f
= data
->lookup(c
);
294 #endif /* #if !UCONFIG_NO_TRANSLITERATION */