]>
git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/strmatch.cpp
2 **********************************************************************
3 * Copyright (c) 2001-2012, International Business Machines Corporation
4 * and others. All Rights Reserved.
5 **********************************************************************
6 * Date Name Description
7 * 07/23/01 aliu Creation.
8 **********************************************************************
11 #include "unicode/utypes.h"
13 #if !UCONFIG_NO_TRANSLITERATION
18 #include "unicode/uniset.h"
19 #include "unicode/utf16.h"
23 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringMatcher
)
25 StringMatcher::StringMatcher(const UnicodeString
& theString
,
29 const TransliterationRuleData
& theData
) :
31 segmentNumber(segmentNum
),
35 theString
.extractBetween(start
, limit
, pattern
);
38 StringMatcher::StringMatcher(const StringMatcher
& o
) :
44 segmentNumber(o
.segmentNumber
),
45 matchStart(o
.matchStart
),
46 matchLimit(o
.matchLimit
)
53 StringMatcher::~StringMatcher() {
57 * Implement UnicodeFunctor
59 UnicodeFunctor
* StringMatcher::clone() const {
60 return new StringMatcher(*this);
64 * UnicodeFunctor API. Cast 'this' to a UnicodeMatcher* pointer
65 * and return the pointer.
67 UnicodeMatcher
* StringMatcher::toMatcher() const {
68 StringMatcher
*nonconst_this
= const_cast<StringMatcher
*>(this);
69 UnicodeMatcher
*nonconst_base
= static_cast<UnicodeMatcher
*>(nonconst_this
);
75 * UnicodeFunctor API. Cast 'this' to a UnicodeReplacer* pointer
76 * and return the pointer.
78 UnicodeReplacer
* StringMatcher::toReplacer() const {
79 StringMatcher
*nonconst_this
= const_cast<StringMatcher
*>(this);
80 UnicodeReplacer
*nonconst_base
= static_cast<UnicodeReplacer
*>(nonconst_this
);
86 * Implement UnicodeMatcher
88 UMatchDegree
StringMatcher::matches(const Replaceable
& text
,
93 int32_t cursor
= offset
;
95 // Match in the reverse direction
96 for (i
=pattern
.length()-1; i
>=0; --i
) {
97 UChar keyChar
= pattern
.charAt(i
);
98 UnicodeMatcher
* subm
= data
->lookupMatcher(keyChar
);
100 if (cursor
> limit
&&
101 keyChar
== text
.charAt(cursor
)) {
108 subm
->matches(text
, cursor
, limit
, incremental
);
114 // Record the match position, but adjust for a normal
115 // forward start, limit, and only if a prior match does not
116 // exist -- we want the rightmost match.
117 if (matchStart
< 0) {
118 matchStart
= cursor
+1;
119 matchLimit
= offset
+1;
122 for (i
=0; i
<pattern
.length(); ++i
) {
123 if (incremental
&& cursor
== limit
) {
124 // We've reached the context limit without a mismatch and
125 // without completing our match.
126 return U_PARTIAL_MATCH
;
128 UChar keyChar
= pattern
.charAt(i
);
129 UnicodeMatcher
* subm
= data
->lookupMatcher(keyChar
);
131 // Don't need the cursor < limit check if
132 // incremental is TRUE (because it's done above); do need
134 if (cursor
< limit
&&
135 keyChar
== text
.charAt(cursor
)) {
142 subm
->matches(text
, cursor
, limit
, incremental
);
148 // Record the match position
158 * Implement UnicodeMatcher
160 UnicodeString
& StringMatcher::toPattern(UnicodeString
& result
,
161 UBool escapeUnprintable
) const
164 UnicodeString str
, quoteBuf
;
165 if (segmentNumber
> 0) {
166 result
.append((UChar
)40); /*(*/
168 for (int32_t i
=0; i
<pattern
.length(); ++i
) {
169 UChar keyChar
= pattern
.charAt(i
);
170 const UnicodeMatcher
* m
= data
->lookupMatcher(keyChar
);
172 ICU_Utility::appendToRule(result
, keyChar
, FALSE
, escapeUnprintable
, quoteBuf
);
174 ICU_Utility::appendToRule(result
, m
->toPattern(str
, escapeUnprintable
),
175 TRUE
, escapeUnprintable
, quoteBuf
);
178 if (segmentNumber
> 0) {
179 result
.append((UChar
)41); /*)*/
181 // Flush quoteBuf out to result
182 ICU_Utility::appendToRule(result
, -1,
183 TRUE
, escapeUnprintable
, quoteBuf
);
188 * Implement UnicodeMatcher
190 UBool
StringMatcher::matchesIndexValue(uint8_t v
) const {
191 if (pattern
.length() == 0) {
194 UChar32 c
= pattern
.char32At(0);
195 const UnicodeMatcher
*m
= data
->lookupMatcher(c
);
196 return (m
== 0) ? ((c
& 0xFF) == v
) : m
->matchesIndexValue(v
);
200 * Implement UnicodeMatcher
202 void StringMatcher::addMatchSetTo(UnicodeSet
& toUnionTo
) const {
204 for (int32_t i
=0; i
<pattern
.length(); i
+=U16_LENGTH(ch
)) {
205 ch
= pattern
.char32At(i
);
206 const UnicodeMatcher
* matcher
= data
->lookupMatcher(ch
);
207 if (matcher
== NULL
) {
210 matcher
->addMatchSetTo(toUnionTo
);
216 * UnicodeReplacer API
218 int32_t StringMatcher::replace(Replaceable
& text
,
221 int32_t& /*cursor*/) {
225 // Copy segment with out-of-band data
226 int32_t dest
= limit
;
227 // If there was no match, that means that a quantifier
228 // matched zero-length. E.g., x (a)* y matched "xy".
229 if (matchStart
>= 0) {
230 if (matchStart
!= matchLimit
) {
231 text
.copy(matchStart
, matchLimit
, dest
);
232 outLen
= matchLimit
- matchStart
;
236 text
.handleReplaceBetween(start
, limit
, UnicodeString()); // delete original text
242 * UnicodeReplacer API
244 UnicodeString
& StringMatcher::toReplacerPattern(UnicodeString
& rule
,
245 UBool
/*escapeUnprintable*/) const {
246 // assert(segmentNumber > 0);
248 rule
.append((UChar
)0x0024 /*$*/);
249 ICU_Utility::appendNumber(rule
, segmentNumber
, 10, 1);
254 * Remove any match info. This must be called before performing a
255 * set of matches with this segment.
257 void StringMatcher::resetMatch() {
258 matchStart
= matchLimit
= -1;
262 * Union the set of all characters that may be output by this object
263 * into the given set.
264 * @param toUnionTo the set into which to union the output characters
266 void StringMatcher::addReplacementSetTo(UnicodeSet
& /*toUnionTo*/) const {
267 // The output of this replacer varies; it is the source text between
268 // matchStart and matchLimit. Since this varies depending on the
269 // input text, we can't compute it here. We can either do nothing
270 // or we can add ALL characters to the set. It's probably more useful
275 * Implement UnicodeFunctor
277 void StringMatcher::setData(const TransliterationRuleData
* d
) {
280 while (i
<pattern
.length()) {
281 UChar32 c
= pattern
.char32At(i
);
282 UnicodeFunctor
* f
= data
->lookup(c
);
292 #endif /* #if !UCONFIG_NO_TRANSLITERATION */