2 **********************************************************************
3 * Copyright (C) 2001-2003 IBM and others. All rights reserved.
4 **********************************************************************
5 * Date Name Description
6 * 03/22/2000 helena Creation.
7 **********************************************************************
10 #include "unicode/utypes.h"
12 #if !UCONFIG_NO_COLLATION
14 #include "unicode/stsearch.h"
// Expands the RTTI implementation macro for StringSearch. The macro is
// defined outside this file; presumably it supplies the class-ID
// boilerplate for the type -- confirm against its definition.
20 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringSearch
)
22 // public constructors and destructors -----------------------------------
// Builds a StringSearch for `pattern` over the UnicodeString `text`.
// The base SearchIterator is initialised with the text and break iterator,
// then a search handle is opened with usearch_open() from the pattern and
// text buffers, the locale name and the break iterator (cast to
// UBreakIterator *).
// NOTE(review): `locale` and `status` are used below; confirm their
// parameter declarations against the class header.
24 StringSearch::StringSearch(const UnicodeString
&pattern
,
25 const UnicodeString
&text
,
27 BreakIterator
*breakiter
,
29 SearchIterator(text
, breakiter
),
// an incoming failure status is detected before the handle is opened
33 if (U_FAILURE(status
)) {
// open the underlying search handle from the stored pattern and text
38 m_strsrch_
= usearch_open(m_pattern_
.getBuffer(), m_pattern_
.length(),
39 m_text_
.getBuffer(), m_text_
.length(),
40 locale
.getName(), (UBreakIterator
*)breakiter
,
45 // !!! dlf m_collator_ is an odd beast. basically it is an aliasing
46 // wrapper around the internal collator and rules, which (here) are
47 // owned by this stringsearch object. this means 1) its destructor
48 // _should not_ delete the ucollator or rules, and 2) changes made
49 // to the exposed collator (setStrength etc) _should_ modify the
50 // ucollator. thus the collator is not a copy-on-write alias, and it
51 // needs to distinguish itself not merely from 'stand alone' collators
52 // but also from copy-on-write ones. it needs additional state, which
53 // setUCollator should set.
// only wire up the collator and search state when the open succeeded
55 if (U_SUCCESS(status
)) {
// fetch the rule text of the collator held by the search handle
57 const UChar
*rules
= ucol_getRules(m_strsrch_
->collator
, &length
);
// keep a copy of those rules in m_collation_rules_
58 m_collation_rules_
.setTo(rules
, length
);
// make m_collator_ wrap the handle's collator
59 m_collator_
.setUCollator((UCollator
*)m_strsrch_
->collator
,
61 // m_search_ has been created by the base SearchIterator class
// from here on m_search_ refers to the search state inside the handle
62 m_search_
= m_strsrch_
->search
;
// Builds a StringSearch for `pattern` over the UnicodeString `text` using
// the caller-supplied collator `coll`. The search handle is opened with
// usearch_openFromCollator(), passing coll->ucollator and the break
// iterator (cast to UBreakIterator *).
// NOTE(review): `status` is used below; confirm its parameter declaration
// against the class header.
66 StringSearch::StringSearch(const UnicodeString
&pattern
,
67 const UnicodeString
&text
,
68 RuleBasedCollator
*coll
,
69 BreakIterator
*breakiter
,
71 SearchIterator(text
, breakiter
),
// an incoming failure status is detected before the handle is opened
75 if (U_FAILURE(status
)) {
// reports an illegal argument; NOTE(review): presumably guarded by a
// NULL check on coll -- confirm the condition
80 status
= U_ILLEGAL_ARGUMENT_ERROR
;
// open the underlying search handle on the caller's collator
84 m_strsrch_
= usearch_openFromCollator(m_pattern_
.getBuffer(),
87 m_text_
.length(), coll
->ucollator
,
88 (UBreakIterator
*)breakiter
,
// only wire up the collator and search state when the open succeeded
93 if (U_SUCCESS(status
)) {
// fetch the rule text of the collator held by the search handle
95 const UChar
*rules
= ucol_getRules(m_strsrch_
->collator
, &length
);
// keep a copy of those rules in m_collation_rules_
96 m_collation_rules_
.setTo(rules
, length
);
// make m_collator_ wrap the handle's collator
97 m_collator_
.setUCollator((UCollator
*)m_strsrch_
->collator
,
99 // m_search_ has been created by the base SearchIterator class
// from here on m_search_ refers to the search state inside the handle
100 m_search_
= m_strsrch_
->search
;
// Builds a StringSearch for `pattern` over text supplied through a
// CharacterIterator. The base SearchIterator is initialised with the
// iterator and break iterator; the search handle is then opened with
// usearch_open() from the m_pattern_ and m_text_ buffers, the locale name
// and the break iterator (cast to UBreakIterator *).
// `status` is checked on entry and again after the open.
104 StringSearch::StringSearch(const UnicodeString
&pattern
,
105 CharacterIterator
&text
,
106 const Locale
&locale
,
107 BreakIterator
*breakiter
,
108 UErrorCode
&status
) :
109 SearchIterator(text
, breakiter
),
// an incoming failure status is detected before the handle is opened
113 if (U_FAILURE(status
)) {
// open the underlying search handle from the stored pattern and text
117 m_strsrch_
= usearch_open(m_pattern_
.getBuffer(), m_pattern_
.length(),
118 m_text_
.getBuffer(), m_text_
.length(),
119 locale
.getName(), (UBreakIterator
*)breakiter
,
// release the m_search_ held so far; it is re-pointed at the handle's own
// search state further down
121 uprv_free(m_search_
);
// only wire up the collator and search state when the open succeeded
124 if (U_SUCCESS(status
)) {
// fetch the rule text of the collator held by the search handle
126 const UChar
*rules
= ucol_getRules(m_strsrch_
->collator
, &length
);
// keep a copy of those rules in m_collation_rules_
127 m_collation_rules_
.setTo(rules
, length
);
// make m_collator_ wrap the handle's collator together with the rules copy
128 m_collator_
.setUCollator((UCollator
*)m_strsrch_
->collator
,
129 &m_collation_rules_
);
130 // m_search_ has been created by the base SearchIterator class
// from here on m_search_ refers to the search state inside the handle
131 m_search_
= m_strsrch_
->search
;
// Builds a StringSearch for `pattern` over text supplied through a
// CharacterIterator, using the caller-supplied collator `coll`. The search
// handle is opened with usearch_openFromCollator(), passing coll->ucollator
// and the break iterator (cast to UBreakIterator *).
// `status` is checked on entry and again after the open.
135 StringSearch::StringSearch(const UnicodeString
&pattern
,
136 CharacterIterator
&text
,
137 RuleBasedCollator
*coll
,
138 BreakIterator
*breakiter
,
139 UErrorCode
&status
) :
140 SearchIterator(text
, breakiter
),
// an incoming failure status is detected before the handle is opened
144 if (U_FAILURE(status
)) {
// reports an illegal argument; NOTE(review): presumably guarded by a
// NULL check on coll -- confirm the condition
149 status
= U_ILLEGAL_ARGUMENT_ERROR
;
// open the underlying search handle on the caller's collator
153 m_strsrch_
= usearch_openFromCollator(m_pattern_
.getBuffer(),
156 m_text_
.length(), coll
->ucollator
,
157 (UBreakIterator
*)breakiter
,
// release the m_search_ held so far; it is re-pointed at the handle's own
// search state further down
159 uprv_free(m_search_
);
// only wire up the collator and search state when the open succeeded
162 if (U_SUCCESS(status
)) {
// fetch the rule text of the collator held by the search handle
164 const UChar
*rules
= ucol_getRules(m_strsrch_
->collator
, &length
);
// keep a copy of those rules in m_collation_rules_
165 m_collation_rules_
.setTo(rules
, length
);
// make m_collator_ wrap the handle's collator together with the rules copy
166 m_collator_
.setUCollator((UCollator
*)m_strsrch_
->collator
,
167 &m_collation_rules_
);
168 // m_search_ has been created by the base SearchIterator class
// from here on m_search_ refers to the search state inside the handle
169 m_search_
= m_strsrch_
->search
;
// Copy constructor. The base SearchIterator is initialised from that's
// text and break iterator and m_pattern_ is copied from `that`. A new
// search handle is opened with usearch_openFromCollator() on the collator
// held by that's handle, so the copy gets its own handle rather than
// sharing that.m_strsrch_.
173 StringSearch::StringSearch(const StringSearch
&that
) :
174 SearchIterator(that
.m_text_
, that
.m_breakiterator_
),
176 m_pattern_(that
.m_pattern_
)
// a constructor cannot report errors to the caller, so a local status is used
178 UErrorCode status
= U_ZERO_ERROR
;
// a source object without a search handle cannot be copied from
179 if (that
.m_strsrch_
== NULL
) {
181 status
= U_ILLEGAL_ARGUMENT_ERROR
;
// open a fresh handle that uses the same collator as the source's handle
184 m_strsrch_
= usearch_openFromCollator(m_pattern_
.getBuffer(),
188 that
.m_strsrch_
->collator
,
189 (UBreakIterator
*)that
.m_breakiterator_
,
// release the m_search_ held so far; it is re-pointed at the handle's own
// search state further down
192 uprv_free(m_search_
);
// only wire up the collator and search state when the open succeeded
195 if (U_SUCCESS(status
)) {
// fetch the rule text of the collator held by the new handle
197 const UChar
*rules
= ucol_getRules(m_strsrch_
->collator
, &length
);
// keep a copy of those rules in m_collation_rules_
198 m_collation_rules_
.setTo(rules
, length
);
// make m_collator_ wrap the handle's collator together with the rules copy
199 m_collator_
.setUCollator((UCollator
*)m_strsrch_
->collator
,
200 &m_collation_rules_
);
201 // m_search_ has been created by the base SearchIterator class
// from here on m_search_ refers to the search state inside the handle
202 m_search_
= m_strsrch_
->search
;
// the break iterator pointer itself is copied from the source object
203 m_breakiterator_
= that
.m_breakiterator_
;
// Destructor: closes the search handle with usearch_close() when one was
// opened (m_strsrch_ is non-NULL).
207 StringSearch::~StringSearch()
209 if (m_strsrch_
!= NULL
) {
210 usearch_close(m_strsrch_
);
// Returns a new heap-allocated StringSearch built with the copy constructor
// from *this. The returned object comes from `new`.
216 StringSearch::clone() const {
217 return new StringSearch(*this);
220 // operator overloading ---------------------------------------------
// Assignment: copies text, break iterator pointer and pattern from `that`,
// closes the current search handle and opens a new one with
// usearch_openFromCollator() on the collator held by that's handle, then
// refreshes the collator wrapper and the m_search_ alias.
221 StringSearch
& StringSearch::operator=(const StringSearch
&that
)
// NOTE(review): this guard is a value comparison through operator!=, not an
// address check; objects that compare equal are left untouched
223 if ((*this) != that
) {
224 UErrorCode status
= U_ZERO_ERROR
;
225 m_text_
= that
.m_text_
;
226 m_breakiterator_
= that
.m_breakiterator_
;
227 m_pattern_
= that
.m_pattern_
;
228 // all m_search_ in the parent class is linked up with m_strsrch_
// the old handle is closed before its replacement is opened
229 usearch_close(m_strsrch_
);
230 m_strsrch_
= usearch_openFromCollator(m_pattern_
.getBuffer(),
234 that
.m_strsrch_
->collator
,
// NOTE(review): m_strsrch_ is dereferenced here with no status check
// visible after the open -- confirm it cannot be NULL at this point
237 const UChar
*rules
= ucol_getRules(m_strsrch_
->collator
, &length
);
// keep a copy of the handle's collator rules in m_collation_rules_
238 m_collation_rules_
.setTo(rules
, length
);
// make m_collator_ wrap the handle's collator together with the rules copy
239 m_collator_
.setUCollator((UCollator
*)m_strsrch_
->collator
,
240 &m_collation_rules_
);
// m_search_ now refers to the search state inside the new handle
241 m_search_
= m_strsrch_
->search
;
// Equality: when the base-class SearchIterator::operator== accepts `that`,
// the argument is treated as a StringSearch and the result is true only if
// the patterns are equal and both search handles refer to the same collator
// (the collator pointers are compared, not the collators' contents).
246 UBool
StringSearch::operator==(const SearchIterator
&that
) const
251 if (SearchIterator::operator ==(that
)) {
// C-style cast that also drops the const qualifier of `that`
252 StringSearch
&thatsrch
= (StringSearch
&)that
;
253 return (this->m_pattern_
== thatsrch
.m_pattern_
&&
254 this->m_strsrch_
->collator
== thatsrch
.m_strsrch_
->collator
);
259 // public get and set methods ----------------------------------------
// Sets the search position by forwarding `position` and `status` to
// usearch_setOffset() on the underlying handle.
261 void StringSearch::setOffset(int32_t position
, UErrorCode
&status
)
263 // status checked in usearch_setOffset
264 usearch_setOffset(m_strsrch_
, position
, &status
);
// Returns the current search position as reported by usearch_getOffset()
// on the underlying handle.
267 int32_t StringSearch::getOffset(void) const
269 return usearch_getOffset(m_strsrch_
);
// Replaces the text to be searched with the UnicodeString `text`. When
// `status` indicates success, text's buffer and length are handed to
// usearch_setText() on the underlying handle.
272 void StringSearch::setText(const UnicodeString
&text
, UErrorCode
&status
)
274 if (U_SUCCESS(status
)) {
// the handle receives a pointer to text's buffer, together with its length
276 usearch_setText(m_strsrch_
, text
.getBuffer(), text
.length(), &status
);
// Replaces the text to be searched with the contents of a
// CharacterIterator. When `status` indicates success, the iterator's text
// is copied into m_text_ and m_text_'s buffer and length are handed to
// usearch_setText() on the underlying handle.
280 void StringSearch::setText(CharacterIterator
&text
, UErrorCode
&status
)
282 if (U_SUCCESS(status
)) {
// fill m_text_ from the iterator
283 text
.getText(m_text_
);
284 usearch_setText(m_strsrch_
, m_text_
.getBuffer(), m_text_
.length(), &status
);
// Returns a pointer to the member m_collator_. The method is const, so the
// C-style cast removes constness to yield a non-const RuleBasedCollator *.
// The pointer refers to a member of this object and is not a new object.
288 RuleBasedCollator
* StringSearch::getCollator() const
290 return (RuleBasedCollator
*)&m_collator_
;
// Switches the search to the collator `coll`. When `status` indicates
// success, coll's UCollator is passed to usearch_setCollator(), coll's rule
// text is copied into m_collation_rules_, and m_collator_ is re-pointed at
// the collator now held by the search handle.
// NOTE(review): `coll` is dereferenced without a NULL check in this
// method -- confirm callers never pass NULL.
293 void StringSearch::setCollator(RuleBasedCollator
*coll
, UErrorCode
&status
)
295 if (U_SUCCESS(status
)) {
296 usearch_setCollator(m_strsrch_
, coll
->getUCollator(), &status
);
// refresh the stored rules from the new collator
297 m_collation_rules_
.setTo(coll
->getRules());
// make m_collator_ wrap the handle's collator together with the rules copy
298 m_collator_
.setUCollator((UCollator
*)m_strsrch_
->collator
,
299 &m_collation_rules_
);
// Replaces the search pattern. When `status` indicates success, `pattern`
// is copied into m_pattern_ and m_pattern_'s buffer and length are handed
// to usearch_setPattern() on the underlying handle.
303 void StringSearch::setPattern(const UnicodeString
&pattern
,
306 if (U_SUCCESS(status
)) {
// store our own copy first; the handle is given m_pattern_'s buffer
307 m_pattern_
= pattern
;
308 usearch_setPattern(m_strsrch_
, m_pattern_
.getBuffer(), m_pattern_
.length(),
// Const accessor returning a const UnicodeString reference.
// NOTE(review): presumably returns m_pattern_ -- confirm.
313 const UnicodeString
& StringSearch::getPattern() const
318 // public methods ----------------------------------------------------
// Resets the search by calling usearch_reset() on the underlying handle.
320 void StringSearch::reset()
322 usearch_reset(m_strsrch_
);
// Creates a new StringSearch from this object's pattern, text and collator
// wrapper, then copies the current offset, match start and match length
// onto it. Errors are tracked in a local status because the method has no
// status parameter.
// NOTE(review): confirm that `result` is released if the failure check at
// the end is taken.
325 SearchIterator
* StringSearch::safeClone(void) const
327 UErrorCode status
= U_ZERO_ERROR
;
// the const method passes its own m_collator_ with constness cast away
328 StringSearch
*result
= new StringSearch(m_pattern_
, m_text_
,
329 (RuleBasedCollator
*)&m_collator_
,
// NOTE(review): presumably set when the allocation returned NULL -- confirm
334 status
= U_MEMORY_ALLOCATION_ERROR
;
// carry the iteration state over to the new object
337 result
->setOffset(getOffset(), status
);
338 result
->setMatchStart(m_strsrch_
->search
->matchedIndex
);
339 result
->setMatchLength(m_strsrch_
->search
->matchedLength
);
// setOffset above is the only call here that takes the local status
340 if (U_FAILURE(status
)) {
346 // protected method -------------------------------------------------
// Forward-search step starting from `position`.
// With an empty pattern (pattern.CELength == 0) the match index simply
// advances by one with a match length of 0, until it reaches the text
// length and becomes USEARCH_DONE.
// Otherwise the text iterator is positioned at `position` and the search is
// delegated to usearch_handleNextCanonical() or usearch_handleNextExact(),
// depending on m_search_->isCanonicalMatch. A result is accepted when there
// is no break iterator, when nothing was found, or when both ends of the
// match fall on break-iterator boundaries.
// The accepted m_search_->matchedIndex is returned.
348 int32_t StringSearch::handleNext(int32_t position
, UErrorCode
&status
)
350 // values passed here are already in the pre-shift position
351 if (U_SUCCESS(status
)) {
// empty pattern: step through the text one position at a time
352 if (m_strsrch_
->pattern
.CELength
== 0) {
// first call starts at the current offset, later calls move one further
353 m_search_
->matchedIndex
=
354 m_search_
->matchedIndex
== USEARCH_DONE
?
355 getOffset() : m_search_
->matchedIndex
+ 1;
356 m_search_
->matchedLength
= 0;
357 ucol_setOffset(m_strsrch_
->textIter
, m_search_
->matchedIndex
,
// reaching the end of the text ends the iteration
359 if (m_search_
->matchedIndex
== m_search_
->textLength
) {
360 m_search_
->matchedIndex
= USEARCH_DONE
;
364 // looking at usearch.cpp, this part is shifted out to
365 // StringSearch instead of SearchIterator because m_strsrch_ is
366 // not accessible in SearchIterator
// tests whether a default shift from `position` would run past the text end
367 if (position
+ m_strsrch_
->pattern
.defaultShiftSize
368 > m_search_
->textLength
) {
// no current match (length <= 0): seed matchedIndex just before `position`
372 if (m_search_
->matchedLength
<= 0) {
373 // the flipping direction issue has already been handled
375 // for boundary check purposes. this will ensure that the
376 // next match will not precede the current offset
377 // note search->matchedIndex will always be set to something
379 m_search_
->matchedIndex
= position
- 1;
// start scanning the text from `position`
382 ucol_setOffset(m_strsrch_
->textIter
, position
, &status
);
384 if (m_search_
->isCanonicalMatch
) {
385 // can't use exact here since extra accents are allowed.
386 usearch_handleNextCanonical(m_strsrch_
, &status
);
389 usearch_handleNextExact(m_strsrch_
, &status
);
391 if (U_FAILURE(status
)) {
// acceptance test: no break iterator, no match found, or the match starts
// and ends on break-iterator boundaries
394 if (m_breakiterator_
== NULL
395 #if !UCONFIG_NO_BREAK_ITERATION
397 m_search_
->matchedIndex
== USEARCH_DONE
||
398 (m_breakiterator_
->isBoundary(m_search_
->matchedIndex
) &&
399 m_breakiterator_
->isBoundary(m_search_
->matchedIndex
+
400 m_search_
->matchedLength
))
// park the text iterator: at the text end when nothing was found, else at
// the start of the match
403 if (m_search_
->matchedIndex
== USEARCH_DONE
) {
404 ucol_setOffset(m_strsrch_
->textIter
,
405 m_search_
->textLength
, &status
);
408 ucol_setOffset(m_strsrch_
->textIter
,
409 m_search_
->matchedIndex
, &status
);
411 return m_search_
->matchedIndex
;
// Backward-search step starting from `position`.
// With an empty pattern (pattern.CELength == 0) the match index starts at
// the current offset on the first call and is decremented with a match
// length of 0; index 0 is treated as a special case.
// Otherwise the search is delegated to usearch_handlePreviousCanonical() or
// usearch_handlePreviousExact(), depending on m_search_->isCanonicalMatch.
// A result is accepted when there is no break iterator, when nothing was
// found, or when both ends of the match fall on break-iterator boundaries.
// The resulting m_search_->matchedIndex is returned.
419 int32_t StringSearch::handlePrev(int32_t position
, UErrorCode
&status
)
421 // values passed here are already in the pre-shift position
422 if (U_SUCCESS(status
)) {
// empty pattern: step backwards through the text one position at a time
423 if (m_strsrch_
->pattern
.CELength
== 0) {
// first call starts from the current offset
424 m_search_
->matchedIndex
=
425 (m_search_
->matchedIndex
== USEARCH_DONE
? getOffset() :
426 m_search_
->matchedIndex
);
// index 0 is the start of the text, so there is nothing before it
427 if (m_search_
->matchedIndex
== 0) {
// move one position back and place the text iterator there
431 m_search_
->matchedIndex
--;
432 ucol_setOffset(m_strsrch_
->textIter
, m_search_
->matchedIndex
,
434 m_search_
->matchedLength
= 0;
438 // looking at usearch.cpp, this part is shifted out to
439 // StringSearch instead of SearchIterator because m_strsrch_ is
440 // not accessible in SearchIterator
// without overlapping matches, tests whether a default shift back from
// `position` would fall before the start of the text
441 if (!m_search_
->isOverlap
&&
442 position
- m_strsrch_
->pattern
.defaultShiftSize
< 0) {
447 if (m_search_
->isCanonicalMatch
) {
448 // can't use exact here since extra accents are allowed.
449 usearch_handlePreviousCanonical(m_strsrch_
, &status
);
452 usearch_handlePreviousExact(m_strsrch_
, &status
);
454 if (U_FAILURE(status
)) {
// acceptance test: no break iterator, no match found, or the match starts
// and ends on break-iterator boundaries
457 if (m_breakiterator_
== NULL
458 #if !UCONFIG_NO_BREAK_ITERATION
460 m_search_
->matchedIndex
== USEARCH_DONE
||
461 (m_breakiterator_
->isBoundary(m_search_
->matchedIndex
) &&
462 m_breakiterator_
->isBoundary(m_search_
->matchedIndex
+
463 m_search_
->matchedLength
))
466 return m_search_
->matchedIndex
;
471 return m_search_
->matchedIndex
;
478 #endif /* #if !UCONFIG_NO_COLLATION */