2 **********************************************************************
3 * Copyright (C) 2001-2008 IBM and others. All rights reserved.
4 **********************************************************************
5 * Date Name Description
6 * 03/22/2000 helena Creation.
7 **********************************************************************
10 #include "unicode/utypes.h"
12 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
14 #include "unicode/stsearch.h"
// Invokes the UOBJECT_DEFINE_RTTI_IMPLEMENTATION macro for StringSearch.
// NOTE(review): the macro definition is not visible in this file; presumably
// it emits the class-ID based RTTI methods for the class -- confirm against
// the header that declares the macro.
20 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringSearch
)
22 // public constructors and destructors -----------------------------------
// Constructs a StringSearch for `pattern` over a UnicodeString `text`, with
// the collation chosen through a locale.
//
// The base SearchIterator is initialised with (text, breakiter). The
// underlying search handle m_strsrch_ is opened with usearch_open() from the
// pattern buffer, the text buffer, locale.getName() and the break iterator.
// When status reports success, m_collator_ is pointed at the handle's
// collator and m_search_ is pointed at the handle's own search struct.
//
// NOTE(review): several original lines of this definition (part of the
// parameter list, the body of the early-failure branch, closing braces) are
// absent from this listing; the comments describe only what is visible.
24 StringSearch::StringSearch(const UnicodeString
&pattern
,
25 const UnicodeString
&text
,
27 BreakIterator
*breakiter
,
29 SearchIterator(text
, breakiter
),
// An error already present in status is checked before any handle is opened.
33 if (U_FAILURE(status
)) {
// Open the C-level search handle on the member copies of pattern and text.
38 m_strsrch_
= usearch_open(m_pattern_
.getBuffer(), m_pattern_
.length(),
39 m_text_
.getBuffer(), m_text_
.length(),
40 locale
.getName(), (UBreakIterator
*)breakiter
,
45 // !!! dlf m_collator_ is an odd beast. basically it is an aliasing
46 // wrapper around the internal collator and rules, which (here) are
47 // owned by this stringsearch object. this means 1) its destructor
48 // _should not_ delete the ucollator or rules, and 2) changes made
49 // to the exposed collator (setStrength etc) _should_ modify the
50 // ucollator. thus the collator is not a copy-on-write alias, and it
51 // needs to distinguish itself not merely from 'stand alone' collators
52 // but also from copy-on-write ones. it needs additional state, which
53 // setUCollator should set.
55 if (U_SUCCESS(status
)) {
// Make m_collator_ wrap the collator held by the search handle.
57 m_collator_
.setUCollator((UCollator
*)m_strsrch_
->collator
);
58 // m_search_ has been created by the base SearchIterator class
// From here on m_search_ refers to the search struct inside the handle.
59 m_search_
= m_strsrch_
->search
;
// Constructs a StringSearch for `pattern` over a UnicodeString `text`, using
// the collator supplied by the caller.
//
// The base SearchIterator is initialised with (text, breakiter). The search
// handle m_strsrch_ is opened with usearch_openFromCollator(), passing
// coll->ucollator and the break iterator. On success m_collator_ is pointed
// at the handle's collator and m_search_ at the handle's search struct.
//
// NOTE(review): the condition guarding the U_ILLEGAL_ARGUMENT_ERROR
// assignment is not visible in this listing -- presumably a null `coll`;
// confirm against the full source. Other lines (status parameter, closing
// braces) are also absent here.
63 StringSearch::StringSearch(const UnicodeString
&pattern
,
64 const UnicodeString
&text
,
65 RuleBasedCollator
*coll
,
66 BreakIterator
*breakiter
,
68 SearchIterator(text
, breakiter
),
// An error already present in status is checked before any handle is opened.
72 if (U_FAILURE(status
)) {
// Reports an invalid argument to the caller through status.
77 status
= U_ILLEGAL_ARGUMENT_ERROR
;
// Open the C-level search handle using the caller's collator.
81 m_strsrch_
= usearch_openFromCollator(m_pattern_
.getBuffer(),
84 m_text_
.length(), coll
->ucollator
,
85 (UBreakIterator
*)breakiter
,
90 if (U_SUCCESS(status
)) {
// Make m_collator_ wrap the collator held by the search handle.
92 m_collator_
.setUCollator((UCollator
*)m_strsrch_
->collator
);
93 // m_search_ has been created by the base SearchIterator class
// From here on m_search_ refers to the search struct inside the handle.
94 m_search_
= m_strsrch_
->search
;
// Constructs a StringSearch for `pattern` over text supplied through a
// CharacterIterator, with the collation chosen through a locale.
//
// The base SearchIterator is initialised with (text, breakiter). The search
// handle m_strsrch_ is opened with usearch_open() on the m_pattern_ and
// m_text_ buffers, locale.getName() and the break iterator. The m_search_
// struct coming from the base class is released with uprv_free() and, on
// success, m_search_ is re-pointed at the handle's own search struct.
//
// @param pattern   text to search for
// @param text      iterator over the text to be searched
// @param locale    locale whose name is passed to usearch_open()
// @param breakiter break iterator forwarded to the base class and the handle
// @param status    in/out ICU error code
//
// NOTE(review): some original lines of this definition (member initialisers,
// the early-failure body, closing braces) are absent from this listing.
98 StringSearch::StringSearch(const UnicodeString
&pattern
,
99 CharacterIterator
&text
,
100 const Locale
&locale
,
101 BreakIterator
*breakiter
,
102 UErrorCode
&status
) :
103 SearchIterator(text
, breakiter
),
// An error already present in status is checked before any handle is opened.
107 if (U_FAILURE(status
)) {
// Open the C-level search handle on the member copies of pattern and text.
111 m_strsrch_
= usearch_open(m_pattern_
.getBuffer(), m_pattern_
.length(),
112 m_text_
.getBuffer(), m_text_
.length(),
113 locale
.getName(), (UBreakIterator
*)breakiter
,
// Release the search struct that came from the base class; m_search_ is
// pointed at the handle's struct further down.
115 uprv_free(m_search_
);
118 if (U_SUCCESS(status
)) {
119 // Alias the collator
120 m_collator_
.setUCollator((UCollator
*)m_strsrch_
->collator
);
121 // m_search_ has been created by the base SearchIterator class
122 m_search_
= m_strsrch_
->search
;
// Constructs a StringSearch for `pattern` over text supplied through a
// CharacterIterator, using the collator supplied by the caller.
//
// The base SearchIterator is initialised with (text, breakiter). The search
// handle m_strsrch_ is opened with usearch_openFromCollator(), passing
// coll->ucollator and the break iterator. The m_search_ struct coming from
// the base class is released with uprv_free() and, on success, m_search_ is
// re-pointed at the handle's own search struct.
//
// @param pattern   text to search for
// @param text      iterator over the text to be searched
// @param coll      collator whose ucollator is handed to the search handle
// @param breakiter break iterator forwarded to the base class and the handle
// @param status    in/out ICU error code
//
// NOTE(review): the condition guarding the U_ILLEGAL_ARGUMENT_ERROR
// assignment is not visible in this listing -- presumably a null `coll`;
// confirm against the full source.
126 StringSearch::StringSearch(const UnicodeString
&pattern
,
127 CharacterIterator
&text
,
128 RuleBasedCollator
*coll
,
129 BreakIterator
*breakiter
,
130 UErrorCode
&status
) :
131 SearchIterator(text
, breakiter
),
// An error already present in status is checked before any handle is opened.
135 if (U_FAILURE(status
)) {
// Reports an invalid argument to the caller through status.
140 status
= U_ILLEGAL_ARGUMENT_ERROR
;
// Open the C-level search handle using the caller's collator.
144 m_strsrch_
= usearch_openFromCollator(m_pattern_
.getBuffer(),
147 m_text_
.length(), coll
->ucollator
,
148 (UBreakIterator
*)breakiter
,
// Release the search struct that came from the base class; m_search_ is
// pointed at the handle's struct further down.
150 uprv_free(m_search_
);
153 if (U_SUCCESS(status
)) {
154 // Alias the collator
155 m_collator_
.setUCollator((UCollator
*)m_strsrch_
->collator
);
156 // m_search_ has been created by the base SearchIterator class
157 m_search_
= m_strsrch_
->search
;
// Copy constructor.
//
// The base SearchIterator is initialised from that.m_text_ and
// that.m_breakiterator_, and m_pattern_ is copied from `that`. A fresh search
// handle is opened with usearch_openFromCollator() using the collator held by
// that.m_strsrch_ and that.m_breakiterator_; the handle itself is not shared
// with `that`.
//
// The error code used here is a local variable, so a failure while opening
// the handle is not reported to the caller through a status argument.
//
// @param that  the StringSearch to copy from
161 StringSearch::StringSearch(const StringSearch
&that
) :
162 SearchIterator(that
.m_text_
, that
.m_breakiterator_
),
164 m_pattern_(that
.m_pattern_
)
// Local error code only; this constructor has no status parameter.
166 UErrorCode status
= U_ZERO_ERROR
;
168 // Free m_search_ from the superclass
169 uprv_free(m_search_
);
// A source object without a search handle cannot be copied meaningfully.
172 if (that
.m_strsrch_
== NULL
) {
173 // This was not a good copy
// Otherwise open a new handle that uses the source object's collator.
178 m_strsrch_
= usearch_openFromCollator(m_pattern_
.getBuffer(),
182 that
.m_strsrch_
->collator
,
183 (UBreakIterator
*)that
.m_breakiterator_
,
185 if (U_SUCCESS(status
)) {
186 // Alias the collator
187 m_collator_
.setUCollator((UCollator
*)m_strsrch_
->collator
);
188 // m_search_ has been created by the base SearchIterator class
189 m_search_
= m_strsrch_
->search
;
// Destructor: closes the underlying search handle with usearch_close() when
// one is present (m_strsrch_ is non-NULL).
194 StringSearch::~StringSearch()
196 if (m_strsrch_
!= NULL
) {
197 usearch_close(m_strsrch_
);
// Returns a new heap-allocated StringSearch produced by the copy constructor
// from *this.
// NOTE(review): the result comes from a plain `new`; presumably the caller is
// responsible for deleting it -- confirm against the class declaration.
203 StringSearch::clone() const {
204 return new StringSearch(*this);
207 // operator overloading ---------------------------------------------
// Assignment operator.
//
// When *this compares unequal to `that`, the text, break-iterator pointer and
// pattern are copied from `that`, the current search handle is closed, and a
// new handle is opened with usearch_openFromCollator() using the collator
// held by that.m_strsrch_. If the new handle is non-NULL, m_collator_ and
// m_search_ are re-pointed at the handle's collator and search struct.
//
// @param that  the StringSearch to assign from
//
// NOTE(review): the guard is a value comparison (`(*this) != that`), not an
// address comparison (`this != &that`); consider whether an identity check
// was intended.
// NOTE(review): m_breakiterator_ is copied as a pointer, so both objects refer
// to the same break iterator afterwards.
// NOTE(review): the error code is local and never reported. In the visible
// lines m_search_ is only updated when the new handle is non-NULL, so after a
// failed re-open it may still refer to the struct of the handle that was just
// closed -- confirm against the full source.
208 StringSearch
& StringSearch::operator=(const StringSearch
&that
)
210 if ((*this) != that
) {
211 UErrorCode status
= U_ZERO_ERROR
;
212 m_text_
= that
.m_text_
;
213 m_breakiterator_
= that
.m_breakiterator_
;
214 m_pattern_
= that
.m_pattern_
;
215 // all m_search_ in the parent class is linked up with m_strsrch_
216 usearch_close(m_strsrch_
);
// Re-open a handle for the newly assigned pattern and text.
217 m_strsrch_
= usearch_openFromCollator(m_pattern_
.getBuffer(),
221 that
.m_strsrch_
->collator
,
223 // Check null pointer
224 if (m_strsrch_
!= NULL
) {
225 // Alias the collator
226 m_collator_
.setUCollator((UCollator
*)m_strsrch_
->collator
);
227 m_search_
= m_strsrch_
->search
;
// Equality comparison against another SearchIterator.
//
// When the base-class comparison SearchIterator::operator==(that) succeeds,
// `that` is cast to StringSearch& and the result is true only if both
// patterns are equal and both search handles hold the same `collator` value
// (compared with ==).
//
// @param that  the iterator to compare with
//
// NOTE(review): the cast to StringSearch& is an unchecked C-style cast, and
// both m_strsrch_ pointers are dereferenced without a NULL check in the
// visible lines. Some original lines before the base-class check are absent
// from this listing.
233 UBool
StringSearch::operator==(const SearchIterator
&that
) const
238 if (SearchIterator::operator ==(that
)) {
239 StringSearch
&thatsrch
= (StringSearch
&)that
;
240 return (this->m_pattern_
== thatsrch
.m_pattern_
&&
241 this->m_strsrch_
->collator
== thatsrch
.m_strsrch_
->collator
);
246 // public get and set methods ----------------------------------------
// Sets the current search offset by forwarding to usearch_setOffset() on the
// underlying handle.
//
// @param position  new offset, passed through unchanged
// @param status    in/out ICU error code, passed through to usearch_setOffset()
248 void StringSearch::setOffset(int32_t position
, UErrorCode
&status
)
250 // status checked in usearch_setOffset
251 usearch_setOffset(m_strsrch_
, position
, &status
);
// Returns the current search offset as reported by usearch_getOffset() on the
// underlying handle.
254 int32_t StringSearch::getOffset(void) const
256 return usearch_getOffset(m_strsrch_
);
// Replaces the text to be searched with a UnicodeString.
//
// Does nothing visible when status already holds a failure. Otherwise the
// buffer and length of `text` are handed to usearch_setText() on the handle.
//
// @param text    new text to search in
// @param status  in/out ICU error code
//
// NOTE(review): one original line inside the success branch is absent from
// this listing. As shown, usearch_setText() receives the buffer of the
// caller's `text` argument rather than of a member copy -- confirm lifetime
// expectations against the full source.
259 void StringSearch::setText(const UnicodeString
&text
, UErrorCode
&status
)
261 if (U_SUCCESS(status
)) {
263 usearch_setText(m_strsrch_
, text
.getBuffer(), text
.length(), &status
);
// Replaces the text to be searched with the text of a CharacterIterator.
//
// Does nothing visible when status already holds a failure. Otherwise
// text.getText(m_text_) is called and the buffer and length of m_text_ are
// handed to usearch_setText() on the handle.
//
// @param text    iterator supplying the new text
// @param status  in/out ICU error code
267 void StringSearch::setText(CharacterIterator
&text
, UErrorCode
&status
)
269 if (U_SUCCESS(status
)) {
// Pull the iterator's text into the member string first, then hand the
// member's buffer to the handle.
270 text
.getText(m_text_
);
271 usearch_setText(m_strsrch_
, m_text_
.getBuffer(), m_text_
.length(), &status
);
// Returns a pointer to the m_collator_ member.
//
// The returned pointer is the address of a member of this object, not a
// separately allocated collator. The method is const, and the C-style cast
// removes the constness of the member's address.
275 RuleBasedCollator
* StringSearch::getCollator() const
277 return (RuleBasedCollator
*)&m_collator_
;
// Switches the search to a different collator.
//
// Does nothing visible when status already holds a failure. Otherwise
// coll->getUCollator() is passed to usearch_setCollator() on the handle, and
// m_collator_ is re-pointed at the collator now held by the handle.
//
// @param coll    collator to use from now on
// @param status  in/out ICU error code
//
// NOTE(review): `coll` is dereferenced without a NULL check in the visible
// lines.
280 void StringSearch::setCollator(RuleBasedCollator
*coll
, UErrorCode
&status
)
282 if (U_SUCCESS(status
)) {
283 usearch_setCollator(m_strsrch_
, coll
->getUCollator(), &status
);
284 // Alias the collator
285 m_collator_
.setUCollator((UCollator
*)m_strsrch_
->collator
);
// Replaces the search pattern.
//
// Does nothing visible when status already holds a failure. Otherwise the
// pattern is copied into m_pattern_, and the buffer and length of that member
// copy are handed to usearch_setPattern() on the handle.
//
// @param pattern  new pattern to search for
//
// NOTE(review): the remaining parameter line(s) and the tail of the
// usearch_setPattern() call are absent from this listing.
289 void StringSearch::setPattern(const UnicodeString
&pattern
,
292 if (U_SUCCESS(status
)) {
293 m_pattern_
= pattern
;
294 usearch_setPattern(m_strsrch_
, m_pattern_
.getBuffer(), m_pattern_
.length(),
// Accessor that returns a const UnicodeString reference.
// NOTE(review): the body of this method is absent from this listing;
// presumably it returns m_pattern_ -- confirm against the full source.
299 const UnicodeString
& StringSearch::getPattern() const
304 // public methods ----------------------------------------------------
// Resets the search by forwarding to usearch_reset() on the underlying handle.
306 void StringSearch::reset()
308 usearch_reset(m_strsrch_
);
// Creates a new StringSearch with the same pattern, text and collator, then
// copies the current offset, match start and match length into it.
//
// The new object is built with `new StringSearch(m_pattern_, m_text_,
// &m_collator_, ...)`. The error code is a local variable, so failures are
// not reported to the caller through a status argument.
//
// NOTE(review): the condition guarding the U_MEMORY_ALLOCATION_ERROR
// assignment, the remaining constructor arguments, the body of the final
// failure check and the return statement are absent from this listing --
// presumably a NULL `result` triggers the allocation error and a failure at
// the end discards the clone; confirm against the full source.
311 SearchIterator
* StringSearch::safeClone(void) const
313 UErrorCode status
= U_ZERO_ERROR
;
314 StringSearch
*result
= new StringSearch(m_pattern_
, m_text_
,
315 (RuleBasedCollator
*)&m_collator_
,
320 status
= U_MEMORY_ALLOCATION_ERROR
;
// Carry the current iteration state over to the new object.
323 result
->setOffset(getOffset(), status
);
324 result
->setMatchStart(m_strsrch_
->search
->matchedIndex
);
325 result
->setMatchLength(m_strsrch_
->search
->matchedLength
);
326 if (U_FAILURE(status
)) {
332 // protected method -------------------------------------------------
// Forward search step: searches from `position` and returns
// m_search_->matchedIndex.
//
// Visible structure:
//  * Only runs when status reports success.
//  * When the pattern's CELength is 0, the match index moves to getOffset()
//    (if the previous index was USEARCH_DONE) or one past the previous index,
//    the match length is set to 0, and the index becomes USEARCH_DONE once it
//    equals the text length.
//  * Otherwise the text iterator is positioned with ucol_setOffset() and the
//    work is delegated to usearch_handleNextCanonical() or
//    usearch_handleNextExact(), chosen by m_search_->isCanonicalMatch.
//  * After a search the text iterator is moved to the match index, or to the
//    text length when nothing was found.
//
// @param position  offset to search from; per the existing comment below it
//                  is already in the pre-shift position
// @param status    in/out ICU error code
// @return          m_search_->matchedIndex on the visible return paths
//
// NOTE(review): many original lines of this function (else branches, loop
// headers, early returns, closing braces) are absent from this listing, so
// the exact nesting of the statements below cannot be established from here.
334 int32_t StringSearch::handleNext(int32_t position
, UErrorCode
&status
)
336 // values passed here are already in the pre-shift position
337 if (U_SUCCESS(status
)) {
// Pattern with CELength == 0: step through the text with zero-length matches.
338 if (m_strsrch_
->pattern
.CELength
== 0) {
339 m_search_
->matchedIndex
=
340 m_search_
->matchedIndex
== USEARCH_DONE
?
341 getOffset() : m_search_
->matchedIndex
+ 1;
342 m_search_
->matchedLength
= 0;
343 ucol_setOffset(m_strsrch_
->textIter
, m_search_
->matchedIndex
,
// Reaching the text length ends the iteration.
345 if (m_search_
->matchedIndex
== m_search_
->textLength
) {
346 m_search_
->matchedIndex
= USEARCH_DONE
;
350 // looking at usearch.cpp, this part is shifted out to
351 // StringSearch instead of SearchIterator because m_strsrch_ is
352 // not accessible in SearchIterator
// Compares position + defaultShiftSize against the text length.
354 if (position
+ m_strsrch_
->pattern
.defaultShiftSize
355 > m_search_
->textLength
) {
360 if (m_search_
->matchedLength
<= 0) {
361 // the flipping direction issue has already been handled
363 // for boundary check purposes. this will ensure that the
364 // next match will not precede the current offset
365 // note search->matchedIndex will always be set to something
367 m_search_
->matchedIndex
= position
- 1;
// Position the text iterator at the requested start offset.
370 ucol_setOffset(m_strsrch_
->textIter
, position
, &status
);
374 if (m_search_
->isCanonicalMatch
) {
375 // can't use exact here since extra accents are allowed.
376 usearch_handleNextCanonical(m_strsrch_
, &status
);
379 usearch_handleNextExact(m_strsrch_
, &status
);
381 if (U_FAILURE(status
)) {
// Acceptance test: no break iterator, no match, or both ends of the match
// lie on break-iterator boundaries.
384 if (m_breakiterator_
== NULL
385 #if !UCONFIG_NO_BREAK_ITERATION
387 m_search_
->matchedIndex
== USEARCH_DONE
||
388 (m_breakiterator_
->isBoundary(m_search_
->matchedIndex
) &&
389 m_breakiterator_
->isBoundary(m_search_
->matchedIndex
+
390 m_search_
->matchedLength
))
// Leave the text iterator at the text length when nothing matched, else at
// the match index.
393 if (m_search_
->matchedIndex
== USEARCH_DONE
) {
394 ucol_setOffset(m_strsrch_
->textIter
,
395 m_search_
->textLength
, &status
);
398 ucol_setOffset(m_strsrch_
->textIter
,
399 m_search_
->matchedIndex
, &status
);
401 return m_search_
->matchedIndex
;
405 // if m_strsrch_->breakIter is always the same as m_breakiterator_
406 // then we don't need to check the match boundaries here because
407 // usearch_handleNextXXX will already have done it.
408 if (m_search_
->isCanonicalMatch
) {
409 // *could* actually use exact here 'cause no extra accents allowed...
410 usearch_handleNextCanonical(m_strsrch_
, &status
);
412 usearch_handleNextExact(m_strsrch_
, &status
);
415 if (U_FAILURE(status
)) {
// Same iterator repositioning as above for this second search path.
419 if (m_search_
->matchedIndex
== USEARCH_DONE
) {
420 ucol_setOffset(m_strsrch_
->textIter
, m_search_
->textLength
, &status
);
422 ucol_setOffset(m_strsrch_
->textIter
, m_search_
->matchedIndex
, &status
);
425 return m_search_
->matchedIndex
;
// Backward search step: searches from `position` and returns
// m_search_->matchedIndex.
//
// Visible structure:
//  * Only runs when status reports success.
//  * When the pattern's CELength is 0, the match index starts from
//    getOffset() (if the previous index was USEARCH_DONE) or from the previous
//    index, index 0 is tested as a special case, the index is decremented,
//    and the match length is set to 0.
//  * Otherwise the work is delegated to usearch_handlePreviousCanonical() or
//    usearch_handlePreviousExact(), chosen by m_search_->isCanonicalMatch.
//
// @param position  offset to search from; per the existing comment below it
//                  is already in the pre-shift position
// @param status    in/out ICU error code
// @return          m_search_->matchedIndex on the visible return paths
//
// NOTE(review): many original lines of this function (else branches, loop
// headers, early returns, closing braces) are absent from this listing, so
// the exact nesting of the statements below cannot be established from here.
432 int32_t StringSearch::handlePrev(int32_t position
, UErrorCode
&status
)
434 // values passed here are already in the pre-shift position
435 if (U_SUCCESS(status
)) {
// Pattern with CELength == 0: step backwards with zero-length matches.
436 if (m_strsrch_
->pattern
.CELength
== 0) {
437 m_search_
->matchedIndex
=
438 (m_search_
->matchedIndex
== USEARCH_DONE
? getOffset() :
439 m_search_
->matchedIndex
);
// Index 0 is handled separately before the decrement below.
440 if (m_search_
->matchedIndex
== 0) {
444 m_search_
->matchedIndex
--;
445 ucol_setOffset(m_strsrch_
->textIter
, m_search_
->matchedIndex
,
447 m_search_
->matchedLength
= 0;
451 // looking at usearch.cpp, this part is shifted out to
452 // StringSearch instead of SearchIterator because m_strsrch_ is
453 // not accessible in SearchIterator
// With overlap disabled, tests whether position - defaultShiftSize is negative.
455 if (!m_search_
->isOverlap
&&
456 position
- m_strsrch_
->pattern
.defaultShiftSize
< 0) {
462 if (m_search_
->isCanonicalMatch
) {
463 // can't use exact here since extra accents are allowed.
464 usearch_handlePreviousCanonical(m_strsrch_
, &status
);
467 usearch_handlePreviousExact(m_strsrch_
, &status
);
469 if (U_FAILURE(status
)) {
// Acceptance test: no break iterator, no match, or both ends of the match
// lie on break-iterator boundaries.
472 if (m_breakiterator_
== NULL
473 #if !UCONFIG_NO_BREAK_ITERATION
475 m_search_
->matchedIndex
== USEARCH_DONE
||
476 (m_breakiterator_
->isBoundary(m_search_
->matchedIndex
) &&
477 m_breakiterator_
->isBoundary(m_search_
->matchedIndex
+
478 m_search_
->matchedLength
))
481 return m_search_
->matchedIndex
;
// Position the text iterator at the requested start offset for this path.
485 ucol_setOffset(m_strsrch_
->textIter
, position
, &status
);
487 if (m_search_
->isCanonicalMatch
) {
488 // *could* use exact match here since extra accents *not* allowed!
489 usearch_handlePreviousCanonical(m_strsrch_
, &status
);
491 usearch_handlePreviousExact(m_strsrch_
, &status
);
494 if (U_FAILURE(status
)) {
498 return m_search_
->matchedIndex
;
502 return m_search_
->matchedIndex
;
509 #endif /* #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION */