2 **********************************************************************
3 * Copyright (C) 2001-2014 IBM and others. All rights reserved.
4 **********************************************************************
5 * Date Name Description
6 * 03/22/2000 helena Creation.
7 **********************************************************************
10 #include "unicode/utypes.h"
12 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
14 #include "unicode/stsearch.h"
// Invokes the UOBJECT_DEFINE_RTTI_IMPLEMENTATION macro for StringSearch.
// NOTE(review): presumably this supplies the class-ID (RTTI) boilerplate
// expected of UObject subclasses -- confirm against unicode/uobject.h.
20 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringSearch
)
22 // public constructors and destructors -----------------------------------
// Constructs a StringSearch over a UnicodeString text, with the collation
// chosen by locale name.
//
// The text and break iterator are forwarded to the SearchIterator base
// class. The underlying UStringSearch handle (m_strsrch_) is then opened
// with usearch_open() from the buffers/lengths of m_pattern_ and m_text_,
// locale.getName(), and the break iterator cast to UBreakIterator*.
//
// pattern   - presumably copied into m_pattern_ (initializer not visible here)
// text      - text to search, handed to the SearchIterator base class
// breakiter - handed to both the base class and usearch_open()
//
// NOTE(review): `locale` and `status` are used in the body but their
// parameter declarations, and the statements guarded by the
// U_FAILURE(status) check, are not visible in this listing. Confirm against
// the complete source.
24 StringSearch::StringSearch(const UnicodeString
&pattern
,
25 const UnicodeString
&text
,
27 BreakIterator
*breakiter
,
29 SearchIterator(text
, breakiter
),
32 if (U_FAILURE(status
)) {
37 m_strsrch_
= usearch_open(m_pattern_
.getBuffer(), m_pattern_
.length(),
38 m_text_
.getBuffer(), m_text_
.length(),
39 locale
.getName(), (UBreakIterator
*)breakiter
,
// Only when the open succeeded: point the base-class search state at the
// state held inside the UStringSearch handle.
44 if (U_SUCCESS(status
)) {
45 // m_search_ has been created by the base SearchIterator class
46 m_search_
= m_strsrch_
->search
;
// Constructs a StringSearch over a UnicodeString text using a caller-supplied
// RuleBasedCollator.
//
// The text and break iterator are forwarded to the SearchIterator base
// class. m_strsrch_ is opened with usearch_openFromCollator(), passing
// coll->toUCollator() and the break iterator cast to UBreakIterator*.
//
// pattern   - presumably copied into m_pattern_ (initializer not visible here)
// text      - text to search, handed to the SearchIterator base class
// coll      - collator; dereferenced via coll->toUCollator()
// breakiter - handed to both the base class and usearch_openFromCollator()
//
// NOTE(review): `status` is assigned U_ILLEGAL_ARGUMENT_ERROR on one path,
// but the condition guarding that assignment (presumably a NULL check on
// `coll`) is not visible in this listing, nor is the `status` parameter
// declaration. Confirm against the complete source.
50 StringSearch::StringSearch(const UnicodeString
&pattern
,
51 const UnicodeString
&text
,
52 RuleBasedCollator
*coll
,
53 BreakIterator
*breakiter
,
55 SearchIterator(text
, breakiter
),
58 if (U_FAILURE(status
)) {
63 status
= U_ILLEGAL_ARGUMENT_ERROR
;
67 m_strsrch_
= usearch_openFromCollator(m_pattern_
.getBuffer(),
70 m_text_
.length(), coll
->toUCollator(),
71 (UBreakIterator
*)breakiter
,
// Only when the open succeeded: point the base-class search state at the
// state held inside the UStringSearch handle.
76 if (U_SUCCESS(status
)) {
77 // m_search_ has been created by the base SearchIterator class
78 m_search_
= m_strsrch_
->search
;
// Constructs a StringSearch over text supplied through a CharacterIterator,
// with the collation chosen by locale name.
//
// The iterator and break iterator are forwarded to the SearchIterator base
// class. m_strsrch_ is then opened with usearch_open() from the
// buffers/lengths of m_pattern_ and m_text_, locale.getName(), and the break
// iterator cast to UBreakIterator*.
//
// pattern   - presumably copied into m_pattern_ (initializer not visible here)
// text      - iterator handed to the SearchIterator base class
// breakiter - handed to both the base class and usearch_open()
//
// NOTE(review): `locale` and `status` are used in the body but their
// parameter declarations, and the statements guarded by the
// U_FAILURE(status) check, are not visible in this listing. Confirm against
// the complete source.
82 StringSearch::StringSearch(const UnicodeString
&pattern
,
83 CharacterIterator
&text
,
85 BreakIterator
*breakiter
,
87 SearchIterator(text
, breakiter
),
90 if (U_FAILURE(status
)) {
94 m_strsrch_
= usearch_open(m_pattern_
.getBuffer(), m_pattern_
.length(),
95 m_text_
.getBuffer(), m_text_
.length(),
96 locale
.getName(), (UBreakIterator
*)breakiter
,
// Only when the open succeeded: point the base-class search state at the
// state held inside the UStringSearch handle.
101 if (U_SUCCESS(status
)) {
102 // m_search_ has been created by the base SearchIterator class
103 m_search_
= m_strsrch_
->search
;
// Constructs a StringSearch over text supplied through a CharacterIterator,
// using a caller-supplied RuleBasedCollator.
//
// The iterator and break iterator are forwarded to the SearchIterator base
// class. m_strsrch_ is opened with usearch_openFromCollator(), passing
// coll->toUCollator() and the break iterator cast to UBreakIterator*.
// Afterwards the m_search_ block allocated by the base class is released
// with uprv_free() and, on success, replaced by the search state held inside
// the UStringSearch handle.
//
// pattern   - presumably copied into m_pattern_ (initializer not visible here)
// text      - iterator handed to the SearchIterator base class
// coll      - collator; dereferenced via coll->toUCollator()
// breakiter - handed to both the base class and usearch_openFromCollator()
// status    - in/out ICU error code; set to U_ILLEGAL_ARGUMENT_ERROR on one
//             path
//
// NOTE(review): the condition guarding the U_ILLEGAL_ARGUMENT_ERROR
// assignment (presumably a NULL check on `coll`) and the statements guarded
// by the U_FAILURE(status) check are not visible in this listing.
107 StringSearch::StringSearch(const UnicodeString
&pattern
,
108 CharacterIterator
&text
,
109 RuleBasedCollator
*coll
,
110 BreakIterator
*breakiter
,
111 UErrorCode
&status
) :
112 SearchIterator(text
, breakiter
),
115 if (U_FAILURE(status
)) {
120 status
= U_ILLEGAL_ARGUMENT_ERROR
;
124 m_strsrch_
= usearch_openFromCollator(m_pattern_
.getBuffer(),
127 m_text_
.length(), coll
->toUCollator(),
128 (UBreakIterator
*)breakiter
,
// Release the search-state block allocated by the base class before it is
// repointed below.
130 uprv_free(m_search_
);
133 if (U_SUCCESS(status
)) {
134 // m_search_ has been created by the base SearchIterator class
135 m_search_
= m_strsrch_
->search
;
// Copy constructor.
//
// Initializes the SearchIterator base from that.m_text_ and
// that.m_breakiterator_ and copies that.m_pattern_. The m_search_ block
// allocated by the base class is freed, and a new UStringSearch handle is
// opened with usearch_openFromCollator() using the source object's collator
// and break iterator. A local UErrorCode is used, so the caller does not see
// an error code from this constructor.
//
// NOTE(review): the statements executed when that.m_strsrch_ is NULL ("not a
// good copy") and the remaining arguments of usearch_openFromCollator() are
// not visible in this listing. Confirm against the complete source.
139 StringSearch::StringSearch(const StringSearch
&that
) :
140 SearchIterator(that
.m_text_
, that
.m_breakiterator_
),
141 m_pattern_(that
.m_pattern_
)
143 UErrorCode status
= U_ZERO_ERROR
;
145 // Free m_search_ from the superclass
146 uprv_free(m_search_
);
// A source object without a UStringSearch handle cannot be duplicated.
149 if (that
.m_strsrch_
== NULL
) {
150 // This was not a good copy
155 m_strsrch_
= usearch_openFromCollator(m_pattern_
.getBuffer(),
159 that
.m_strsrch_
->collator
,
160 (UBreakIterator
*)that
.m_breakiterator_
,
// Only when the open succeeded: point the base-class search state at the
// state held inside the new UStringSearch handle.
162 if (U_SUCCESS(status
)) {
163 // m_search_ has been created by the base SearchIterator class
164 m_search_
= m_strsrch_
->search
;
// Destructor: closes the underlying UStringSearch handle with
// usearch_close() when one exists.
// NOTE(review): any further cleanup inside the NULL-check branch is not
// visible in this listing.
169 StringSearch::~StringSearch()
171 if (m_strsrch_
!= NULL
) {
172 usearch_close(m_strsrch_
);
// Returns a heap-allocated copy of this object, created with the copy
// constructor via `new`.
// NOTE(review): the return-type line is not visible in this listing;
// presumably the caller takes ownership of the returned pointer -- confirm
// against the class declaration.
178 StringSearch::clone() const {
179 return new StringSearch(*this);
182 // operator overloading ---------------------------------------------
// Copy assignment.
//
// When this object compares unequal to `that`, copies the text, break
// iterator pointer and pattern, closes the current UStringSearch handle and
// opens a new one with usearch_openFromCollator() on that's collator. If the
// new handle is non-NULL, m_search_ is repointed at its search state. A
// local UErrorCode is used, so failures are not reported to the caller.
//
// NOTE(review): the guard is a value comparison (`(*this) != that`), not an
// identity check, so assignment from an equal-comparing but distinct object
// is skipped.
// NOTE(review): that.m_strsrch_ is dereferenced without a NULL check here,
// although the copy constructor explicitly tests for that case.
// NOTE(review): the remaining arguments of usearch_openFromCollator() and
// the return statement are not visible in this listing.
183 StringSearch
& StringSearch::operator=(const StringSearch
&that
)
185 if ((*this) != that
) {
186 UErrorCode status
= U_ZERO_ERROR
;
187 m_text_
= that
.m_text_
;
188 m_breakiterator_
= that
.m_breakiterator_
;
189 m_pattern_
= that
.m_pattern_
;
190 // all m_search_ in the parent class is linked up with m_strsrch_
191 usearch_close(m_strsrch_
);
192 m_strsrch_
= usearch_openFromCollator(m_pattern_
.getBuffer(),
196 that
.m_strsrch_
->collator
,
198 // Check null pointer
199 if (m_strsrch_
!= NULL
) {
200 m_search_
= m_strsrch_
->search
;
// Equality comparison against another SearchIterator.
//
// When SearchIterator::operator==(that) holds, `that` is cast (unchecked
// C-style cast) to StringSearch, and the result is true only if the patterns
// compare equal and both objects reference the same collator pointer.
//
// NOTE(review): both m_strsrch_ pointers are dereferenced without a NULL
// check, and the cast assumes `that` really is a StringSearch -- confirm the
// base-class comparison guarantees this.
// NOTE(review): any statements before the base-class comparison and the
// result returned when it fails are not visible in this listing.
206 UBool
StringSearch::operator==(const SearchIterator
&that
) const
211 if (SearchIterator::operator ==(that
)) {
212 StringSearch
&thatsrch
= (StringSearch
&)that
;
213 return (this->m_pattern_
== thatsrch
.m_pattern_
&&
214 this->m_strsrch_
->collator
== thatsrch
.m_strsrch_
->collator
);
219 // public get and set methods ----------------------------------------
221 void StringSearch::setOffset(int32_t position
, UErrorCode
&status
)
223 // status checked in usearch_setOffset
224 usearch_setOffset(m_strsrch_
, position
, &status
);
227 int32_t StringSearch::getOffset(void) const
229 return usearch_getOffset(m_strsrch_
);
// Replaces the text being searched with `text`.
//
// Does nothing when `status` already indicates failure; otherwise hands
// text.getBuffer()/text.length() to usearch_setText() on m_strsrch_.
//
// text   - new text to search
// status - in/out ICU error code, also forwarded to usearch_setText()
//
// NOTE(review): the buffer passed to usearch_setText() is the caller's
// `text`, whereas the CharacterIterator overload passes m_text_'s buffer.
// If the C API retains the pointer (TODO confirm in usearch.h), the caller's
// string must outlive the search. No update of m_text_ is visible in this
// listing -- verify against the complete source.
232 void StringSearch::setText(const UnicodeString
&text
, UErrorCode
&status
)
234 if (U_SUCCESS(status
)) {
236 usearch_setText(m_strsrch_
, text
.getBuffer(), text
.length(), &status
);
240 void StringSearch::setText(CharacterIterator
&text
, UErrorCode
&status
)
242 if (U_SUCCESS(status
)) {
243 text
.getText(m_text_
);
244 usearch_setText(m_strsrch_
, m_text_
.getBuffer(), m_text_
.length(), &status
);
248 RuleBasedCollator
* StringSearch::getCollator() const
250 // Note the const_cast. It would be cleaner if this const method returned a const collator.
251 return RuleBasedCollator::rbcFromUCollator(const_cast<UCollator
*>(m_strsrch_
->collator
));
254 void StringSearch::setCollator(RuleBasedCollator
*coll
, UErrorCode
&status
)
256 if (U_SUCCESS(status
)) {
257 usearch_setCollator(m_strsrch_
, coll
->toUCollator(), &status
);
// Replaces the search pattern.
//
// When `status` indicates success, `pattern` is copied into m_pattern_ and
// the underlying UStringSearch is updated with usearch_setPattern() using
// the buffer and length of that member copy (not of the caller's string).
//
// pattern - new pattern to search for
//
// NOTE(review): the `status` parameter declaration and the final argument of
// the usearch_setPattern() call are not visible in this listing.
261 void StringSearch::setPattern(const UnicodeString
&pattern
,
264 if (U_SUCCESS(status
)) {
265 m_pattern_
= pattern
;
266 usearch_setPattern(m_strsrch_
, m_pattern_
.getBuffer(), m_pattern_
.length(),
// Accessor for the search pattern; returns a const UnicodeString reference.
// NOTE(review): the body is not visible in this listing -- presumably it
// returns m_pattern_; confirm against the complete source.
271 const UnicodeString
& StringSearch::getPattern() const
276 // public methods ----------------------------------------------------
278 void StringSearch::reset()
280 usearch_reset(m_strsrch_
);
// Creates a new StringSearch from this object's pattern and text, then
// copies the current offset and the matched index/length from this object's
// search state onto it. A local UErrorCode is used throughout.
//
// NOTE(review): the remaining constructor arguments, the condition guarding
// the U_MEMORY_ALLOCATION_ERROR assignment, and all return statements are
// not visible in this listing. In particular, confirm that `result` is
// released on the U_FAILURE(status) path rather than leaked.
// NOTE(review): m_strsrch_ is dereferenced without a NULL check.
283 SearchIterator
* StringSearch::safeClone(void) const
285 UErrorCode status
= U_ZERO_ERROR
;
286 StringSearch
*result
= new StringSearch(m_pattern_
, m_text_
,
292 status
= U_MEMORY_ALLOCATION_ERROR
;
// Carry the iteration state over to the copy.
295 result
->setOffset(getOffset(), status
);
296 result
->setMatchStart(m_strsrch_
->search
->matchedIndex
);
297 result
->setMatchLength(m_strsrch_
->search
->matchedLength
);
298 if (U_FAILURE(status
)) {
304 // protected method -------------------------------------------------
// Forward-search step: looks for the next match starting from `position`.
//
// position - starting offset (already in the "pre-shift" position, per the
//            comment below)
// status   - in/out ICU error code; the search logic runs only when it
//            indicates success
//
// Two cases are visible:
//  * pattern.cesLength == 0: the match index advances by one from the
//    previous match (or starts at getOffset() when there was none), the
//    match length is 0, and reaching textLength yields USEARCH_DONE.
//  * otherwise: the collation iterator is positioned at `position` and the
//    work is delegated to usearch_handleNextCanonical() or
//    usearch_handleNextExact() depending on isCanonicalMatch. Afterwards the
//    iterator is placed at the match index, or at textLength when no match
//    was found, and the match index is returned.
//
// NOTE(review): the dispatch/return sequence appears twice below, once with
// an explicit break-iterator boundary test and once without. The
// preprocessor or control-flow lines that select between them, the `else`
// branches, and the values returned on failure are not visible in this
// listing. Confirm against the complete source.
306 int32_t StringSearch::handleNext(int32_t position
, UErrorCode
&status
)
308 // values passed here are already in the pre-shift position
309 if (U_SUCCESS(status
)) {
// Zero-length pattern case: step one position at a time.
310 if (m_strsrch_
->pattern
.cesLength
== 0) {
311 m_search_
->matchedIndex
=
312 m_search_
->matchedIndex
== USEARCH_DONE
?
313 getOffset() : m_search_
->matchedIndex
+ 1;
314 m_search_
->matchedLength
= 0;
315 ucol_setOffset(m_strsrch_
->textIter
, m_search_
->matchedIndex
,
// Reaching the end of the text is reported as "no more matches".
317 if (m_search_
->matchedIndex
== m_search_
->textLength
) {
318 m_search_
->matchedIndex
= USEARCH_DONE
;
322 // looking at usearch.cpp, this part is shifted out to
323 // StringSearch instead of SearchIterator because m_strsrch_ is
324 // not accessible in SearchIterator
326 if (position
+ m_strsrch_
->pattern
.defaultShiftSize
327 > m_search_
->textLength
) {
332 if (m_search_
->matchedLength
<= 0) {
333 // the flipping direction issue has already been handled
335 // for boundary check purposes. this will ensure that the
336 // next match will not precede the current offset
337 // note search->matchedIndex will always be set to something
339 m_search_
->matchedIndex
= position
- 1;
342 ucol_setOffset(m_strsrch_
->textIter
, position
, &status
);
346 if (m_search_
->isCanonicalMatch
) {
347 // can't use exact here since extra accents are allowed.
348 usearch_handleNextCanonical(m_strsrch_
, &status
);
351 usearch_handleNextExact(m_strsrch_
, &status
);
353 if (U_FAILURE(status
)) {
356 if (m_breakiterator_
== NULL
357 #if !UCONFIG_NO_BREAK_ITERATION
359 m_search_
->matchedIndex
== USEARCH_DONE
||
360 (m_breakiterator_
->isBoundary(m_search_
->matchedIndex
) &&
361 m_breakiterator_
->isBoundary(m_search_
->matchedIndex
+
362 m_search_
->matchedLength
))
365 if (m_search_
->matchedIndex
== USEARCH_DONE
) {
366 ucol_setOffset(m_strsrch_
->textIter
,
367 m_search_
->textLength
, &status
);
370 ucol_setOffset(m_strsrch_
->textIter
,
371 m_search_
->matchedIndex
, &status
);
373 return m_search_
->matchedIndex
;
377 // if m_strsrch_->breakIter is always the same as m_breakiterator_
378 // then we don't need to check the match boundaries here because
379 // usearch_handleNextXXX will already have done it.
380 if (m_search_
->isCanonicalMatch
) {
381 // *could* actually use exact here 'cause no extra accents allowed...
382 usearch_handleNextCanonical(m_strsrch_
, &status
);
384 usearch_handleNextExact(m_strsrch_
, &status
);
387 if (U_FAILURE(status
)) {
// Leave the collation iterator at the match, or at the end of the text when
// nothing was found.
391 if (m_search_
->matchedIndex
== USEARCH_DONE
) {
392 ucol_setOffset(m_strsrch_
->textIter
, m_search_
->textLength
, &status
);
394 ucol_setOffset(m_strsrch_
->textIter
, m_search_
->matchedIndex
, &status
);
397 return m_search_
->matchedIndex
;
// Backward-search step: looks for the previous match starting from
// `position`.
//
// position - starting offset (already in the "pre-shift" position, per the
//            comment below)
// status   - in/out ICU error code; the search logic runs only when it
//            indicates success
//
// Two cases are visible:
//  * pattern.cesLength == 0: the match index starts from the previous match
//    (or getOffset() when there was none); index 0 is treated specially,
//    otherwise the index is decremented, the collation iterator is moved
//    there, and the match length is set to 0.
//  * otherwise: the work is delegated to usearch_handlePreviousCanonical()
//    or usearch_handlePreviousExact() depending on isCanonicalMatch, and the
//    resulting match index is returned.
//
// NOTE(review): the dispatch sequence appears twice below, once with an
// explicit break-iterator boundary test and once without. The preprocessor
// or control-flow lines that select between them, the `else` branches, the
// handling of matchedIndex == 0, and the values returned on failure are not
// visible in this listing. Confirm against the complete source.
404 int32_t StringSearch::handlePrev(int32_t position
, UErrorCode
&status
)
406 // values passed here are already in the pre-shift position
407 if (U_SUCCESS(status
)) {
// Zero-length pattern case: step back one position at a time.
408 if (m_strsrch_
->pattern
.cesLength
== 0) {
409 m_search_
->matchedIndex
=
410 (m_search_
->matchedIndex
== USEARCH_DONE
? getOffset() :
411 m_search_
->matchedIndex
);
412 if (m_search_
->matchedIndex
== 0) {
416 m_search_
->matchedIndex
--;
417 ucol_setOffset(m_strsrch_
->textIter
, m_search_
->matchedIndex
,
419 m_search_
->matchedLength
= 0;
423 // looking at usearch.cpp, this part is shifted out to
424 // StringSearch instead of SearchIterator because m_strsrch_ is
425 // not accessible in SearchIterator
427 if (!m_search_
->isOverlap
&&
428 position
- m_strsrch_
->pattern
.defaultShiftSize
< 0) {
434 if (m_search_
->isCanonicalMatch
) {
435 // can't use exact here since extra accents are allowed.
436 usearch_handlePreviousCanonical(m_strsrch_
, &status
);
439 usearch_handlePreviousExact(m_strsrch_
, &status
);
441 if (U_FAILURE(status
)) {
444 if (m_breakiterator_
== NULL
445 #if !UCONFIG_NO_BREAK_ITERATION
447 m_search_
->matchedIndex
== USEARCH_DONE
||
448 (m_breakiterator_
->isBoundary(m_search_
->matchedIndex
) &&
449 m_breakiterator_
->isBoundary(m_search_
->matchedIndex
+
450 m_search_
->matchedLength
))
453 return m_search_
->matchedIndex
;
457 ucol_setOffset(m_strsrch_
->textIter
, position
, &status
);
459 if (m_search_
->isCanonicalMatch
) {
460 // *could* use exact match here since extra accents *not* allowed!
461 usearch_handlePreviousCanonical(m_strsrch_
, &status
);
463 usearch_handlePreviousExact(m_strsrch_
, &status
);
466 if (U_FAILURE(status
)) {
470 return m_search_
->matchedIndex
;
474 return m_search_
->matchedIndex
;
481 #endif /* #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION */