1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 **********************************************************************
5 * Copyright (C) 2001-2014 IBM and others. All rights reserved.
6 **********************************************************************
7 * Date Name Description
8 * 03/22/2000 helena Creation.
9 **********************************************************************
12 #include "unicode/utypes.h"
14 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
16 #include "unicode/stsearch.h"
// Expands the UOBJECT_DEFINE_RTTI_IMPLEMENTATION macro for StringSearch.
// NOTE(review): presumably this supplies the class-ID (RTTI) members of the
// class -- confirm against the macro's definition.
22 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringSearch
)
24 // public constructors and destructors -----------------------------------
// Constructs a StringSearch for `pattern` over a UnicodeString `text`.
// The base SearchIterator is initialised with (text, breakiter), then the
// search handle m_strsrch_ is opened with usearch_open() from the buffers and
// lengths of m_pattern_ and m_text_, the locale name and the break iterator.
// NOTE(review): `locale` and `status` are used below but their parameter
// declarations are not visible in this listing; other lines of this
// function appear to be missing as well.
26 StringSearch::StringSearch(const UnicodeString
&pattern
,
27 const UnicodeString
&text
,
29 BreakIterator
*breakiter
,
31 SearchIterator(text
, breakiter
),
// The incoming status is tested first; the body of this branch is not
// visible here.
34 if (U_FAILURE(status
)) {
// breakiter is handed to the C API through a cast to UBreakIterator*.
39 m_strsrch_
= usearch_open(m_pattern_
.getBuffer(), m_pattern_
.length(),
40 m_text_
.getBuffer(), m_text_
.length(),
41 locale
.getName(), (UBreakIterator
*)breakiter
,
// Only when opening succeeded is m_search_ pointed at the handle's search data.
46 if (U_SUCCESS(status
)) {
47 // m_search_ has been created by the base SearchIterator class
48 m_search_
= m_strsrch_
->search
;
// Constructs a StringSearch for `pattern` over a UnicodeString `text`, using
// a caller-supplied RuleBasedCollator instead of a locale.
// The base SearchIterator is initialised with (text, breakiter), then
// m_strsrch_ is opened with usearch_openFromCollator(), passing
// coll->toUCollator() and the break iterator.
// NOTE(review): `status` is used below but its parameter declaration is not
// visible in this listing; other lines of this function appear to be missing.
52 StringSearch::StringSearch(const UnicodeString
&pattern
,
53 const UnicodeString
&text
,
54 RuleBasedCollator
*coll
,
55 BreakIterator
*breakiter
,
57 SearchIterator(text
, breakiter
),
// The incoming status is tested first; the body of this branch is not
// visible here.
60 if (U_FAILURE(status
)) {
// NOTE(review): the condition guarding this assignment is not visible;
// presumably it rejects a NULL coll -- confirm.
65 status
= U_ILLEGAL_ARGUMENT_ERROR
;
// coll is dereferenced here to obtain the UCollator for the C API.
69 m_strsrch_
= usearch_openFromCollator(m_pattern_
.getBuffer(),
72 m_text_
.length(), coll
->toUCollator(),
73 (UBreakIterator
*)breakiter
,
// Only when opening succeeded is m_search_ pointed at the handle's search data.
78 if (U_SUCCESS(status
)) {
79 // m_search_ has been created by the base SearchIterator class
80 m_search_
= m_strsrch_
->search
;
// Constructs a StringSearch for `pattern` over text supplied through a
// CharacterIterator.
// The base SearchIterator is initialised with (text, breakiter), then
// m_strsrch_ is opened with usearch_open() from the buffers and lengths of
// m_pattern_ and m_text_, the locale name and the break iterator.
// NOTE(review): `locale` and `status` are used below but their parameter
// declarations are not visible in this listing; other lines of this
// function appear to be missing as well.
84 StringSearch::StringSearch(const UnicodeString
&pattern
,
85 CharacterIterator
&text
,
87 BreakIterator
*breakiter
,
89 SearchIterator(text
, breakiter
),
// The incoming status is tested first; the body of this branch is not
// visible here.
92 if (U_FAILURE(status
)) {
96 m_strsrch_
= usearch_open(m_pattern_
.getBuffer(), m_pattern_
.length(),
97 m_text_
.getBuffer(), m_text_
.length(),
98 locale
.getName(), (UBreakIterator
*)breakiter
,
// The current m_search_ is released before it is re-pointed below.
100 uprv_free(m_search_
);
// Only when opening succeeded is m_search_ pointed at the handle's search data.
103 if (U_SUCCESS(status
)) {
104 // m_search_ has been created by the base SearchIterator class
105 m_search_
= m_strsrch_
->search
;
// Constructs a StringSearch for `pattern` over text supplied through a
// CharacterIterator, using a caller-supplied RuleBasedCollator.
// The base SearchIterator is initialised with (text, breakiter), then
// m_strsrch_ is opened with usearch_openFromCollator(), passing
// coll->toUCollator() and the break iterator.
// `status` is the in/out ICU error code: it is tested on entry and may be set
// to U_ILLEGAL_ARGUMENT_ERROR below.
// NOTE(review): some lines of this function appear to be missing from this
// listing.
109 StringSearch::StringSearch(const UnicodeString
&pattern
,
110 CharacterIterator
&text
,
111 RuleBasedCollator
*coll
,
112 BreakIterator
*breakiter
,
113 UErrorCode
&status
) :
114 SearchIterator(text
, breakiter
),
// The incoming status is tested first; the body of this branch is not
// visible here.
117 if (U_FAILURE(status
)) {
// NOTE(review): the condition guarding this assignment is not visible;
// presumably it rejects a NULL coll -- confirm.
122 status
= U_ILLEGAL_ARGUMENT_ERROR
;
// coll is dereferenced here to obtain the UCollator for the C API.
126 m_strsrch_
= usearch_openFromCollator(m_pattern_
.getBuffer(),
129 m_text_
.length(), coll
->toUCollator(),
130 (UBreakIterator
*)breakiter
,
// The current m_search_ is released before it is re-pointed below.
132 uprv_free(m_search_
);
// Only when opening succeeded is m_search_ pointed at the handle's search data.
135 if (U_SUCCESS(status
)) {
136 // m_search_ has been created by the base SearchIterator class
137 m_search_
= m_strsrch_
->search
;
// Copy constructor.
// The base SearchIterator is built from that.m_text_ and
// that.m_breakiterator_, and m_pattern_ is copied from `that`. A new search
// handle is then opened with usearch_openFromCollator() using the collator
// held by that.m_strsrch_, so the copy gets its own handle.
// The error code is a local variable, so a failure recorded in it cannot be
// reported to the caller through this signature.
// NOTE(review): some lines of this function appear to be missing from this
// listing.
141 StringSearch::StringSearch(const StringSearch
&that
) :
142 SearchIterator(that
.m_text_
, that
.m_breakiterator_
),
143 m_pattern_(that
.m_pattern_
)
145 UErrorCode status
= U_ZERO_ERROR
;
147 // Free m_search_ from the superclass
148 uprv_free(m_search_
);
// A source object without a search handle cannot be copied from; the body of
// this branch is not visible here.
151 if (that
.m_strsrch_
== NULL
) {
152 // This was not a good copy
// Open a fresh handle with the source's collator and break iterator.
157 m_strsrch_
= usearch_openFromCollator(m_pattern_
.getBuffer(),
161 that
.m_strsrch_
->collator
,
162 (UBreakIterator
*)that
.m_breakiterator_
,
164 if (U_SUCCESS(status
)) {
165 // m_search_ has been created by the base SearchIterator class
166 m_search_
= m_strsrch_
->search
;
// Destructor: closes the search handle with usearch_close() when one exists.
// NOTE(review): any further cleanup lines are not visible in this listing.
171 StringSearch::~StringSearch()
173 if (m_strsrch_
!= NULL
) {
174 usearch_close(m_strsrch_
);
// Returns a new heap-allocated StringSearch built with the copy constructor.
// NOTE(review): the declared return type is on a line not visible here;
// presumably the caller owns the returned object -- confirm.
180 StringSearch::clone() const {
181 return new StringSearch(*this);
184 // operator overloading ---------------------------------------------
// Assignment operator.
// Copies m_text_, m_breakiterator_ and m_pattern_ from `that`, closes the
// current search handle and opens a new one with usearch_openFromCollator()
// using the collator held by that.m_strsrch_.
// NOTE(review): the guard below compares the two objects by value through
// operator!= rather than by address.
// NOTE(review): that.m_strsrch_ is dereferenced without the NULL test that
// the copy constructor applies -- confirm this is safe.
// NOTE(review): the error code is local, so failures are not reported; some
// lines of this function (including the return) are not visible here.
185 StringSearch
& StringSearch::operator=(const StringSearch
&that
)
187 if ((*this) != that
) {
188 UErrorCode status
= U_ZERO_ERROR
;
189 m_text_
= that
.m_text_
;
190 m_breakiterator_
= that
.m_breakiterator_
;
191 m_pattern_
= that
.m_pattern_
;
192 // all m_search_ in the parent class is linked up with m_strsrch_
193 usearch_close(m_strsrch_
);
194 m_strsrch_
= usearch_openFromCollator(m_pattern_
.getBuffer(),
198 that
.m_strsrch_
->collator
,
// m_search_ is re-pointed only when a new handle was obtained; in the visible
// code it is not otherwise assigned after the old handle was closed.
200 // Check null pointer
201 if (m_strsrch_
!= NULL
) {
202 m_search_
= m_strsrch_
->search
;
// Equality test against another SearchIterator.
// When the base-class SearchIterator::operator== accepts `that`, the result
// is true if the patterns are equal and both objects hold the same collator
// pointer (pointer identity, not a comparison of collator contents).
// NOTE(review): `that` is downcast with a C-style cast; a type check, if any,
// is on lines not visible here.
// NOTE(review): m_strsrch_ of both objects is dereferenced with no visible
// NULL test.
208 UBool
StringSearch::operator==(const SearchIterator
&that
) const
213 if (SearchIterator::operator ==(that
)) {
214 StringSearch
&thatsrch
= (StringSearch
&)that
;
215 return (this->m_pattern_
== thatsrch
.m_pattern_
&&
216 this->m_strsrch_
->collator
== thatsrch
.m_strsrch_
->collator
);
221 // public get and set methods ----------------------------------------
// Sets the search position by forwarding to usearch_setOffset().
// position: the offset passed through unchanged to the C API.
// status:   in/out ICU error code; this method does not test it itself.
223 void StringSearch::setOffset(int32_t position
, UErrorCode
&status
)
225 // status checked in usearch_setOffset
226 usearch_setOffset(m_strsrch_
, position
, &status
);
// Returns the current search offset as reported by usearch_getOffset() for
// this object's search handle.
229 int32_t StringSearch::getOffset(void) const
231 return usearch_getOffset(m_strsrch_
);
// Replaces the text to search with a UnicodeString.
// Does nothing visible when `status` already holds a failure; otherwise calls
// usearch_setText() with the buffer and length of `text`.
// NOTE(review): the buffer passed to usearch_setText() is taken from the
// caller's `text`, whereas the CharacterIterator overload passes m_text_'s
// buffer -- confirm the buffer lifetime is safe. One line of this function
// is not visible in this listing.
234 void StringSearch::setText(const UnicodeString
&text
, UErrorCode
&status
)
236 if (U_SUCCESS(status
)) {
238 usearch_setText(m_strsrch_
, text
.getBuffer(), text
.length(), &status
);
// Replaces the text to search with the contents of a CharacterIterator.
// Does nothing visible when `status` already holds a failure; otherwise the
// iterator's text is written into m_text_ with getText() and the search
// handle is updated from m_text_'s buffer and length.
242 void StringSearch::setText(CharacterIterator
&text
, UErrorCode
&status
)
244 if (U_SUCCESS(status
)) {
245 text
.getText(m_text_
);
246 usearch_setText(m_strsrch_
, m_text_
.getBuffer(), m_text_
.length(), &status
);
// Returns the collator held by the search handle as a RuleBasedCollator*,
// converted with RuleBasedCollator::rbcFromUCollator().
// The handle's collator is const-qualified, so constness is cast away to give
// a non-const result from this const method.
250 RuleBasedCollator
* StringSearch::getCollator() const
252 // Note the const_cast. It would be cleaner if this const method returned a const collator.
253 return RuleBasedCollator::rbcFromUCollator(const_cast<UCollator
*>(m_strsrch_
->collator
));
// Sets the collator used by the search handle through usearch_setCollator().
// Does nothing visible when `status` already holds a failure.
// NOTE(review): coll is dereferenced with no visible NULL test -- confirm
// that callers never pass NULL.
256 void StringSearch::setCollator(RuleBasedCollator
*coll
, UErrorCode
&status
)
258 if (U_SUCCESS(status
)) {
259 usearch_setCollator(m_strsrch_
, coll
->toUCollator(), &status
);
// Replaces the search pattern.
// Does nothing visible when `status` already holds a failure; otherwise
// `pattern` is copied into m_pattern_ and the search handle is updated from
// m_pattern_'s buffer and length, so the handle refers to the stored copy.
// NOTE(review): the `status` parameter declaration and the end of the
// usearch_setPattern() call are on lines not visible in this listing.
263 void StringSearch::setPattern(const UnicodeString
&pattern
,
266 if (U_SUCCESS(status
)) {
267 m_pattern_
= pattern
;
268 usearch_setPattern(m_strsrch_
, m_pattern_
.getBuffer(), m_pattern_
.length(),
// Pattern accessor returning a const UnicodeString reference.
// NOTE(review): the body is not visible in this listing; presumably it
// returns m_pattern_ -- confirm.
273 const UnicodeString
& StringSearch::getPattern() const
278 // public methods ----------------------------------------------------
// Resets the search by forwarding to usearch_reset() on the search handle.
280 void StringSearch::reset()
282 usearch_reset(m_strsrch_
);
// Creates a new StringSearch from this object's pattern and text, then copies
// the current offset, match start and match length onto it.
// The error code is a local variable, so it is not reported to the caller
// through this signature.
// NOTE(review): the remaining constructor arguments, the allocation-failure
// test and the handling of a failed status are on lines not visible in this
// listing.
285 SearchIterator
* StringSearch::safeClone(void) const
287 UErrorCode status
= U_ZERO_ERROR
;
288 StringSearch
*result
= new StringSearch(m_pattern_
, m_text_
,
// NOTE(review): the condition guarding this assignment is not visible;
// presumably it covers a failed allocation -- confirm.
294 status
= U_MEMORY_ALLOCATION_ERROR
;
// Carry the iteration state over to the new object.
297 result
->setOffset(getOffset(), status
);
298 result
->setMatchStart(m_strsrch_
->search
->matchedIndex
);
299 result
->setMatchLength(m_strsrch_
->search
->matchedLength
);
300 if (U_FAILURE(status
)) {
306 // protected method -------------------------------------------------
// Forward-search step.
// position: text offset to search from.
// status:   in/out ICU error code; the search logic runs only when it holds
//           success on entry.
// The visible return statements all return m_search_->matchedIndex.
// NOTE(review): many lines of this function (else branches, closing braces,
// some returns and preprocessor lines) are not visible in this listing, so
// the relationship between the two search sequences below cannot be
// determined from here.
308 int32_t StringSearch::handleNext(int32_t position
, UErrorCode
&status
)
310 // values passed here are already in the pre-shift position
311 if (U_SUCCESS(status
)) {
// Case pattern.cesLength == 0: a zero-length match is reported at the current
// offset when no match is recorded yet (USEARCH_DONE), otherwise one position
// after the previously recorded matchedIndex.
312 if (m_strsrch_
->pattern
.cesLength
== 0) {
313 m_search_
->matchedIndex
=
314 m_search_
->matchedIndex
== USEARCH_DONE
?
315 getOffset() : m_search_
->matchedIndex
+ 1;
316 m_search_
->matchedLength
= 0;
317 ucol_setOffset(m_strsrch_
->textIter
, m_search_
->matchedIndex
,
// Reaching the end of the text ends the iteration.
319 if (m_search_
->matchedIndex
== m_search_
->textLength
) {
320 m_search_
->matchedIndex
= USEARCH_DONE
;
324 // looking at usearch.cpp, this part is shifted out to
325 // StringSearch instead of SearchIterator because m_strsrch_ is
326 // not accessible in SearchIterator
// Bounds test: position plus the pattern's defaultShiftSize exceeds the text
// length. The body of this branch is not visible here.
328 if (position
+ m_strsrch_
->pattern
.defaultShiftSize
329 > m_search_
->textLength
) {
334 if (m_search_
->matchedLength
<= 0) {
335 // the flipping direction issue has already been handled
337 // for boundary check purposes. this will ensure that the
338 // next match will not precede the current offset
339 // note search->matchedIndex will always be set to something
341 m_search_
->matchedIndex
= position
- 1;
// Position the text iterator at the requested start offset.
344 ucol_setOffset(m_strsrch_
->textIter
, position
, &status
);
// First visible search sequence: canonical or exact search depending on
// isCanonicalMatch.
348 if (m_search_
->isCanonicalMatch
) {
349 // can't use exact here since extra accents are allowed.
350 usearch_handleNextCanonical(m_strsrch_
, &status
);
353 usearch_handleNextExact(m_strsrch_
, &status
);
355 if (U_FAILURE(status
)) {
// The tests below cover: no break iterator; search exhausted (USEARCH_DONE);
// both ends of the match being boundaries according to m_breakiterator_.
// NOTE(review): the operator joining the NULL test to the rest is on a line
// not visible here.
358 if (m_breakiterator_
== NULL
359 #if !UCONFIG_NO_BREAK_ITERATION
361 m_search_
->matchedIndex
== USEARCH_DONE
||
362 (m_breakiterator_
->isBoundary(m_search_
->matchedIndex
) &&
363 m_breakiterator_
->isBoundary(m_search_
->matchedIndex
+
364 m_search_
->matchedLength
))
// The text iterator is moved to the end of the text when the search is
// exhausted; the second call moves it to the match start (the line
// separating the two cases is not visible here).
367 if (m_search_
->matchedIndex
== USEARCH_DONE
) {
368 ucol_setOffset(m_strsrch_
->textIter
,
369 m_search_
->textLength
, &status
);
372 ucol_setOffset(m_strsrch_
->textIter
,
373 m_search_
->matchedIndex
, &status
);
375 return m_search_
->matchedIndex
;
379 // if m_strsrch_->breakIter is always the same as m_breakiterator_
380 // then we don't need to check the match boundaries here because
381 // usearch_handleNextXXX will already have done it.
// Second visible search sequence: the same canonical/exact choice, with no
// boundary test of its own.
382 if (m_search_
->isCanonicalMatch
) {
383 // *could* actually use exact here 'cause no extra accents allowed...
384 usearch_handleNextCanonical(m_strsrch_
, &status
);
386 usearch_handleNextExact(m_strsrch_
, &status
);
389 if (U_FAILURE(status
)) {
// Same repositioning of the text iterator as in the first sequence.
393 if (m_search_
->matchedIndex
== USEARCH_DONE
) {
394 ucol_setOffset(m_strsrch_
->textIter
, m_search_
->textLength
, &status
);
396 ucol_setOffset(m_strsrch_
->textIter
, m_search_
->matchedIndex
, &status
);
399 return m_search_
->matchedIndex
;
// Backward-search step.
// position: text offset to search backwards from.
// status:   in/out ICU error code; the search logic runs only when it holds
//           success on entry.
// The visible return statements all return m_search_->matchedIndex.
// NOTE(review): many lines of this function (else branches, closing braces,
// some returns and preprocessor lines) are not visible in this listing, so
// the relationship between the two search sequences below cannot be
// determined from here.
406 int32_t StringSearch::handlePrev(int32_t position
, UErrorCode
&status
)
408 // values passed here are already in the pre-shift position
409 if (U_SUCCESS(status
)) {
// Case pattern.cesLength == 0: start from the current offset when no match is
// recorded yet (USEARCH_DONE), otherwise from the recorded matchedIndex.
410 if (m_strsrch_
->pattern
.cesLength
== 0) {
411 m_search_
->matchedIndex
=
412 (m_search_
->matchedIndex
== USEARCH_DONE
? getOffset() :
413 m_search_
->matchedIndex
);
// Start-of-text test; the body of this branch is not visible here.
414 if (m_search_
->matchedIndex
== 0) {
// Step one position back and report a zero-length match there.
418 m_search_
->matchedIndex
--;
419 ucol_setOffset(m_strsrch_
->textIter
, m_search_
->matchedIndex
,
421 m_search_
->matchedLength
= 0;
425 // looking at usearch.cpp, this part is shifted out to
426 // StringSearch instead of SearchIterator because m_strsrch_ is
427 // not accessible in SearchIterator
// Bounds test for non-overlapping searches: position minus the pattern's
// defaultShiftSize is negative. The body of this branch is not visible here.
429 if (!m_search_
->isOverlap
&&
430 position
- m_strsrch_
->pattern
.defaultShiftSize
< 0) {
// First visible search sequence: canonical or exact backward search
// depending on isCanonicalMatch.
436 if (m_search_
->isCanonicalMatch
) {
437 // can't use exact here since extra accents are allowed.
438 usearch_handlePreviousCanonical(m_strsrch_
, &status
);
441 usearch_handlePreviousExact(m_strsrch_
, &status
);
443 if (U_FAILURE(status
)) {
// The tests below cover: no break iterator; search exhausted (USEARCH_DONE);
// both ends of the match being boundaries according to m_breakiterator_.
// NOTE(review): the operator joining the NULL test to the rest is on a line
// not visible here.
446 if (m_breakiterator_
== NULL
447 #if !UCONFIG_NO_BREAK_ITERATION
449 m_search_
->matchedIndex
== USEARCH_DONE
||
450 (m_breakiterator_
->isBoundary(m_search_
->matchedIndex
) &&
451 m_breakiterator_
->isBoundary(m_search_
->matchedIndex
+
452 m_search_
->matchedLength
))
455 return m_search_
->matchedIndex
;
// Second visible search sequence: position the text iterator, then make the
// same canonical/exact choice with no boundary test of its own.
459 ucol_setOffset(m_strsrch_
->textIter
, position
, &status
);
461 if (m_search_
->isCanonicalMatch
) {
462 // *could* use exact match here since extra accents *not* allowed!
463 usearch_handlePreviousCanonical(m_strsrch_
, &status
);
465 usearch_handlePreviousExact(m_strsrch_
, &status
);
468 if (U_FAILURE(status
)) {
472 return m_search_
->matchedIndex
;
476 return m_search_
->matchedIndex
;
483 #endif /* #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION */