1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 **********************************************************************
5 * Copyright (C) 2001-2008,2010 IBM and others. All rights reserved.
6 **********************************************************************
7 * Date Name Description
8 * 03/22/2000 helena Creation.
9 **********************************************************************
12 #include "unicode/utypes.h"
14 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
16 #include "unicode/brkiter.h"
17 #include "unicode/schriter.h"
18 #include "unicode/search.h"
22 // public constructors and destructors -----------------------------------
// Copy constructor. In the visible lines it copies the break iterator pointer
// and the text from `other`, allocates a new USearch with uprv_malloc, and
// copies the listed USearch fields one by one.
// NOTE(review): this listing has gaps in its embedded line numbering (braces
// and possibly an initializer list are not shown) -- confirm against the
// complete file before changing anything here.
25 SearchIterator::SearchIterator(const SearchIterator
&other
)
28 m_breakiterator_
= other
.m_breakiterator_
;
29 m_text_
= other
.m_text_
;
// NOTE(review): the uprv_malloc result is dereferenced right below; no NULL
// check is visible.
30 m_search_
= (USearch
*)uprv_malloc(sizeof(USearch
));
31 m_search_
->breakIter
= other
.m_search_
->breakIter
;
32 m_search_
->isCanonicalMatch
= other
.m_search_
->isCanonicalMatch
;
33 m_search_
->isOverlap
= other
.m_search_
->isOverlap
;
34 m_search_
->elementComparisonType
= other
.m_search_
->elementComparisonType
;
35 m_search_
->matchedIndex
= other
.m_search_
->matchedIndex
;
36 m_search_
->matchedLength
= other
.m_search_
->matchedLength
;
// NOTE(review): the text pointer is taken from other's USearch as-is; it is
// not re-pointed at this object's own m_text_ buffer in the visible lines --
// confirm the intended lifetime.
37 m_search_
->text
= other
.m_search_
->text
;
38 m_search_
->textLength
= other
.m_search_
->textLength
;
// NOTE(review): isForwardSearching and reset are not assigned in the visible
// lines although next() and previous() read them -- confirm they are not left
// uninitialized in the freshly allocated struct.
// Destructor. Only the NULL check on m_search_ is visible in this listing;
// the guarded body is not shown (presumably it releases m_search_ -- confirm).
41 SearchIterator::~SearchIterator()
43 if (m_search_
!= NULL
) {
48 // public get and set methods ----------------------------------------
// Sets one search attribute on the underlying USearch.
// Nothing is changed unless `status` is a success code on entry.
// NOTE(review): the third parameter (status) and the switch statement itself
// are not present in this listing; only the case labels are visible.
50 void SearchIterator::setAttribute(USearchAttribute attribute
,
51 USearchAttributeValue value
,
54 if (U_SUCCESS(status
)) {
// Overlap: stored as TRUE only when value is USEARCH_ON, otherwise FALSE.
57 case USEARCH_OVERLAP
:
58 m_search_
->isOverlap
= (value
== USEARCH_ON
? TRUE
: FALSE
);
// Canonical match: stored as TRUE only when value is USEARCH_ON.
60 case USEARCH_CANONICAL_MATCH
:
61 m_search_
->isCanonicalMatch
= (value
== USEARCH_ON
? TRUE
: FALSE
);
// Element comparison: the two wildcard settings are stored as given;
// the other visible assignment stores 0.
63 case USEARCH_ELEMENT_COMPARISON
:
64 if (value
== USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD
|| value
== USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD
) {
65 m_search_
->elementComparisonType
= (int16_t)value
;
67 m_search_
->elementComparisonType
= 0;
// presumably the default branch for unrecognized attributes -- confirm.
71 status
= U_ILLEGAL_ARGUMENT_ERROR
;
// The count sentinel is never a legal attribute value.
74 if (value
== USEARCH_ATTRIBUTE_VALUE_COUNT
) {
75 status
= U_ILLEGAL_ARGUMENT_ERROR
;
// Returns the current value of one search attribute, read from m_search_.
// NOTE(review): the switch statement and the tail of the
// USEARCH_CANONICAL_MATCH return expression are not present in this listing.
79 USearchAttributeValue
SearchIterator::getAttribute(
80 USearchAttribute attribute
) const
// Overlap flag mapped to USEARCH_ON / USEARCH_OFF.
83 case USEARCH_OVERLAP
:
84 return (m_search_
->isOverlap
== TRUE
? USEARCH_ON
: USEARCH_OFF
);
85 case USEARCH_CANONICAL_MATCH
:
86 return (m_search_
->isCanonicalMatch
== TRUE
? USEARCH_ON
:
// Element comparison: the stored value is returned only when it is one of the
// two wildcard settings; otherwise the standard comparison is reported.
88 case USEARCH_ELEMENT_COMPARISON
:
90 int16_t value
= m_search_
->elementComparisonType
;
91 if (value
== USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD
|| value
== USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD
) {
92 return (USearchAttributeValue
)value
;
94 return USEARCH_STANDARD_ELEMENT_COMPARISON
;
// presumably the fallback for attributes not handled above -- confirm.
98 return USEARCH_DEFAULT
;
// Returns the stored start index of the current match
// (m_search_->matchedIndex).
102 int32_t SearchIterator::getMatchedStart() const
104 return m_search_
->matchedIndex
;
// Returns the stored length of the current match
// (m_search_->matchedLength).
107 int32_t SearchIterator::getMatchedLength() const
109 return m_search_
->matchedLength
;
// Writes the currently matched text into `result`.
// When matchedIndex is not USEARCH_DONE and matchedLength is non-zero,
// `result` is set to matchedLength code units of the search text starting at
// matchedIndex.
// NOTE(review): what happens to `result` when there is no match is not
// visible in this listing.
112 void SearchIterator::getMatchedText(UnicodeString
&result
) const
114 int32_t matchedindex
= m_search_
->matchedIndex
;
115 int32_t matchedlength
= m_search_
->matchedLength
;
116 if (matchedindex
!= USEARCH_DONE
&& matchedlength
!= 0) {
117 result
.setTo(m_search_
->text
+ matchedindex
, matchedlength
);
// Installs `breakiter` as the break iterator used by this search.
// On a success status the USearch breakIter field is updated, and
// m_breakiterator_ records the C++ object.
// NOTE(review): the status parameter and some lines between the visible
// statements are not present in this listing, so the exact conditions under
// which each assignment runs cannot be confirmed from here.
124 void SearchIterator::setBreakIterator(BreakIterator
*breakiter
,
127 if (U_SUCCESS(status
)) {
129 m_search_
->breakIter
= NULL
;
130 // the c++ breakiterator may not make use of ubreakiterator.
131 // so we'll have to keep track of it ourselves.
133 // Well, gee... the Constructors that take a BreakIterator
134 // all cast the BreakIterator to a UBreakIterator and
135 // pass it to the corresponding usearch_openFromXXX
136 // routine, so there's no reason not to do this.
138 // Besides, a UBreakIterator is a BreakIterator, so
139 // any subclass of BreakIterator should work fine here...
140 m_search_
->breakIter
= (UBreakIterator
*) breakiter
;
143 m_breakiterator_
= breakiter
;
// Returns the stored break iterator pointer (m_breakiterator_).
147 const BreakIterator
* SearchIterator::getBreakIterator(void) const
149 return m_breakiterator_
;
// Replaces the text to be searched.
// Does nothing unless `status` is a success code on entry. An empty `text`
// sets status to U_ILLEGAL_ARGUMENT_ERROR. The visible assignments point the
// USearch text and textLength at m_text_'s buffer and length.
// NOTE(review): the assignment of `text` into m_text_ is not present in this
// listing -- confirm it happens before the buffer pointer is taken.
152 void SearchIterator::setText(const UnicodeString
&text
, UErrorCode
&status
)
154 if (U_SUCCESS(status
)) {
155 if (text
.length() == 0) {
156 status
= U_ILLEGAL_ARGUMENT_ERROR
;
160 m_search_
->text
= m_text_
.getBuffer();
161 m_search_
->textLength
= m_text_
.length();
// Replaces the text to be searched with the contents of a CharacterIterator.
// On a success status, the iterator's text is extracted into m_text_ and then
// handed to the UnicodeString overload of setText, which performs the
// validation and updates m_search_.
166 void SearchIterator::setText(CharacterIterator
&text
, UErrorCode
&status
)
168 if (U_SUCCESS(status
)) {
169 text
.getText(m_text_
);
170 setText(m_text_
, status
);
// Returns a const reference to the text being searched.
// NOTE(review): the function body is not present in this listing; presumably
// it returns m_text_ -- confirm.
174 const UnicodeString
& SearchIterator::getText(void) const
179 // operator overloading ----------------------------------------------
// Equality comparison. The visible return expression requires all of:
//   - the same break iterator pointer,
//   - equal isCanonicalMatch, isOverlap and elementComparisonType,
//   - equal matchedIndex, matchedLength and textLength,
//   - equal current offsets (getOffset()),
//   - identical text contents, compared with uprv_memcmp over
//     textLength * sizeof(UChar) bytes.
// NOTE(review): lines between the signature and the return are not present in
// this listing (possibly a self-comparison shortcut) -- confirm.
181 UBool
SearchIterator::operator==(const SearchIterator
&that
) const
186 return (m_breakiterator_
== that
.m_breakiterator_
&&
187 m_search_
->isCanonicalMatch
== that
.m_search_
->isCanonicalMatch
&&
188 m_search_
->isOverlap
== that
.m_search_
->isOverlap
&&
189 m_search_
->elementComparisonType
== that
.m_search_
->elementComparisonType
&&
190 m_search_
->matchedIndex
== that
.m_search_
->matchedIndex
&&
191 m_search_
->matchedLength
== that
.m_search_
->matchedLength
&&
192 m_search_
->textLength
== that
.m_search_
->textLength
&&
193 getOffset() == that
.getOffset() &&
194 (uprv_memcmp(m_search_
->text
, that
.m_search_
->text
,
195 m_search_
->textLength
* sizeof(UChar
)) == 0));
198 // public methods ----------------------------------------------------
// Searches forward from the start of the text: sets the offset to 0 and
// returns the result of handleNext(0, status).
// NOTE(review): the body of the U_FAILURE guard is not present in this
// listing.
200 int32_t SearchIterator::first(UErrorCode
&status
)
202 if (U_FAILURE(status
)) {
205 setOffset(0, status
);
206 return handleNext(0, status
);
// Searches forward starting at `position`: sets the offset to `position` and
// returns the result of handleNext(position, status).
// NOTE(review): the status parameter and the body of the U_FAILURE guard are
// not present in this listing.
209 int32_t SearchIterator::following(int32_t position
,
212 if (U_FAILURE(status
)) {
215 setOffset(position
, status
);
216 return handleNext(position
, status
);
// Searches backward from the end of the text: sets the offset to textLength
// and returns the result of handlePrev(textLength, status).
// NOTE(review): the body of the U_FAILURE guard is not present in this
// listing.
219 int32_t SearchIterator::last(UErrorCode
&status
)
221 if (U_FAILURE(status
)) {
224 setOffset(m_search_
->textLength
, status
);
225 return handlePrev(m_search_
->textLength
, status
);
// Searches backward starting at `position`: sets the offset to `position` and
// returns the result of handlePrev(position, status).
// NOTE(review): the status parameter and the body of the U_FAILURE guard are
// not present in this listing.
228 int32_t SearchIterator::preceding(int32_t position
,
231 if (U_FAILURE(status
)) {
234 setOffset(position
, status
);
235 return handlePrev(position
, status
);
// Moves to the next match in the forward direction.
// On a success status it snapshots the current offset and match state, clears
// the reset flag, and ends by returning handleNext(offset, status).
// NOTE(review): several lines of this function (branch bodies, else keywords,
// closing braces, the failure-path return) are not present in this listing,
// so the nesting of the visible statements cannot be confirmed from here.
238 int32_t SearchIterator::next(UErrorCode
&status
)
240 if (U_SUCCESS(status
)) {
241 int32_t offset
= getOffset();
242 int32_t matchindex
= m_search_
->matchedIndex
;
243 int32_t matchlength
= m_search_
->matchedLength
;
244 m_search_
->reset
= FALSE
;
// Already searching forward: check whether the offset or the previous match
// has reached the end of the text.
245 if (m_search_
->isForwardSearching
== TRUE
) {
246 int32_t textlength
= m_search_
->textLength
;
247 if (offset
== textlength
|| matchindex
== textlength
||
248 (matchindex
!= USEARCH_DONE
&&
249 matchindex
+ matchlength
>= textlength
)) {
250 // not enough characters to match
256 // switching direction.
257 // if matchedIndex == USEARCH_DONE, it means that either a
258 // setOffset has been called or that previous ran off the text
259 // string. the iterator would have been set to offset 0 if a
260 // match is not found.
261 m_search_
->isForwardSearching
= TRUE
;
262 if (m_search_
->matchedIndex
!= USEARCH_DONE
) {
263 // there's no need to set the collation element iterator
264 // the next call to next will set the offset.
// A previous match exists: the start offset is adjusted here. The
// overlap-specific adjustment is not visible; the visible one skips past the
// whole previous match.
269 if (matchlength
> 0) {
270 // if matchlength is 0 we are at the start of the iteration
271 if (m_search_
->isOverlap
) {
275 offset
+= matchlength
;
278 return handleNext(offset
, status
);
// Moves to the previous match in the backward direction.
// On a success status it determines the starting offset, switches the search
// direction to backward if needed, and ends in a handlePrev call.
// NOTE(review): several lines of this function (the declaration of `offset`,
// branch bodies, else keywords, closing braces, the failure-path return) are
// not present in this listing, so the nesting of the visible statements
// cannot be confirmed from here.
283 int32_t SearchIterator::previous(UErrorCode
&status
)
285 if (U_SUCCESS(status
)) {
// After a reset, a backward search starts from the end of the text.
287 if (m_search_
->reset
) {
288 offset
= m_search_
->textLength
;
289 m_search_
->isForwardSearching
= FALSE
;
290 m_search_
->reset
= FALSE
;
291 setOffset(offset
, status
);
294 offset
= getOffset();
297 int32_t matchindex
= m_search_
->matchedIndex
;
298 if (m_search_
->isForwardSearching
== TRUE
) {
299 // switching direction.
300 // if matchedIndex == USEARCH_DONE, it means that either a
301 // setOffset has been called or that next ran off the text
302 // string. the iterator would have been set to offset textLength if
303 // a match is not found.
304 m_search_
->isForwardSearching
= FALSE
;
305 if (matchindex
!= USEARCH_DONE
) {
310 if (offset
== 0 || matchindex
== 0) {
311 // not enough characters to match
// A previous match exists: search backward from it. With overlap enabled the
// start position is moved forward by matchedLength - 2 first.
317 if (matchindex
!= USEARCH_DONE
) {
318 if (m_search_
->isOverlap
) {
319 matchindex
+= m_search_
->matchedLength
- 2;
322 return handlePrev(matchindex
, status
);
// No previous match: search backward from the current offset.
325 return handlePrev(offset
, status
);
// Resets the iterator: sets the offset to 0 and restores the attribute and
// direction fields to the values the constructors assign (overlap off,
// canonical match off, elementComparisonType 0, forward searching, reset
// flag TRUE).
// The local status is used only for the setOffset call and is not reported
// to the caller in the visible lines.
// NOTE(review): one embedded line number (334) is missing between the status
// declaration and setOffset -- confirm whether a statement was dropped.
331 void SearchIterator::reset()
333 UErrorCode status
= U_ZERO_ERROR
;
335 setOffset(0, status
);
336 m_search_
->isOverlap
= FALSE
;
337 m_search_
->isCanonicalMatch
= FALSE
;
338 m_search_
->elementComparisonType
= 0;
339 m_search_
->isForwardSearching
= TRUE
;
340 m_search_
->reset
= TRUE
;
343 // protected constructors and destructors -----------------------------
// Default constructor. Allocates the USearch struct with uprv_malloc and
// initializes it to an empty state: no break iterator, overlap and canonical
// match off, standard element comparison (0), forward searching, reset flag
// set, no match (USEARCH_DONE, length 0), and no text (NULL, length 0).
// NOTE(review): the uprv_malloc result is dereferenced immediately; no NULL
// check is visible.
345 SearchIterator::SearchIterator()
347 m_search_
= (USearch
*)uprv_malloc(sizeof(USearch
));
348 m_search_
->breakIter
= NULL
;
349 m_search_
->isOverlap
= FALSE
;
350 m_search_
->isCanonicalMatch
= FALSE
;
351 m_search_
->elementComparisonType
= 0;
352 m_search_
->isForwardSearching
= TRUE
;
353 m_search_
->reset
= TRUE
;
354 m_search_
->matchedIndex
= USEARCH_DONE
;
355 m_search_
->matchedLength
= 0;
356 m_search_
->text
= NULL
;
357 m_search_
->textLength
= 0;
358 m_breakiterator_
= NULL
;
// Constructs a search iterator over `text` with an optional break iterator.
// Allocates the USearch struct and initializes it to the same defaults as the
// default constructor, except that the text pointer comes from m_text_'s
// buffer and the length from `text`.
// NOTE(review): the initializer list ends with a trailing comma in this
// listing, so at least one more member initializer (presumably m_text_(text))
// is not shown -- confirm.
// NOTE(review): the uprv_malloc result is dereferenced immediately; no NULL
// check is visible.
361 SearchIterator::SearchIterator(const UnicodeString
&text
,
362 BreakIterator
*breakiter
) :
363 m_breakiterator_(breakiter
),
366 m_search_
= (USearch
*)uprv_malloc(sizeof(USearch
));
367 m_search_
->breakIter
= NULL
;
368 m_search_
->isOverlap
= FALSE
;
369 m_search_
->isCanonicalMatch
= FALSE
;
370 m_search_
->elementComparisonType
= 0;
371 m_search_
->isForwardSearching
= TRUE
;
372 m_search_
->reset
= TRUE
;
373 m_search_
->matchedIndex
= USEARCH_DONE
;
374 m_search_
->matchedLength
= 0;
375 m_search_
->text
= m_text_
.getBuffer();
376 m_search_
->textLength
= text
.length();
// Constructs a search iterator over the contents of a CharacterIterator with
// an optional break iterator.
// Allocates the USearch struct, initializes it to the same defaults as the
// default constructor, extracts the iterator's text into m_text_, and points
// the USearch text and textLength at that copy.
// NOTE(review): the uprv_malloc result is dereferenced immediately; no NULL
// check is visible.
379 SearchIterator::SearchIterator(CharacterIterator
&text
,
380 BreakIterator
*breakiter
) :
381 m_breakiterator_(breakiter
)
383 m_search_
= (USearch
*)uprv_malloc(sizeof(USearch
));
384 m_search_
->breakIter
= NULL
;
385 m_search_
->isOverlap
= FALSE
;
386 m_search_
->isCanonicalMatch
= FALSE
;
387 m_search_
->elementComparisonType
= 0;
388 m_search_
->isForwardSearching
= TRUE
;
389 m_search_
->reset
= TRUE
;
390 m_search_
->matchedIndex
= USEARCH_DONE
;
391 m_search_
->matchedLength
= 0;
// Take a private copy of the text before pointing m_search_ at it.
392 text
.getText(m_text_
);
393 m_search_
->text
= m_text_
.getBuffer();
394 m_search_
->textLength
= m_text_
.length();
// NOTE(review): m_breakiterator_ was already set in the initializer list, so
// this assignment looks redundant.
395 m_breakiterator_
= breakiter
;
398 // protected methods ------------------------------------------------------
// Assignment operator. In the visible lines it copies the break iterator
// pointer, the text, and the listed USearch fields from `that` into this
// object's existing m_search_.
// NOTE(review): lines around the visible assignments (possibly a
// self-assignment guard and the return statement) are not present in this
// listing.
// NOTE(review): isForwardSearching and reset are not copied in the visible
// lines -- confirm whether that is intended.
400 SearchIterator
& SearchIterator::operator=(const SearchIterator
&that
)
403 m_breakiterator_
= that
.m_breakiterator_
;
404 m_text_
= that
.m_text_
;
405 m_search_
->breakIter
= that
.m_search_
->breakIter
;
406 m_search_
->isCanonicalMatch
= that
.m_search_
->isCanonicalMatch
;
407 m_search_
->isOverlap
= that
.m_search_
->isOverlap
;
408 m_search_
->elementComparisonType
= that
.m_search_
->elementComparisonType
;
409 m_search_
->matchedIndex
= that
.m_search_
->matchedIndex
;
410 m_search_
->matchedLength
= that
.m_search_
->matchedLength
;
// NOTE(review): the text pointer is copied from that's USearch rather than
// taken from this object's freshly assigned m_text_ -- confirm the intended
// lifetime.
411 m_search_
->text
= that
.m_search_
->text
;
412 m_search_
->textLength
= that
.m_search_
->textLength
;
// Stores `length` as the length of the current match
// (m_search_->matchedLength). No validation is performed.
417 void SearchIterator::setMatchLength(int32_t length
)
419 m_search_
->matchedLength
= length
;
// Stores `position` as the start index of the current match
// (m_search_->matchedIndex). No validation is performed.
422 void SearchIterator::setMatchStart(int32_t position
)
424 m_search_
->matchedIndex
= position
;
// Records that no match was found: sets the match start to USEARCH_DONE and
// repositions the offset. When searching forward the offset goes to
// textLength; the other visible call sets it to 0 (presumably the backward
// case -- confirm).
// The local status is used only for the setOffset calls and is not reported
// to the caller in the visible lines.
// NOTE(review): embedded line numbers are missing between the visible
// statements (e.g. 430), so a statement may have been dropped from this
// listing.
427 void SearchIterator::setMatchNotFound()
429 setMatchStart(USEARCH_DONE
);
431 UErrorCode status
= U_ZERO_ERROR
;
432 // by default no errors should be returned here since offsets are within
434 if (m_search_
->isForwardSearching
) {
435 setOffset(m_search_
->textLength
, status
);
438 setOffset(0, status
);
445 #endif /* #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION */