/*
**********************************************************************
*   Copyright (C) 2001-2008,2010 IBM and others. All rights reserved.
**********************************************************************
*   Date        Name        Description
*  03/22/2000   helena      Creation.
**********************************************************************
*/
10 #include "unicode/utypes.h"
12 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
14 #include "unicode/brkiter.h"
15 #include "unicode/schriter.h"
16 #include "unicode/search.h"
// public constructors and destructors -----------------------------------
23 SearchIterator::SearchIterator(const SearchIterator
&other
)
26 m_breakiterator_
= other
.m_breakiterator_
;
27 m_text_
= other
.m_text_
;
28 m_search_
= (USearch
*)uprv_malloc(sizeof(USearch
));
29 m_search_
->breakIter
= other
.m_search_
->breakIter
;
30 m_search_
->isCanonicalMatch
= other
.m_search_
->isCanonicalMatch
;
31 m_search_
->isOverlap
= other
.m_search_
->isOverlap
;
32 m_search_
->elementComparisonType
= other
.m_search_
->elementComparisonType
;
33 m_search_
->matchedIndex
= other
.m_search_
->matchedIndex
;
34 m_search_
->matchedLength
= other
.m_search_
->matchedLength
;
35 m_search_
->text
= other
.m_search_
->text
;
36 m_search_
->textLength
= other
.m_search_
->textLength
;
39 SearchIterator::~SearchIterator()
41 if (m_search_
!= NULL
) {
// public get and set methods ----------------------------------------
48 void SearchIterator::setAttribute(USearchAttribute attribute
,
49 USearchAttributeValue value
,
52 if (U_SUCCESS(status
)) {
55 case USEARCH_OVERLAP
:
56 m_search_
->isOverlap
= (value
== USEARCH_ON
? TRUE
: FALSE
);
58 case USEARCH_CANONICAL_MATCH
:
59 m_search_
->isCanonicalMatch
= (value
== USEARCH_ON
? TRUE
: FALSE
);
61 case USEARCH_ELEMENT_COMPARISON
:
62 if (value
== USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD
|| value
== USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD
) {
63 m_search_
->elementComparisonType
= (int16_t)value
;
65 m_search_
->elementComparisonType
= 0;
69 status
= U_ILLEGAL_ARGUMENT_ERROR
;
72 if (value
== USEARCH_ATTRIBUTE_VALUE_COUNT
) {
73 status
= U_ILLEGAL_ARGUMENT_ERROR
;
77 USearchAttributeValue
SearchIterator::getAttribute(
78 USearchAttribute attribute
) const
81 case USEARCH_OVERLAP
:
82 return (m_search_
->isOverlap
== TRUE
? USEARCH_ON
: USEARCH_OFF
);
83 case USEARCH_CANONICAL_MATCH
:
84 return (m_search_
->isCanonicalMatch
== TRUE
? USEARCH_ON
:
86 case USEARCH_ELEMENT_COMPARISON
:
88 int16_t value
= m_search_
->elementComparisonType
;
89 if (value
== USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD
|| value
== USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD
) {
90 return (USearchAttributeValue
)value
;
92 return USEARCH_STANDARD_ELEMENT_COMPARISON
;
96 return USEARCH_DEFAULT
;
100 int32_t SearchIterator::getMatchedStart() const
102 return m_search_
->matchedIndex
;
105 int32_t SearchIterator::getMatchedLength() const
107 return m_search_
->matchedLength
;
110 void SearchIterator::getMatchedText(UnicodeString
&result
) const
112 int32_t matchedindex
= m_search_
->matchedIndex
;
113 int32_t matchedlength
= m_search_
->matchedLength
;
114 if (matchedindex
!= USEARCH_DONE
&& matchedlength
!= 0) {
115 result
.setTo(m_search_
->text
+ matchedindex
, matchedlength
);
122 void SearchIterator::setBreakIterator(BreakIterator
*breakiter
,
125 if (U_SUCCESS(status
)) {
127 m_search_
->breakIter
= NULL
;
128 // the c++ breakiterator may not make use of ubreakiterator.
129 // so we'll have to keep track of it ourselves.
131 // Well, gee... the Constructors that take a BreakIterator
132 // all cast the BreakIterator to a UBreakIterator and
133 // pass it to the corresponding usearch_openFromXXX
134 // routine, so there's no reason not to do this.
136 // Besides, a UBreakIterator is a BreakIterator, so
137 // any subclass of BreakIterator should work fine here...
138 m_search_
->breakIter
= (UBreakIterator
*) breakiter
;
141 m_breakiterator_
= breakiter
;
145 const BreakIterator
* SearchIterator::getBreakIterator(void) const
147 return m_breakiterator_
;
150 void SearchIterator::setText(const UnicodeString
&text
, UErrorCode
&status
)
152 if (U_SUCCESS(status
)) {
153 if (text
.length() == 0) {
154 status
= U_ILLEGAL_ARGUMENT_ERROR
;
158 m_search_
->text
= m_text_
.getBuffer();
159 m_search_
->textLength
= m_text_
.length();
164 void SearchIterator::setText(CharacterIterator
&text
, UErrorCode
&status
)
166 if (U_SUCCESS(status
)) {
167 text
.getText(m_text_
);
168 setText(m_text_
, status
);
172 const UnicodeString
& SearchIterator::getText(void) const
// operator overloading ----------------------------------------------
179 UBool
SearchIterator::operator==(const SearchIterator
&that
) const
184 return (m_breakiterator_
== that
.m_breakiterator_
&&
185 m_search_
->isCanonicalMatch
== that
.m_search_
->isCanonicalMatch
&&
186 m_search_
->isOverlap
== that
.m_search_
->isOverlap
&&
187 m_search_
->elementComparisonType
== that
.m_search_
->elementComparisonType
&&
188 m_search_
->matchedIndex
== that
.m_search_
->matchedIndex
&&
189 m_search_
->matchedLength
== that
.m_search_
->matchedLength
&&
190 m_search_
->textLength
== that
.m_search_
->textLength
&&
191 getOffset() == that
.getOffset() &&
192 (uprv_memcmp(m_search_
->text
, that
.m_search_
->text
,
193 m_search_
->textLength
* sizeof(UChar
)) == 0));
// public methods ----------------------------------------------------
198 int32_t SearchIterator::first(UErrorCode
&status
)
200 if (U_FAILURE(status
)) {
203 setOffset(0, status
);
204 return handleNext(0, status
);
207 int32_t SearchIterator::following(int32_t position
,
210 if (U_FAILURE(status
)) {
213 setOffset(position
, status
);
214 return handleNext(position
, status
);
217 int32_t SearchIterator::last(UErrorCode
&status
)
219 if (U_FAILURE(status
)) {
222 setOffset(m_search_
->textLength
, status
);
223 return handlePrev(m_search_
->textLength
, status
);
226 int32_t SearchIterator::preceding(int32_t position
,
229 if (U_FAILURE(status
)) {
232 setOffset(position
, status
);
233 return handlePrev(position
, status
);
236 int32_t SearchIterator::next(UErrorCode
&status
)
238 if (U_SUCCESS(status
)) {
239 int32_t offset
= getOffset();
240 int32_t matchindex
= m_search_
->matchedIndex
;
241 int32_t matchlength
= m_search_
->matchedLength
;
242 m_search_
->reset
= FALSE
;
243 if (m_search_
->isForwardSearching
== TRUE
) {
244 int32_t textlength
= m_search_
->textLength
;
245 if (offset
== textlength
|| matchindex
== textlength
||
246 (matchindex
!= USEARCH_DONE
&&
247 matchindex
+ matchlength
>= textlength
)) {
248 // not enough characters to match
254 // switching direction.
255 // if matchedIndex == USEARCH_DONE, it means that either a
256 // setOffset has been called or that previous ran off the text
257 // string. the iterator would have been set to offset 0 if a
258 // match is not found.
259 m_search_
->isForwardSearching
= TRUE
;
260 if (m_search_
->matchedIndex
!= USEARCH_DONE
) {
261 // there's no need to set the collation element iterator
262 // the next call to next will set the offset.
267 if (matchlength
> 0) {
268 // if matchlength is 0 we are at the start of the iteration
269 if (m_search_
->isOverlap
) {
273 offset
+= matchlength
;
276 return handleNext(offset
, status
);
281 int32_t SearchIterator::previous(UErrorCode
&status
)
283 if (U_SUCCESS(status
)) {
285 if (m_search_
->reset
) {
286 offset
= m_search_
->textLength
;
287 m_search_
->isForwardSearching
= FALSE
;
288 m_search_
->reset
= FALSE
;
289 setOffset(offset
, status
);
292 offset
= getOffset();
295 int32_t matchindex
= m_search_
->matchedIndex
;
296 if (m_search_
->isForwardSearching
== TRUE
) {
297 // switching direction.
298 // if matchedIndex == USEARCH_DONE, it means that either a
299 // setOffset has been called or that next ran off the text
300 // string. the iterator would have been set to offset textLength if
301 // a match is not found.
302 m_search_
->isForwardSearching
= FALSE
;
303 if (matchindex
!= USEARCH_DONE
) {
308 if (offset
== 0 || matchindex
== 0) {
309 // not enough characters to match
315 if (matchindex
!= USEARCH_DONE
) {
316 if (m_search_
->isOverlap
) {
317 matchindex
+= m_search_
->matchedLength
- 2;
320 return handlePrev(matchindex
, status
);
323 return handlePrev(offset
, status
);
329 void SearchIterator::reset()
331 UErrorCode status
= U_ZERO_ERROR
;
333 setOffset(0, status
);
334 m_search_
->isOverlap
= FALSE
;
335 m_search_
->isCanonicalMatch
= FALSE
;
336 m_search_
->elementComparisonType
= 0;
337 m_search_
->isForwardSearching
= TRUE
;
338 m_search_
->reset
= TRUE
;
// protected constructors and destructors -----------------------------
343 SearchIterator::SearchIterator()
345 m_search_
= (USearch
*)uprv_malloc(sizeof(USearch
));
346 m_search_
->breakIter
= NULL
;
347 m_search_
->isOverlap
= FALSE
;
348 m_search_
->isCanonicalMatch
= FALSE
;
349 m_search_
->elementComparisonType
= 0;
350 m_search_
->isForwardSearching
= TRUE
;
351 m_search_
->reset
= TRUE
;
352 m_search_
->matchedIndex
= USEARCH_DONE
;
353 m_search_
->matchedLength
= 0;
354 m_search_
->text
= NULL
;
355 m_search_
->textLength
= 0;
356 m_breakiterator_
= NULL
;
359 SearchIterator::SearchIterator(const UnicodeString
&text
,
360 BreakIterator
*breakiter
) :
361 m_breakiterator_(breakiter
),
364 m_search_
= (USearch
*)uprv_malloc(sizeof(USearch
));
365 m_search_
->breakIter
= NULL
;
366 m_search_
->isOverlap
= FALSE
;
367 m_search_
->isCanonicalMatch
= FALSE
;
368 m_search_
->elementComparisonType
= 0;
369 m_search_
->isForwardSearching
= TRUE
;
370 m_search_
->reset
= TRUE
;
371 m_search_
->matchedIndex
= USEARCH_DONE
;
372 m_search_
->matchedLength
= 0;
373 m_search_
->text
= m_text_
.getBuffer();
374 m_search_
->textLength
= text
.length();
377 SearchIterator::SearchIterator(CharacterIterator
&text
,
378 BreakIterator
*breakiter
) :
379 m_breakiterator_(breakiter
)
381 m_search_
= (USearch
*)uprv_malloc(sizeof(USearch
));
382 m_search_
->breakIter
= NULL
;
383 m_search_
->isOverlap
= FALSE
;
384 m_search_
->isCanonicalMatch
= FALSE
;
385 m_search_
->elementComparisonType
= 0;
386 m_search_
->isForwardSearching
= TRUE
;
387 m_search_
->reset
= TRUE
;
388 m_search_
->matchedIndex
= USEARCH_DONE
;
389 m_search_
->matchedLength
= 0;
390 text
.getText(m_text_
);
391 m_search_
->text
= m_text_
.getBuffer();
392 m_search_
->textLength
= m_text_
.length();
393 m_breakiterator_
= breakiter
;
// protected methods ------------------------------------------------------
398 SearchIterator
& SearchIterator::operator=(const SearchIterator
&that
)
401 m_breakiterator_
= that
.m_breakiterator_
;
402 m_text_
= that
.m_text_
;
403 m_search_
->breakIter
= that
.m_search_
->breakIter
;
404 m_search_
->isCanonicalMatch
= that
.m_search_
->isCanonicalMatch
;
405 m_search_
->isOverlap
= that
.m_search_
->isOverlap
;
406 m_search_
->elementComparisonType
= that
.m_search_
->elementComparisonType
;
407 m_search_
->matchedIndex
= that
.m_search_
->matchedIndex
;
408 m_search_
->matchedLength
= that
.m_search_
->matchedLength
;
409 m_search_
->text
= that
.m_search_
->text
;
410 m_search_
->textLength
= that
.m_search_
->textLength
;
415 void SearchIterator::setMatchLength(int32_t length
)
417 m_search_
->matchedLength
= length
;
420 void SearchIterator::setMatchStart(int32_t position
)
422 m_search_
->matchedIndex
= position
;
425 void SearchIterator::setMatchNotFound()
427 setMatchStart(USEARCH_DONE
);
429 UErrorCode status
= U_ZERO_ERROR
;
430 // by default no errors should be returned here since offsets are within
432 if (m_search_
->isForwardSearching
) {
433 setOffset(m_search_
->textLength
, status
);
436 setOffset(0, status
);
443 #endif /* #if !UCONFIG_NO_COLLATION */