2 **********************************************************************
3 * Copyright (C) 2001-2008 IBM and others. All rights reserved.
4 **********************************************************************
5 * Date Name Description
6 * 03/22/2000 helena Creation.
7 **********************************************************************
10 #include "unicode/utypes.h"
12 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
14 #include "unicode/brkiter.h"
15 #include "unicode/schriter.h"
16 #include "unicode/search.h"
20 // public constructors and destructors -----------------------------------
23 SearchIterator::SearchIterator(const SearchIterator
&other
)
26 m_breakiterator_
= other
.m_breakiterator_
;
27 m_text_
= other
.m_text_
;
28 m_search_
= (USearch
*)uprv_malloc(sizeof(USearch
));
29 m_search_
->breakIter
= other
.m_search_
->breakIter
;
30 m_search_
->isCanonicalMatch
= other
.m_search_
->isCanonicalMatch
;
31 m_search_
->isOverlap
= other
.m_search_
->isOverlap
;
32 m_search_
->matchedIndex
= other
.m_search_
->matchedIndex
;
33 m_search_
->matchedLength
= other
.m_search_
->matchedLength
;
34 m_search_
->text
= other
.m_search_
->text
;
35 m_search_
->textLength
= other
.m_search_
->textLength
;
38 SearchIterator::~SearchIterator()
40 if (m_search_
!= NULL
) {
45 // public get and set methods ----------------------------------------
47 void SearchIterator::setAttribute(USearchAttribute attribute
,
48 USearchAttributeValue value
,
51 if (U_SUCCESS(status
)) {
54 case USEARCH_OVERLAP
:
55 m_search_
->isOverlap
= (value
== USEARCH_ON
? TRUE
: FALSE
);
57 case USEARCH_CANONICAL_MATCH
:
58 m_search_
->isCanonicalMatch
= (value
== USEARCH_ON
? TRUE
: FALSE
);
61 status
= U_ILLEGAL_ARGUMENT_ERROR
;
64 if (value
== USEARCH_ATTRIBUTE_VALUE_COUNT
) {
65 status
= U_ILLEGAL_ARGUMENT_ERROR
;
69 USearchAttributeValue
SearchIterator::getAttribute(
70 USearchAttribute attribute
) const
73 case USEARCH_OVERLAP
:
74 return (m_search_
->isOverlap
== TRUE
? USEARCH_ON
: USEARCH_OFF
);
75 case USEARCH_CANONICAL_MATCH
:
76 return (m_search_
->isCanonicalMatch
== TRUE
? USEARCH_ON
:
79 return USEARCH_DEFAULT
;
83 int32_t SearchIterator::getMatchedStart() const
85 return m_search_
->matchedIndex
;
88 int32_t SearchIterator::getMatchedLength() const
90 return m_search_
->matchedLength
;
93 void SearchIterator::getMatchedText(UnicodeString
&result
) const
95 int32_t matchedindex
= m_search_
->matchedIndex
;
96 int32_t matchedlength
= m_search_
->matchedLength
;
97 if (matchedindex
!= USEARCH_DONE
&& matchedlength
!= 0) {
98 result
.setTo(m_search_
->text
+ matchedindex
, matchedlength
);
105 void SearchIterator::setBreakIterator(BreakIterator
*breakiter
,
108 if (U_SUCCESS(status
)) {
110 m_search_
->breakIter
= NULL
;
111 // the c++ breakiterator may not make use of ubreakiterator.
112 // so we'll have to keep track of it ourselves.
114 // Well, gee... the Constructors that take a BreakIterator
115 // all cast the BreakIterator to a UBreakIterator and
116 // pass it to the corresponding usearch_openFromXXX
117 // routine, so there's no reason not to do this.
119 // Besides, a UBreakIterator is a BreakIterator, so
120 // any subclass of BreakIterator should work fine here...
121 m_search_
->breakIter
= (UBreakIterator
*) breakiter
;
124 m_breakiterator_
= breakiter
;
128 const BreakIterator
* SearchIterator::getBreakIterator(void) const
130 return m_breakiterator_
;
133 void SearchIterator::setText(const UnicodeString
&text
, UErrorCode
&status
)
135 if (U_SUCCESS(status
)) {
136 if (text
.length() == 0) {
137 status
= U_ILLEGAL_ARGUMENT_ERROR
;
141 m_search_
->text
= m_text_
.getBuffer();
142 m_search_
->textLength
= m_text_
.length();
147 void SearchIterator::setText(CharacterIterator
&text
, UErrorCode
&status
)
149 if (U_SUCCESS(status
)) {
150 text
.getText(m_text_
);
151 setText(m_text_
, status
);
155 const UnicodeString
& SearchIterator::getText(void) const
160 // operator overloading ----------------------------------------------
162 UBool
SearchIterator::operator==(const SearchIterator
&that
) const
167 return (m_breakiterator_
== that
.m_breakiterator_
&&
168 m_search_
->isCanonicalMatch
== that
.m_search_
->isCanonicalMatch
&&
169 m_search_
->isOverlap
== that
.m_search_
->isOverlap
&&
170 m_search_
->matchedIndex
== that
.m_search_
->matchedIndex
&&
171 m_search_
->matchedLength
== that
.m_search_
->matchedLength
&&
172 m_search_
->textLength
== that
.m_search_
->textLength
&&
173 getOffset() == that
.getOffset() &&
174 (uprv_memcmp(m_search_
->text
, that
.m_search_
->text
,
175 m_search_
->textLength
* sizeof(UChar
)) == 0));
178 // public methods ----------------------------------------------------
180 int32_t SearchIterator::first(UErrorCode
&status
)
182 if (U_FAILURE(status
)) {
185 setOffset(0, status
);
186 return handleNext(0, status
);
189 int32_t SearchIterator::following(int32_t position
,
192 if (U_FAILURE(status
)) {
195 setOffset(position
, status
);
196 return handleNext(position
, status
);
199 int32_t SearchIterator::last(UErrorCode
&status
)
201 if (U_FAILURE(status
)) {
204 setOffset(m_search_
->textLength
, status
);
205 return handlePrev(m_search_
->textLength
, status
);
208 int32_t SearchIterator::preceding(int32_t position
,
211 if (U_FAILURE(status
)) {
214 setOffset(position
, status
);
215 return handlePrev(position
, status
);
218 int32_t SearchIterator::next(UErrorCode
&status
)
220 if (U_SUCCESS(status
)) {
221 int32_t offset
= getOffset();
222 int32_t matchindex
= m_search_
->matchedIndex
;
223 int32_t matchlength
= m_search_
->matchedLength
;
224 m_search_
->reset
= FALSE
;
225 if (m_search_
->isForwardSearching
== TRUE
) {
226 int32_t textlength
= m_search_
->textLength
;
227 if (offset
== textlength
|| matchindex
== textlength
||
228 (matchindex
!= USEARCH_DONE
&&
229 matchindex
+ matchlength
>= textlength
)) {
230 // not enough characters to match
236 // switching direction.
237 // if matchedIndex == USEARCH_DONE, it means that either a
238 // setOffset has been called or that previous ran off the text
239 // string. the iterator would have been set to offset 0 if a
240 // match is not found.
241 m_search_
->isForwardSearching
= TRUE
;
242 if (m_search_
->matchedIndex
!= USEARCH_DONE
) {
243 // there's no need to set the collation element iterator
244 // the next call to next will set the offset.
249 if (matchlength
> 0) {
250 // if matchlength is 0 we are at the start of the iteration
251 if (m_search_
->isOverlap
) {
255 offset
+= matchlength
;
258 return handleNext(offset
, status
);
263 int32_t SearchIterator::previous(UErrorCode
&status
)
265 if (U_SUCCESS(status
)) {
267 if (m_search_
->reset
) {
268 offset
= m_search_
->textLength
;
269 m_search_
->isForwardSearching
= FALSE
;
270 m_search_
->reset
= FALSE
;
271 setOffset(offset
, status
);
274 offset
= getOffset();
277 int32_t matchindex
= m_search_
->matchedIndex
;
278 if (m_search_
->isForwardSearching
== TRUE
) {
279 // switching direction.
280 // if matchedIndex == USEARCH_DONE, it means that either a
281 // setOffset has been called or that next ran off the text
282 // string. the iterator would have been set to offset textLength if
283 // a match is not found.
284 m_search_
->isForwardSearching
= FALSE
;
285 if (matchindex
!= USEARCH_DONE
) {
290 if (offset
== 0 || matchindex
== 0) {
291 // not enough characters to match
297 if (matchindex
!= USEARCH_DONE
) {
298 if (m_search_
->isOverlap
) {
299 matchindex
+= m_search_
->matchedLength
- 2;
302 return handlePrev(matchindex
, status
);
305 return handlePrev(offset
, status
);
311 void SearchIterator::reset()
313 UErrorCode status
= U_ZERO_ERROR
;
315 setOffset(0, status
);
316 m_search_
->isOverlap
= FALSE
;
317 m_search_
->isCanonicalMatch
= FALSE
;
318 m_search_
->isForwardSearching
= TRUE
;
319 m_search_
->reset
= TRUE
;
322 // protected constructors and destructors -----------------------------
324 SearchIterator::SearchIterator()
326 m_search_
= (USearch
*)uprv_malloc(sizeof(USearch
));
327 m_search_
->breakIter
= NULL
;
328 m_search_
->isOverlap
= FALSE
;
329 m_search_
->isCanonicalMatch
= FALSE
;
330 m_search_
->isForwardSearching
= TRUE
;
331 m_search_
->reset
= TRUE
;
332 m_search_
->matchedIndex
= USEARCH_DONE
;
333 m_search_
->matchedLength
= 0;
334 m_search_
->text
= NULL
;
335 m_search_
->textLength
= 0;
336 m_breakiterator_
= NULL
;
339 SearchIterator::SearchIterator(const UnicodeString
&text
,
340 BreakIterator
*breakiter
) :
341 m_breakiterator_(breakiter
),
344 m_search_
= (USearch
*)uprv_malloc(sizeof(USearch
));
345 m_search_
->breakIter
= NULL
;
346 m_search_
->isOverlap
= FALSE
;
347 m_search_
->isCanonicalMatch
= FALSE
;
348 m_search_
->isForwardSearching
= TRUE
;
349 m_search_
->reset
= TRUE
;
350 m_search_
->matchedIndex
= USEARCH_DONE
;
351 m_search_
->matchedLength
= 0;
352 m_search_
->text
= m_text_
.getBuffer();
353 m_search_
->textLength
= text
.length();
356 SearchIterator::SearchIterator(CharacterIterator
&text
,
357 BreakIterator
*breakiter
) :
358 m_breakiterator_(breakiter
)
360 m_search_
= (USearch
*)uprv_malloc(sizeof(USearch
));
361 m_search_
->breakIter
= NULL
;
362 m_search_
->isOverlap
= FALSE
;
363 m_search_
->isCanonicalMatch
= FALSE
;
364 m_search_
->isForwardSearching
= TRUE
;
365 m_search_
->reset
= TRUE
;
366 m_search_
->matchedIndex
= USEARCH_DONE
;
367 m_search_
->matchedLength
= 0;
368 text
.getText(m_text_
);
369 m_search_
->text
= m_text_
.getBuffer();
370 m_search_
->textLength
= m_text_
.length();
371 m_breakiterator_
= breakiter
;
374 // protected methods ------------------------------------------------------
376 SearchIterator
& SearchIterator::operator=(const SearchIterator
&that
)
379 m_breakiterator_
= that
.m_breakiterator_
;
380 m_text_
= that
.m_text_
;
381 m_search_
->breakIter
= that
.m_search_
->breakIter
;
382 m_search_
->isCanonicalMatch
= that
.m_search_
->isCanonicalMatch
;
383 m_search_
->isOverlap
= that
.m_search_
->isOverlap
;
384 m_search_
->matchedIndex
= that
.m_search_
->matchedIndex
;
385 m_search_
->matchedLength
= that
.m_search_
->matchedLength
;
386 m_search_
->text
= that
.m_search_
->text
;
387 m_search_
->textLength
= that
.m_search_
->textLength
;
392 void SearchIterator::setMatchLength(int32_t length
)
394 m_search_
->matchedLength
= length
;
397 void SearchIterator::setMatchStart(int32_t position
)
399 m_search_
->matchedIndex
= position
;
402 void SearchIterator::setMatchNotFound()
404 setMatchStart(USEARCH_DONE
);
406 UErrorCode status
= U_ZERO_ERROR
;
407 // by default no errors should be returned here since offsets are within
409 if (m_search_
->isForwardSearching
) {
410 setOffset(m_search_
->textLength
, status
);
413 setOffset(0, status
);
420 #endif /* #if !UCONFIG_NO_COLLATION */