]> git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/search.cpp
ICU-400.40.tar.gz
[apple/icu.git] / icuSources / i18n / search.cpp
1 /*
2 **********************************************************************
3 * Copyright (C) 2001-2008 IBM and others. All rights reserved.
4 **********************************************************************
5 * Date Name Description
6 * 03/22/2000 helena Creation.
7 **********************************************************************
8 */
9
10 #include "unicode/utypes.h"
11
12 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
13
14 #include "unicode/brkiter.h"
15 #include "unicode/schriter.h"
16 #include "unicode/search.h"
17 #include "usrchimp.h"
18 #include "cmemory.h"
19
20 // public constructors and destructors -----------------------------------
21 U_NAMESPACE_BEGIN
22
23 SearchIterator::SearchIterator(const SearchIterator &other)
24 : UObject(other)
25 {
26 m_breakiterator_ = other.m_breakiterator_;
27 m_text_ = other.m_text_;
28 m_search_ = (USearch *)uprv_malloc(sizeof(USearch));
29 m_search_->breakIter = other.m_search_->breakIter;
30 m_search_->isCanonicalMatch = other.m_search_->isCanonicalMatch;
31 m_search_->isOverlap = other.m_search_->isOverlap;
32 m_search_->matchedIndex = other.m_search_->matchedIndex;
33 m_search_->matchedLength = other.m_search_->matchedLength;
34 m_search_->text = other.m_search_->text;
35 m_search_->textLength = other.m_search_->textLength;
36 }
37
38 SearchIterator::~SearchIterator()
39 {
40 if (m_search_ != NULL) {
41 uprv_free(m_search_);
42 }
43 }
44
45 // public get and set methods ----------------------------------------
46
47 void SearchIterator::setAttribute(USearchAttribute attribute,
48 USearchAttributeValue value,
49 UErrorCode &status)
50 {
51 if (U_SUCCESS(status)) {
52 switch (attribute)
53 {
54 case USEARCH_OVERLAP :
55 m_search_->isOverlap = (value == USEARCH_ON ? TRUE : FALSE);
56 break;
57 case USEARCH_CANONICAL_MATCH :
58 m_search_->isCanonicalMatch = (value == USEARCH_ON ? TRUE : FALSE);
59 break;
60 default:
61 status = U_ILLEGAL_ARGUMENT_ERROR;
62 }
63 }
64 if (value == USEARCH_ATTRIBUTE_VALUE_COUNT) {
65 status = U_ILLEGAL_ARGUMENT_ERROR;
66 }
67 }
68
69 USearchAttributeValue SearchIterator::getAttribute(
70 USearchAttribute attribute) const
71 {
72 switch (attribute) {
73 case USEARCH_OVERLAP :
74 return (m_search_->isOverlap == TRUE ? USEARCH_ON : USEARCH_OFF);
75 case USEARCH_CANONICAL_MATCH :
76 return (m_search_->isCanonicalMatch == TRUE ? USEARCH_ON :
77 USEARCH_OFF);
78 default :
79 return USEARCH_DEFAULT;
80 }
81 }
82
83 int32_t SearchIterator::getMatchedStart() const
84 {
85 return m_search_->matchedIndex;
86 }
87
88 int32_t SearchIterator::getMatchedLength() const
89 {
90 return m_search_->matchedLength;
91 }
92
93 void SearchIterator::getMatchedText(UnicodeString &result) const
94 {
95 int32_t matchedindex = m_search_->matchedIndex;
96 int32_t matchedlength = m_search_->matchedLength;
97 if (matchedindex != USEARCH_DONE && matchedlength != 0) {
98 result.setTo(m_search_->text + matchedindex, matchedlength);
99 }
100 else {
101 result.remove();
102 }
103 }
104
105 void SearchIterator::setBreakIterator(BreakIterator *breakiter,
106 UErrorCode &status)
107 {
108 if (U_SUCCESS(status)) {
109 #if 0
110 m_search_->breakIter = NULL;
111 // the c++ breakiterator may not make use of ubreakiterator.
112 // so we'll have to keep track of it ourselves.
113 #else
114 // Well, gee... the Constructors that take a BreakIterator
115 // all cast the BreakIterator to a UBreakIterator and
116 // pass it to the corresponding usearch_openFromXXX
117 // routine, so there's no reason not to do this.
118 //
119 // Besides, a UBreakIterator is a BreakIterator, so
120 // any subclass of BreakIterator should work fine here...
121 m_search_->breakIter = (UBreakIterator *) breakiter;
122 #endif
123
124 m_breakiterator_ = breakiter;
125 }
126 }
127
128 const BreakIterator * SearchIterator::getBreakIterator(void) const
129 {
130 return m_breakiterator_;
131 }
132
133 void SearchIterator::setText(const UnicodeString &text, UErrorCode &status)
134 {
135 if (U_SUCCESS(status)) {
136 if (text.length() == 0) {
137 status = U_ILLEGAL_ARGUMENT_ERROR;
138 }
139 else {
140 m_text_ = text;
141 m_search_->text = m_text_.getBuffer();
142 m_search_->textLength = m_text_.length();
143 }
144 }
145 }
146
147 void SearchIterator::setText(CharacterIterator &text, UErrorCode &status)
148 {
149 if (U_SUCCESS(status)) {
150 text.getText(m_text_);
151 setText(m_text_, status);
152 }
153 }
154
155 const UnicodeString & SearchIterator::getText(void) const
156 {
157 return m_text_;
158 }
159
160 // operator overloading ----------------------------------------------
161
162 UBool SearchIterator::operator==(const SearchIterator &that) const
163 {
164 if (this == &that) {
165 return TRUE;
166 }
167 return (m_breakiterator_ == that.m_breakiterator_ &&
168 m_search_->isCanonicalMatch == that.m_search_->isCanonicalMatch &&
169 m_search_->isOverlap == that.m_search_->isOverlap &&
170 m_search_->matchedIndex == that.m_search_->matchedIndex &&
171 m_search_->matchedLength == that.m_search_->matchedLength &&
172 m_search_->textLength == that.m_search_->textLength &&
173 getOffset() == that.getOffset() &&
174 (uprv_memcmp(m_search_->text, that.m_search_->text,
175 m_search_->textLength * sizeof(UChar)) == 0));
176 }
177
178 // public methods ----------------------------------------------------
179
180 int32_t SearchIterator::first(UErrorCode &status)
181 {
182 if (U_FAILURE(status)) {
183 return USEARCH_DONE;
184 }
185 setOffset(0, status);
186 return handleNext(0, status);
187 }
188
189 int32_t SearchIterator::following(int32_t position,
190 UErrorCode &status)
191 {
192 if (U_FAILURE(status)) {
193 return USEARCH_DONE;
194 }
195 setOffset(position, status);
196 return handleNext(position, status);
197 }
198
199 int32_t SearchIterator::last(UErrorCode &status)
200 {
201 if (U_FAILURE(status)) {
202 return USEARCH_DONE;
203 }
204 setOffset(m_search_->textLength, status);
205 return handlePrev(m_search_->textLength, status);
206 }
207
208 int32_t SearchIterator::preceding(int32_t position,
209 UErrorCode &status)
210 {
211 if (U_FAILURE(status)) {
212 return USEARCH_DONE;
213 }
214 setOffset(position, status);
215 return handlePrev(position, status);
216 }
217
218 int32_t SearchIterator::next(UErrorCode &status)
219 {
220 if (U_SUCCESS(status)) {
221 int32_t offset = getOffset();
222 int32_t matchindex = m_search_->matchedIndex;
223 int32_t matchlength = m_search_->matchedLength;
224 m_search_->reset = FALSE;
225 if (m_search_->isForwardSearching == TRUE) {
226 int32_t textlength = m_search_->textLength;
227 if (offset == textlength || matchindex == textlength ||
228 (matchindex != USEARCH_DONE &&
229 matchindex + matchlength >= textlength)) {
230 // not enough characters to match
231 setMatchNotFound();
232 return USEARCH_DONE;
233 }
234 }
235 else {
236 // switching direction.
237 // if matchedIndex == USEARCH_DONE, it means that either a
238 // setOffset has been called or that previous ran off the text
239 // string. the iterator would have been set to offset 0 if a
240 // match is not found.
241 m_search_->isForwardSearching = TRUE;
242 if (m_search_->matchedIndex != USEARCH_DONE) {
243 // there's no need to set the collation element iterator
244 // the next call to next will set the offset.
245 return matchindex;
246 }
247 }
248
249 if (matchlength > 0) {
250 // if matchlength is 0 we are at the start of the iteration
251 if (m_search_->isOverlap) {
252 offset ++;
253 }
254 else {
255 offset += matchlength;
256 }
257 }
258 return handleNext(offset, status);
259 }
260 return USEARCH_DONE;
261 }
262
263 int32_t SearchIterator::previous(UErrorCode &status)
264 {
265 if (U_SUCCESS(status)) {
266 int32_t offset;
267 if (m_search_->reset) {
268 offset = m_search_->textLength;
269 m_search_->isForwardSearching = FALSE;
270 m_search_->reset = FALSE;
271 setOffset(offset, status);
272 }
273 else {
274 offset = getOffset();
275 }
276
277 int32_t matchindex = m_search_->matchedIndex;
278 if (m_search_->isForwardSearching == TRUE) {
279 // switching direction.
280 // if matchedIndex == USEARCH_DONE, it means that either a
281 // setOffset has been called or that next ran off the text
282 // string. the iterator would have been set to offset textLength if
283 // a match is not found.
284 m_search_->isForwardSearching = FALSE;
285 if (matchindex != USEARCH_DONE) {
286 return matchindex;
287 }
288 }
289 else {
290 if (offset == 0 || matchindex == 0) {
291 // not enough characters to match
292 setMatchNotFound();
293 return USEARCH_DONE;
294 }
295 }
296
297 if (matchindex != USEARCH_DONE) {
298 if (m_search_->isOverlap) {
299 matchindex += m_search_->matchedLength - 2;
300 }
301
302 return handlePrev(matchindex, status);
303 }
304
305 return handlePrev(offset, status);
306 }
307
308 return USEARCH_DONE;
309 }
310
311 void SearchIterator::reset()
312 {
313 UErrorCode status = U_ZERO_ERROR;
314 setMatchNotFound();
315 setOffset(0, status);
316 m_search_->isOverlap = FALSE;
317 m_search_->isCanonicalMatch = FALSE;
318 m_search_->isForwardSearching = TRUE;
319 m_search_->reset = TRUE;
320 }
321
322 // protected constructors and destructors -----------------------------
323
324 SearchIterator::SearchIterator()
325 {
326 m_search_ = (USearch *)uprv_malloc(sizeof(USearch));
327 m_search_->breakIter = NULL;
328 m_search_->isOverlap = FALSE;
329 m_search_->isCanonicalMatch = FALSE;
330 m_search_->isForwardSearching = TRUE;
331 m_search_->reset = TRUE;
332 m_search_->matchedIndex = USEARCH_DONE;
333 m_search_->matchedLength = 0;
334 m_search_->text = NULL;
335 m_search_->textLength = 0;
336 m_breakiterator_ = NULL;
337 }
338
339 SearchIterator::SearchIterator(const UnicodeString &text,
340 BreakIterator *breakiter) :
341 m_breakiterator_(breakiter),
342 m_text_(text)
343 {
344 m_search_ = (USearch *)uprv_malloc(sizeof(USearch));
345 m_search_->breakIter = NULL;
346 m_search_->isOverlap = FALSE;
347 m_search_->isCanonicalMatch = FALSE;
348 m_search_->isForwardSearching = TRUE;
349 m_search_->reset = TRUE;
350 m_search_->matchedIndex = USEARCH_DONE;
351 m_search_->matchedLength = 0;
352 m_search_->text = m_text_.getBuffer();
353 m_search_->textLength = text.length();
354 }
355
356 SearchIterator::SearchIterator(CharacterIterator &text,
357 BreakIterator *breakiter) :
358 m_breakiterator_(breakiter)
359 {
360 m_search_ = (USearch *)uprv_malloc(sizeof(USearch));
361 m_search_->breakIter = NULL;
362 m_search_->isOverlap = FALSE;
363 m_search_->isCanonicalMatch = FALSE;
364 m_search_->isForwardSearching = TRUE;
365 m_search_->reset = TRUE;
366 m_search_->matchedIndex = USEARCH_DONE;
367 m_search_->matchedLength = 0;
368 text.getText(m_text_);
369 m_search_->text = m_text_.getBuffer();
370 m_search_->textLength = m_text_.length();
371 m_breakiterator_ = breakiter;
372 }
373
374 // protected methods ------------------------------------------------------
375
376 SearchIterator & SearchIterator::operator=(const SearchIterator &that)
377 {
378 if (this != &that) {
379 m_breakiterator_ = that.m_breakiterator_;
380 m_text_ = that.m_text_;
381 m_search_->breakIter = that.m_search_->breakIter;
382 m_search_->isCanonicalMatch = that.m_search_->isCanonicalMatch;
383 m_search_->isOverlap = that.m_search_->isOverlap;
384 m_search_->matchedIndex = that.m_search_->matchedIndex;
385 m_search_->matchedLength = that.m_search_->matchedLength;
386 m_search_->text = that.m_search_->text;
387 m_search_->textLength = that.m_search_->textLength;
388 }
389 return *this;
390 }
391
392 void SearchIterator::setMatchLength(int32_t length)
393 {
394 m_search_->matchedLength = length;
395 }
396
397 void SearchIterator::setMatchStart(int32_t position)
398 {
399 m_search_->matchedIndex = position;
400 }
401
402 void SearchIterator::setMatchNotFound()
403 {
404 setMatchStart(USEARCH_DONE);
405 setMatchLength(0);
406 UErrorCode status = U_ZERO_ERROR;
407 // by default no errors should be returned here since offsets are within
408 // range.
409 if (m_search_->isForwardSearching) {
410 setOffset(m_search_->textLength, status);
411 }
412 else {
413 setOffset(0, status);
414 }
415 }
416
417
418 U_NAMESPACE_END
419
420 #endif /* #if !UCONFIG_NO_COLLATION */