]> git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/search.cpp
ICU-511.32.tar.gz
[apple/icu.git] / icuSources / i18n / search.cpp
1 /*
2 **********************************************************************
3 * Copyright (C) 2001-2008,2010 IBM and others. All rights reserved.
4 **********************************************************************
5 * Date Name Description
6 * 03/22/2000 helena Creation.
7 **********************************************************************
8 */
9
10 #include "unicode/utypes.h"
11
12 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
13
14 #include "unicode/brkiter.h"
15 #include "unicode/schriter.h"
16 #include "unicode/search.h"
17 #include "usrchimp.h"
18 #include "cmemory.h"
19
20 // public constructors and destructors -----------------------------------
21 U_NAMESPACE_BEGIN
22
23 SearchIterator::SearchIterator(const SearchIterator &other)
24 : UObject(other)
25 {
26 m_breakiterator_ = other.m_breakiterator_;
27 m_text_ = other.m_text_;
28 m_search_ = (USearch *)uprv_malloc(sizeof(USearch));
29 m_search_->breakIter = other.m_search_->breakIter;
30 m_search_->isCanonicalMatch = other.m_search_->isCanonicalMatch;
31 m_search_->isOverlap = other.m_search_->isOverlap;
32 m_search_->elementComparisonType = other.m_search_->elementComparisonType;
33 m_search_->matchedIndex = other.m_search_->matchedIndex;
34 m_search_->matchedLength = other.m_search_->matchedLength;
35 m_search_->text = other.m_search_->text;
36 m_search_->textLength = other.m_search_->textLength;
37 }
38
39 SearchIterator::~SearchIterator()
40 {
41 if (m_search_ != NULL) {
42 uprv_free(m_search_);
43 }
44 }
45
46 // public get and set methods ----------------------------------------
47
48 void SearchIterator::setAttribute(USearchAttribute attribute,
49 USearchAttributeValue value,
50 UErrorCode &status)
51 {
52 if (U_SUCCESS(status)) {
53 switch (attribute)
54 {
55 case USEARCH_OVERLAP :
56 m_search_->isOverlap = (value == USEARCH_ON ? TRUE : FALSE);
57 break;
58 case USEARCH_CANONICAL_MATCH :
59 m_search_->isCanonicalMatch = (value == USEARCH_ON ? TRUE : FALSE);
60 break;
61 case USEARCH_ELEMENT_COMPARISON :
62 if (value == USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD || value == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD) {
63 m_search_->elementComparisonType = (int16_t)value;
64 } else {
65 m_search_->elementComparisonType = 0;
66 }
67 break;
68 default:
69 status = U_ILLEGAL_ARGUMENT_ERROR;
70 }
71 }
72 if (value == USEARCH_ATTRIBUTE_VALUE_COUNT) {
73 status = U_ILLEGAL_ARGUMENT_ERROR;
74 }
75 }
76
77 USearchAttributeValue SearchIterator::getAttribute(
78 USearchAttribute attribute) const
79 {
80 switch (attribute) {
81 case USEARCH_OVERLAP :
82 return (m_search_->isOverlap == TRUE ? USEARCH_ON : USEARCH_OFF);
83 case USEARCH_CANONICAL_MATCH :
84 return (m_search_->isCanonicalMatch == TRUE ? USEARCH_ON :
85 USEARCH_OFF);
86 case USEARCH_ELEMENT_COMPARISON :
87 {
88 int16_t value = m_search_->elementComparisonType;
89 if (value == USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD || value == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD) {
90 return (USearchAttributeValue)value;
91 } else {
92 return USEARCH_STANDARD_ELEMENT_COMPARISON;
93 }
94 }
95 default :
96 return USEARCH_DEFAULT;
97 }
98 }
99
100 int32_t SearchIterator::getMatchedStart() const
101 {
102 return m_search_->matchedIndex;
103 }
104
105 int32_t SearchIterator::getMatchedLength() const
106 {
107 return m_search_->matchedLength;
108 }
109
110 void SearchIterator::getMatchedText(UnicodeString &result) const
111 {
112 int32_t matchedindex = m_search_->matchedIndex;
113 int32_t matchedlength = m_search_->matchedLength;
114 if (matchedindex != USEARCH_DONE && matchedlength != 0) {
115 result.setTo(m_search_->text + matchedindex, matchedlength);
116 }
117 else {
118 result.remove();
119 }
120 }
121
122 void SearchIterator::setBreakIterator(BreakIterator *breakiter,
123 UErrorCode &status)
124 {
125 if (U_SUCCESS(status)) {
126 #if 0
127 m_search_->breakIter = NULL;
128 // the c++ breakiterator may not make use of ubreakiterator.
129 // so we'll have to keep track of it ourselves.
130 #else
131 // Well, gee... the Constructors that take a BreakIterator
132 // all cast the BreakIterator to a UBreakIterator and
133 // pass it to the corresponding usearch_openFromXXX
134 // routine, so there's no reason not to do this.
135 //
136 // Besides, a UBreakIterator is a BreakIterator, so
137 // any subclass of BreakIterator should work fine here...
138 m_search_->breakIter = (UBreakIterator *) breakiter;
139 #endif
140
141 m_breakiterator_ = breakiter;
142 }
143 }
144
145 const BreakIterator * SearchIterator::getBreakIterator(void) const
146 {
147 return m_breakiterator_;
148 }
149
150 void SearchIterator::setText(const UnicodeString &text, UErrorCode &status)
151 {
152 if (U_SUCCESS(status)) {
153 if (text.length() == 0) {
154 status = U_ILLEGAL_ARGUMENT_ERROR;
155 }
156 else {
157 m_text_ = text;
158 m_search_->text = m_text_.getBuffer();
159 m_search_->textLength = m_text_.length();
160 }
161 }
162 }
163
164 void SearchIterator::setText(CharacterIterator &text, UErrorCode &status)
165 {
166 if (U_SUCCESS(status)) {
167 text.getText(m_text_);
168 setText(m_text_, status);
169 }
170 }
171
172 const UnicodeString & SearchIterator::getText(void) const
173 {
174 return m_text_;
175 }
176
177 // operator overloading ----------------------------------------------
178
179 UBool SearchIterator::operator==(const SearchIterator &that) const
180 {
181 if (this == &that) {
182 return TRUE;
183 }
184 return (m_breakiterator_ == that.m_breakiterator_ &&
185 m_search_->isCanonicalMatch == that.m_search_->isCanonicalMatch &&
186 m_search_->isOverlap == that.m_search_->isOverlap &&
187 m_search_->elementComparisonType == that.m_search_->elementComparisonType &&
188 m_search_->matchedIndex == that.m_search_->matchedIndex &&
189 m_search_->matchedLength == that.m_search_->matchedLength &&
190 m_search_->textLength == that.m_search_->textLength &&
191 getOffset() == that.getOffset() &&
192 (uprv_memcmp(m_search_->text, that.m_search_->text,
193 m_search_->textLength * sizeof(UChar)) == 0));
194 }
195
196 // public methods ----------------------------------------------------
197
198 int32_t SearchIterator::first(UErrorCode &status)
199 {
200 if (U_FAILURE(status)) {
201 return USEARCH_DONE;
202 }
203 setOffset(0, status);
204 return handleNext(0, status);
205 }
206
207 int32_t SearchIterator::following(int32_t position,
208 UErrorCode &status)
209 {
210 if (U_FAILURE(status)) {
211 return USEARCH_DONE;
212 }
213 setOffset(position, status);
214 return handleNext(position, status);
215 }
216
217 int32_t SearchIterator::last(UErrorCode &status)
218 {
219 if (U_FAILURE(status)) {
220 return USEARCH_DONE;
221 }
222 setOffset(m_search_->textLength, status);
223 return handlePrev(m_search_->textLength, status);
224 }
225
226 int32_t SearchIterator::preceding(int32_t position,
227 UErrorCode &status)
228 {
229 if (U_FAILURE(status)) {
230 return USEARCH_DONE;
231 }
232 setOffset(position, status);
233 return handlePrev(position, status);
234 }
235
236 int32_t SearchIterator::next(UErrorCode &status)
237 {
238 if (U_SUCCESS(status)) {
239 int32_t offset = getOffset();
240 int32_t matchindex = m_search_->matchedIndex;
241 int32_t matchlength = m_search_->matchedLength;
242 m_search_->reset = FALSE;
243 if (m_search_->isForwardSearching == TRUE) {
244 int32_t textlength = m_search_->textLength;
245 if (offset == textlength || matchindex == textlength ||
246 (matchindex != USEARCH_DONE &&
247 matchindex + matchlength >= textlength)) {
248 // not enough characters to match
249 setMatchNotFound();
250 return USEARCH_DONE;
251 }
252 }
253 else {
254 // switching direction.
255 // if matchedIndex == USEARCH_DONE, it means that either a
256 // setOffset has been called or that previous ran off the text
257 // string. the iterator would have been set to offset 0 if a
258 // match is not found.
259 m_search_->isForwardSearching = TRUE;
260 if (m_search_->matchedIndex != USEARCH_DONE) {
261 // there's no need to set the collation element iterator
262 // the next call to next will set the offset.
263 return matchindex;
264 }
265 }
266
267 if (matchlength > 0) {
268 // if matchlength is 0 we are at the start of the iteration
269 if (m_search_->isOverlap) {
270 offset ++;
271 }
272 else {
273 offset += matchlength;
274 }
275 }
276 return handleNext(offset, status);
277 }
278 return USEARCH_DONE;
279 }
280
281 int32_t SearchIterator::previous(UErrorCode &status)
282 {
283 if (U_SUCCESS(status)) {
284 int32_t offset;
285 if (m_search_->reset) {
286 offset = m_search_->textLength;
287 m_search_->isForwardSearching = FALSE;
288 m_search_->reset = FALSE;
289 setOffset(offset, status);
290 }
291 else {
292 offset = getOffset();
293 }
294
295 int32_t matchindex = m_search_->matchedIndex;
296 if (m_search_->isForwardSearching == TRUE) {
297 // switching direction.
298 // if matchedIndex == USEARCH_DONE, it means that either a
299 // setOffset has been called or that next ran off the text
300 // string. the iterator would have been set to offset textLength if
301 // a match is not found.
302 m_search_->isForwardSearching = FALSE;
303 if (matchindex != USEARCH_DONE) {
304 return matchindex;
305 }
306 }
307 else {
308 if (offset == 0 || matchindex == 0) {
309 // not enough characters to match
310 setMatchNotFound();
311 return USEARCH_DONE;
312 }
313 }
314
315 if (matchindex != USEARCH_DONE) {
316 if (m_search_->isOverlap) {
317 matchindex += m_search_->matchedLength - 2;
318 }
319
320 return handlePrev(matchindex, status);
321 }
322
323 return handlePrev(offset, status);
324 }
325
326 return USEARCH_DONE;
327 }
328
329 void SearchIterator::reset()
330 {
331 UErrorCode status = U_ZERO_ERROR;
332 setMatchNotFound();
333 setOffset(0, status);
334 m_search_->isOverlap = FALSE;
335 m_search_->isCanonicalMatch = FALSE;
336 m_search_->elementComparisonType = 0;
337 m_search_->isForwardSearching = TRUE;
338 m_search_->reset = TRUE;
339 }
340
341 // protected constructors and destructors -----------------------------
342
343 SearchIterator::SearchIterator()
344 {
345 m_search_ = (USearch *)uprv_malloc(sizeof(USearch));
346 m_search_->breakIter = NULL;
347 m_search_->isOverlap = FALSE;
348 m_search_->isCanonicalMatch = FALSE;
349 m_search_->elementComparisonType = 0;
350 m_search_->isForwardSearching = TRUE;
351 m_search_->reset = TRUE;
352 m_search_->matchedIndex = USEARCH_DONE;
353 m_search_->matchedLength = 0;
354 m_search_->text = NULL;
355 m_search_->textLength = 0;
356 m_breakiterator_ = NULL;
357 }
358
359 SearchIterator::SearchIterator(const UnicodeString &text,
360 BreakIterator *breakiter) :
361 m_breakiterator_(breakiter),
362 m_text_(text)
363 {
364 m_search_ = (USearch *)uprv_malloc(sizeof(USearch));
365 m_search_->breakIter = NULL;
366 m_search_->isOverlap = FALSE;
367 m_search_->isCanonicalMatch = FALSE;
368 m_search_->elementComparisonType = 0;
369 m_search_->isForwardSearching = TRUE;
370 m_search_->reset = TRUE;
371 m_search_->matchedIndex = USEARCH_DONE;
372 m_search_->matchedLength = 0;
373 m_search_->text = m_text_.getBuffer();
374 m_search_->textLength = text.length();
375 }
376
377 SearchIterator::SearchIterator(CharacterIterator &text,
378 BreakIterator *breakiter) :
379 m_breakiterator_(breakiter)
380 {
381 m_search_ = (USearch *)uprv_malloc(sizeof(USearch));
382 m_search_->breakIter = NULL;
383 m_search_->isOverlap = FALSE;
384 m_search_->isCanonicalMatch = FALSE;
385 m_search_->elementComparisonType = 0;
386 m_search_->isForwardSearching = TRUE;
387 m_search_->reset = TRUE;
388 m_search_->matchedIndex = USEARCH_DONE;
389 m_search_->matchedLength = 0;
390 text.getText(m_text_);
391 m_search_->text = m_text_.getBuffer();
392 m_search_->textLength = m_text_.length();
393 m_breakiterator_ = breakiter;
394 }
395
396 // protected methods ------------------------------------------------------
397
398 SearchIterator & SearchIterator::operator=(const SearchIterator &that)
399 {
400 if (this != &that) {
401 m_breakiterator_ = that.m_breakiterator_;
402 m_text_ = that.m_text_;
403 m_search_->breakIter = that.m_search_->breakIter;
404 m_search_->isCanonicalMatch = that.m_search_->isCanonicalMatch;
405 m_search_->isOverlap = that.m_search_->isOverlap;
406 m_search_->elementComparisonType = that.m_search_->elementComparisonType;
407 m_search_->matchedIndex = that.m_search_->matchedIndex;
408 m_search_->matchedLength = that.m_search_->matchedLength;
409 m_search_->text = that.m_search_->text;
410 m_search_->textLength = that.m_search_->textLength;
411 }
412 return *this;
413 }
414
415 void SearchIterator::setMatchLength(int32_t length)
416 {
417 m_search_->matchedLength = length;
418 }
419
420 void SearchIterator::setMatchStart(int32_t position)
421 {
422 m_search_->matchedIndex = position;
423 }
424
425 void SearchIterator::setMatchNotFound()
426 {
427 setMatchStart(USEARCH_DONE);
428 setMatchLength(0);
429 UErrorCode status = U_ZERO_ERROR;
430 // by default no errors should be returned here since offsets are within
431 // range.
432 if (m_search_->isForwardSearching) {
433 setOffset(m_search_->textLength, status);
434 }
435 else {
436 setOffset(0, status);
437 }
438 }
439
440
441 U_NAMESPACE_END
442
443 #endif /* #if !UCONFIG_NO_COLLATION */