]> git.saurik.com Git - apple/icu.git/blame_incremental - icuSources/i18n/search.cpp
ICU-491.11.1.tar.gz
[apple/icu.git] / icuSources / i18n / search.cpp
... / ...
CommitLineData
1/*
2**********************************************************************
3* Copyright (C) 2001-2008,2010 IBM and others. All rights reserved.
4**********************************************************************
5* Date Name Description
6* 03/22/2000 helena Creation.
7**********************************************************************
8*/
9
10#include "unicode/utypes.h"
11
12#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
13
14#include "unicode/brkiter.h"
15#include "unicode/schriter.h"
16#include "unicode/search.h"
17#include "usrchimp.h"
18#include "cmemory.h"
19
20// public constructors and destructors -----------------------------------
21U_NAMESPACE_BEGIN
22
23SearchIterator::SearchIterator(const SearchIterator &other)
24 : UObject(other)
25{
26 m_breakiterator_ = other.m_breakiterator_;
27 m_text_ = other.m_text_;
28 m_search_ = (USearch *)uprv_malloc(sizeof(USearch));
29 m_search_->breakIter = other.m_search_->breakIter;
30 m_search_->isCanonicalMatch = other.m_search_->isCanonicalMatch;
31 m_search_->isOverlap = other.m_search_->isOverlap;
32 m_search_->elementComparisonType = other.m_search_->elementComparisonType;
33 m_search_->matchedIndex = other.m_search_->matchedIndex;
34 m_search_->matchedLength = other.m_search_->matchedLength;
35 m_search_->text = other.m_search_->text;
36 m_search_->textLength = other.m_search_->textLength;
37}
38
39SearchIterator::~SearchIterator()
40{
41 if (m_search_ != NULL) {
42 uprv_free(m_search_);
43 }
44}
45
46// public get and set methods ----------------------------------------
47
48void SearchIterator::setAttribute(USearchAttribute attribute,
49 USearchAttributeValue value,
50 UErrorCode &status)
51{
52 if (U_SUCCESS(status)) {
53 switch (attribute)
54 {
55 case USEARCH_OVERLAP :
56 m_search_->isOverlap = (value == USEARCH_ON ? TRUE : FALSE);
57 break;
58 case USEARCH_CANONICAL_MATCH :
59 m_search_->isCanonicalMatch = (value == USEARCH_ON ? TRUE : FALSE);
60 break;
61 case USEARCH_ELEMENT_COMPARISON :
62 if (value == USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD || value == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD) {
63 m_search_->elementComparisonType = (int16_t)value;
64 } else {
65 m_search_->elementComparisonType = 0;
66 }
67 break;
68 default:
69 status = U_ILLEGAL_ARGUMENT_ERROR;
70 }
71 }
72 if (value == USEARCH_ATTRIBUTE_VALUE_COUNT) {
73 status = U_ILLEGAL_ARGUMENT_ERROR;
74 }
75}
76
77USearchAttributeValue SearchIterator::getAttribute(
78 USearchAttribute attribute) const
79{
80 switch (attribute) {
81 case USEARCH_OVERLAP :
82 return (m_search_->isOverlap == TRUE ? USEARCH_ON : USEARCH_OFF);
83 case USEARCH_CANONICAL_MATCH :
84 return (m_search_->isCanonicalMatch == TRUE ? USEARCH_ON :
85 USEARCH_OFF);
86 case USEARCH_ELEMENT_COMPARISON :
87 {
88 int16_t value = m_search_->elementComparisonType;
89 if (value == USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD || value == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD) {
90 return (USearchAttributeValue)value;
91 } else {
92 return USEARCH_STANDARD_ELEMENT_COMPARISON;
93 }
94 }
95 default :
96 return USEARCH_DEFAULT;
97 }
98}
99
100int32_t SearchIterator::getMatchedStart() const
101{
102 return m_search_->matchedIndex;
103}
104
105int32_t SearchIterator::getMatchedLength() const
106{
107 return m_search_->matchedLength;
108}
109
110void SearchIterator::getMatchedText(UnicodeString &result) const
111{
112 int32_t matchedindex = m_search_->matchedIndex;
113 int32_t matchedlength = m_search_->matchedLength;
114 if (matchedindex != USEARCH_DONE && matchedlength != 0) {
115 result.setTo(m_search_->text + matchedindex, matchedlength);
116 }
117 else {
118 result.remove();
119 }
120}
121
122void SearchIterator::setBreakIterator(BreakIterator *breakiter,
123 UErrorCode &status)
124{
125 if (U_SUCCESS(status)) {
126#if 0
127 m_search_->breakIter = NULL;
128 // the c++ breakiterator may not make use of ubreakiterator.
129 // so we'll have to keep track of it ourselves.
130#else
131 // Well, gee... the Constructors that take a BreakIterator
132 // all cast the BreakIterator to a UBreakIterator and
133 // pass it to the corresponding usearch_openFromXXX
134 // routine, so there's no reason not to do this.
135 //
136 // Besides, a UBreakIterator is a BreakIterator, so
137 // any subclass of BreakIterator should work fine here...
138 m_search_->breakIter = (UBreakIterator *) breakiter;
139#endif
140
141 m_breakiterator_ = breakiter;
142 }
143}
144
145const BreakIterator * SearchIterator::getBreakIterator(void) const
146{
147 return m_breakiterator_;
148}
149
150void SearchIterator::setText(const UnicodeString &text, UErrorCode &status)
151{
152 if (U_SUCCESS(status)) {
153 if (text.length() == 0) {
154 status = U_ILLEGAL_ARGUMENT_ERROR;
155 }
156 else {
157 m_text_ = text;
158 m_search_->text = m_text_.getBuffer();
159 m_search_->textLength = m_text_.length();
160 }
161 }
162}
163
164void SearchIterator::setText(CharacterIterator &text, UErrorCode &status)
165{
166 if (U_SUCCESS(status)) {
167 text.getText(m_text_);
168 setText(m_text_, status);
169 }
170}
171
172const UnicodeString & SearchIterator::getText(void) const
173{
174 return m_text_;
175}
176
177// operator overloading ----------------------------------------------
178
179UBool SearchIterator::operator==(const SearchIterator &that) const
180{
181 if (this == &that) {
182 return TRUE;
183 }
184 return (m_breakiterator_ == that.m_breakiterator_ &&
185 m_search_->isCanonicalMatch == that.m_search_->isCanonicalMatch &&
186 m_search_->isOverlap == that.m_search_->isOverlap &&
187 m_search_->elementComparisonType == that.m_search_->elementComparisonType &&
188 m_search_->matchedIndex == that.m_search_->matchedIndex &&
189 m_search_->matchedLength == that.m_search_->matchedLength &&
190 m_search_->textLength == that.m_search_->textLength &&
191 getOffset() == that.getOffset() &&
192 (uprv_memcmp(m_search_->text, that.m_search_->text,
193 m_search_->textLength * sizeof(UChar)) == 0));
194}
195
196// public methods ----------------------------------------------------
197
198int32_t SearchIterator::first(UErrorCode &status)
199{
200 if (U_FAILURE(status)) {
201 return USEARCH_DONE;
202 }
203 setOffset(0, status);
204 return handleNext(0, status);
205}
206
207int32_t SearchIterator::following(int32_t position,
208 UErrorCode &status)
209{
210 if (U_FAILURE(status)) {
211 return USEARCH_DONE;
212 }
213 setOffset(position, status);
214 return handleNext(position, status);
215}
216
217int32_t SearchIterator::last(UErrorCode &status)
218{
219 if (U_FAILURE(status)) {
220 return USEARCH_DONE;
221 }
222 setOffset(m_search_->textLength, status);
223 return handlePrev(m_search_->textLength, status);
224}
225
226int32_t SearchIterator::preceding(int32_t position,
227 UErrorCode &status)
228{
229 if (U_FAILURE(status)) {
230 return USEARCH_DONE;
231 }
232 setOffset(position, status);
233 return handlePrev(position, status);
234}
235
236int32_t SearchIterator::next(UErrorCode &status)
237{
238 if (U_SUCCESS(status)) {
239 int32_t offset = getOffset();
240 int32_t matchindex = m_search_->matchedIndex;
241 int32_t matchlength = m_search_->matchedLength;
242 m_search_->reset = FALSE;
243 if (m_search_->isForwardSearching == TRUE) {
244 int32_t textlength = m_search_->textLength;
245 if (offset == textlength || matchindex == textlength ||
246 (matchindex != USEARCH_DONE &&
247 matchindex + matchlength >= textlength)) {
248 // not enough characters to match
249 setMatchNotFound();
250 return USEARCH_DONE;
251 }
252 }
253 else {
254 // switching direction.
255 // if matchedIndex == USEARCH_DONE, it means that either a
256 // setOffset has been called or that previous ran off the text
257 // string. the iterator would have been set to offset 0 if a
258 // match is not found.
259 m_search_->isForwardSearching = TRUE;
260 if (m_search_->matchedIndex != USEARCH_DONE) {
261 // there's no need to set the collation element iterator
262 // the next call to next will set the offset.
263 return matchindex;
264 }
265 }
266
267 if (matchlength > 0) {
268 // if matchlength is 0 we are at the start of the iteration
269 if (m_search_->isOverlap) {
270 offset ++;
271 }
272 else {
273 offset += matchlength;
274 }
275 }
276 return handleNext(offset, status);
277 }
278 return USEARCH_DONE;
279}
280
281int32_t SearchIterator::previous(UErrorCode &status)
282{
283 if (U_SUCCESS(status)) {
284 int32_t offset;
285 if (m_search_->reset) {
286 offset = m_search_->textLength;
287 m_search_->isForwardSearching = FALSE;
288 m_search_->reset = FALSE;
289 setOffset(offset, status);
290 }
291 else {
292 offset = getOffset();
293 }
294
295 int32_t matchindex = m_search_->matchedIndex;
296 if (m_search_->isForwardSearching == TRUE) {
297 // switching direction.
298 // if matchedIndex == USEARCH_DONE, it means that either a
299 // setOffset has been called or that next ran off the text
300 // string. the iterator would have been set to offset textLength if
301 // a match is not found.
302 m_search_->isForwardSearching = FALSE;
303 if (matchindex != USEARCH_DONE) {
304 return matchindex;
305 }
306 }
307 else {
308 if (offset == 0 || matchindex == 0) {
309 // not enough characters to match
310 setMatchNotFound();
311 return USEARCH_DONE;
312 }
313 }
314
315 if (matchindex != USEARCH_DONE) {
316 if (m_search_->isOverlap) {
317 matchindex += m_search_->matchedLength - 2;
318 }
319
320 return handlePrev(matchindex, status);
321 }
322
323 return handlePrev(offset, status);
324 }
325
326 return USEARCH_DONE;
327}
328
329void SearchIterator::reset()
330{
331 UErrorCode status = U_ZERO_ERROR;
332 setMatchNotFound();
333 setOffset(0, status);
334 m_search_->isOverlap = FALSE;
335 m_search_->isCanonicalMatch = FALSE;
336 m_search_->elementComparisonType = 0;
337 m_search_->isForwardSearching = TRUE;
338 m_search_->reset = TRUE;
339}
340
341// protected constructors and destructors -----------------------------
342
343SearchIterator::SearchIterator()
344{
345 m_search_ = (USearch *)uprv_malloc(sizeof(USearch));
346 m_search_->breakIter = NULL;
347 m_search_->isOverlap = FALSE;
348 m_search_->isCanonicalMatch = FALSE;
349 m_search_->elementComparisonType = 0;
350 m_search_->isForwardSearching = TRUE;
351 m_search_->reset = TRUE;
352 m_search_->matchedIndex = USEARCH_DONE;
353 m_search_->matchedLength = 0;
354 m_search_->text = NULL;
355 m_search_->textLength = 0;
356 m_breakiterator_ = NULL;
357}
358
359SearchIterator::SearchIterator(const UnicodeString &text,
360 BreakIterator *breakiter) :
361 m_breakiterator_(breakiter),
362 m_text_(text)
363{
364 m_search_ = (USearch *)uprv_malloc(sizeof(USearch));
365 m_search_->breakIter = NULL;
366 m_search_->isOverlap = FALSE;
367 m_search_->isCanonicalMatch = FALSE;
368 m_search_->elementComparisonType = 0;
369 m_search_->isForwardSearching = TRUE;
370 m_search_->reset = TRUE;
371 m_search_->matchedIndex = USEARCH_DONE;
372 m_search_->matchedLength = 0;
373 m_search_->text = m_text_.getBuffer();
374 m_search_->textLength = text.length();
375}
376
377SearchIterator::SearchIterator(CharacterIterator &text,
378 BreakIterator *breakiter) :
379 m_breakiterator_(breakiter)
380{
381 m_search_ = (USearch *)uprv_malloc(sizeof(USearch));
382 m_search_->breakIter = NULL;
383 m_search_->isOverlap = FALSE;
384 m_search_->isCanonicalMatch = FALSE;
385 m_search_->elementComparisonType = 0;
386 m_search_->isForwardSearching = TRUE;
387 m_search_->reset = TRUE;
388 m_search_->matchedIndex = USEARCH_DONE;
389 m_search_->matchedLength = 0;
390 text.getText(m_text_);
391 m_search_->text = m_text_.getBuffer();
392 m_search_->textLength = m_text_.length();
393 m_breakiterator_ = breakiter;
394}
395
396// protected methods ------------------------------------------------------
397
398SearchIterator & SearchIterator::operator=(const SearchIterator &that)
399{
400 if (this != &that) {
401 m_breakiterator_ = that.m_breakiterator_;
402 m_text_ = that.m_text_;
403 m_search_->breakIter = that.m_search_->breakIter;
404 m_search_->isCanonicalMatch = that.m_search_->isCanonicalMatch;
405 m_search_->isOverlap = that.m_search_->isOverlap;
406 m_search_->elementComparisonType = that.m_search_->elementComparisonType;
407 m_search_->matchedIndex = that.m_search_->matchedIndex;
408 m_search_->matchedLength = that.m_search_->matchedLength;
409 m_search_->text = that.m_search_->text;
410 m_search_->textLength = that.m_search_->textLength;
411 }
412 return *this;
413}
414
415void SearchIterator::setMatchLength(int32_t length)
416{
417 m_search_->matchedLength = length;
418}
419
420void SearchIterator::setMatchStart(int32_t position)
421{
422 m_search_->matchedIndex = position;
423}
424
425void SearchIterator::setMatchNotFound()
426{
427 setMatchStart(USEARCH_DONE);
428 setMatchLength(0);
429 UErrorCode status = U_ZERO_ERROR;
430 // by default no errors should be returned here since offsets are within
431 // range.
432 if (m_search_->isForwardSearching) {
433 setOffset(m_search_->textLength, status);
434 }
435 else {
436 setOffset(0, status);
437 }
438}
439
440
441U_NAMESPACE_END
442
443#endif /* #if !UCONFIG_NO_COLLATION */