]> git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/search.cpp
ICU-64243.0.1.tar.gz
[apple/icu.git] / icuSources / i18n / search.cpp
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 * Copyright (C) 2001-2008,2010 IBM and others. All rights reserved.
6 **********************************************************************
7 * Date Name Description
8 * 03/22/2000 helena Creation.
9 **********************************************************************
10 */
11
12 #include "unicode/utypes.h"
13
14 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
15
16 #include "unicode/brkiter.h"
17 #include "unicode/schriter.h"
18 #include "unicode/search.h"
19 #include "usrchimp.h"
20 #include "cmemory.h"
21
22 // public constructors and destructors -----------------------------------
23 U_NAMESPACE_BEGIN
24
25 SearchIterator::SearchIterator(const SearchIterator &other)
26 : UObject(other)
27 {
28 m_breakiterator_ = other.m_breakiterator_;
29 m_text_ = other.m_text_;
30 m_search_ = (USearch *)uprv_malloc(sizeof(USearch));
31 m_search_->breakIter = other.m_search_->breakIter;
32 m_search_->isCanonicalMatch = other.m_search_->isCanonicalMatch;
33 m_search_->isOverlap = other.m_search_->isOverlap;
34 m_search_->elementComparisonType = other.m_search_->elementComparisonType;
35 m_search_->matchedIndex = other.m_search_->matchedIndex;
36 m_search_->matchedLength = other.m_search_->matchedLength;
37 m_search_->text = other.m_search_->text;
38 m_search_->textLength = other.m_search_->textLength;
39 }
40
41 SearchIterator::~SearchIterator()
42 {
43 if (m_search_ != NULL) {
44 uprv_free(m_search_);
45 }
46 }
47
48 // public get and set methods ----------------------------------------
49
50 void SearchIterator::setAttribute(USearchAttribute attribute,
51 USearchAttributeValue value,
52 UErrorCode &status)
53 {
54 if (U_SUCCESS(status)) {
55 switch (attribute)
56 {
57 case USEARCH_OVERLAP :
58 m_search_->isOverlap = (value == USEARCH_ON ? TRUE : FALSE);
59 break;
60 case USEARCH_CANONICAL_MATCH :
61 m_search_->isCanonicalMatch = (value == USEARCH_ON ? TRUE : FALSE);
62 break;
63 case USEARCH_ELEMENT_COMPARISON :
64 if (value == USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD || value == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD) {
65 m_search_->elementComparisonType = (int16_t)value;
66 } else {
67 m_search_->elementComparisonType = 0;
68 }
69 break;
70 default:
71 status = U_ILLEGAL_ARGUMENT_ERROR;
72 }
73 }
74 if (value == USEARCH_ATTRIBUTE_VALUE_COUNT) {
75 status = U_ILLEGAL_ARGUMENT_ERROR;
76 }
77 }
78
79 USearchAttributeValue SearchIterator::getAttribute(
80 USearchAttribute attribute) const
81 {
82 switch (attribute) {
83 case USEARCH_OVERLAP :
84 return (m_search_->isOverlap == TRUE ? USEARCH_ON : USEARCH_OFF);
85 case USEARCH_CANONICAL_MATCH :
86 return (m_search_->isCanonicalMatch == TRUE ? USEARCH_ON :
87 USEARCH_OFF);
88 case USEARCH_ELEMENT_COMPARISON :
89 {
90 int16_t value = m_search_->elementComparisonType;
91 if (value == USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD || value == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD) {
92 return (USearchAttributeValue)value;
93 } else {
94 return USEARCH_STANDARD_ELEMENT_COMPARISON;
95 }
96 }
97 default :
98 return USEARCH_DEFAULT;
99 }
100 }
101
102 int32_t SearchIterator::getMatchedStart() const
103 {
104 return m_search_->matchedIndex;
105 }
106
107 int32_t SearchIterator::getMatchedLength() const
108 {
109 return m_search_->matchedLength;
110 }
111
112 void SearchIterator::getMatchedText(UnicodeString &result) const
113 {
114 int32_t matchedindex = m_search_->matchedIndex;
115 int32_t matchedlength = m_search_->matchedLength;
116 if (matchedindex != USEARCH_DONE && matchedlength != 0) {
117 result.setTo(m_search_->text + matchedindex, matchedlength);
118 }
119 else {
120 result.remove();
121 }
122 }
123
124 void SearchIterator::setBreakIterator(BreakIterator *breakiter,
125 UErrorCode &status)
126 {
127 if (U_SUCCESS(status)) {
128 #if 0
129 m_search_->breakIter = NULL;
130 // the c++ breakiterator may not make use of ubreakiterator.
131 // so we'll have to keep track of it ourselves.
132 #else
133 // Well, gee... the Constructors that take a BreakIterator
134 // all cast the BreakIterator to a UBreakIterator and
135 // pass it to the corresponding usearch_openFromXXX
136 // routine, so there's no reason not to do this.
137 //
138 // Besides, a UBreakIterator is a BreakIterator, so
139 // any subclass of BreakIterator should work fine here...
140 m_search_->breakIter = (UBreakIterator *) breakiter;
141 #endif
142
143 m_breakiterator_ = breakiter;
144 }
145 }
146
147 const BreakIterator * SearchIterator::getBreakIterator(void) const
148 {
149 return m_breakiterator_;
150 }
151
152 void SearchIterator::setText(const UnicodeString &text, UErrorCode &status)
153 {
154 if (U_SUCCESS(status)) {
155 if (text.length() == 0) {
156 status = U_ILLEGAL_ARGUMENT_ERROR;
157 }
158 else {
159 m_text_ = text;
160 m_search_->text = m_text_.getBuffer();
161 m_search_->textLength = m_text_.length();
162 }
163 }
164 }
165
166 void SearchIterator::setText(CharacterIterator &text, UErrorCode &status)
167 {
168 if (U_SUCCESS(status)) {
169 text.getText(m_text_);
170 setText(m_text_, status);
171 }
172 }
173
174 const UnicodeString & SearchIterator::getText(void) const
175 {
176 return m_text_;
177 }
178
179 // operator overloading ----------------------------------------------
180
181 UBool SearchIterator::operator==(const SearchIterator &that) const
182 {
183 if (this == &that) {
184 return TRUE;
185 }
186 return (m_breakiterator_ == that.m_breakiterator_ &&
187 m_search_->isCanonicalMatch == that.m_search_->isCanonicalMatch &&
188 m_search_->isOverlap == that.m_search_->isOverlap &&
189 m_search_->elementComparisonType == that.m_search_->elementComparisonType &&
190 m_search_->matchedIndex == that.m_search_->matchedIndex &&
191 m_search_->matchedLength == that.m_search_->matchedLength &&
192 m_search_->textLength == that.m_search_->textLength &&
193 getOffset() == that.getOffset() &&
194 (uprv_memcmp(m_search_->text, that.m_search_->text,
195 m_search_->textLength * sizeof(UChar)) == 0));
196 }
197
198 // public methods ----------------------------------------------------
199
200 int32_t SearchIterator::first(UErrorCode &status)
201 {
202 if (U_FAILURE(status)) {
203 return USEARCH_DONE;
204 }
205 setOffset(0, status);
206 return handleNext(0, status);
207 }
208
209 int32_t SearchIterator::following(int32_t position,
210 UErrorCode &status)
211 {
212 if (U_FAILURE(status)) {
213 return USEARCH_DONE;
214 }
215 setOffset(position, status);
216 return handleNext(position, status);
217 }
218
219 int32_t SearchIterator::last(UErrorCode &status)
220 {
221 if (U_FAILURE(status)) {
222 return USEARCH_DONE;
223 }
224 setOffset(m_search_->textLength, status);
225 return handlePrev(m_search_->textLength, status);
226 }
227
228 int32_t SearchIterator::preceding(int32_t position,
229 UErrorCode &status)
230 {
231 if (U_FAILURE(status)) {
232 return USEARCH_DONE;
233 }
234 setOffset(position, status);
235 return handlePrev(position, status);
236 }
237
238 int32_t SearchIterator::next(UErrorCode &status)
239 {
240 if (U_SUCCESS(status)) {
241 int32_t offset = getOffset();
242 int32_t matchindex = m_search_->matchedIndex;
243 int32_t matchlength = m_search_->matchedLength;
244 m_search_->reset = FALSE;
245 if (m_search_->isForwardSearching == TRUE) {
246 int32_t textlength = m_search_->textLength;
247 if (offset == textlength || matchindex == textlength ||
248 (matchindex != USEARCH_DONE &&
249 matchindex + matchlength >= textlength)) {
250 // not enough characters to match
251 setMatchNotFound();
252 return USEARCH_DONE;
253 }
254 }
255 else {
256 // switching direction.
257 // if matchedIndex == USEARCH_DONE, it means that either a
258 // setOffset has been called or that previous ran off the text
259 // string. the iterator would have been set to offset 0 if a
260 // match is not found.
261 m_search_->isForwardSearching = TRUE;
262 if (m_search_->matchedIndex != USEARCH_DONE) {
263 // there's no need to set the collation element iterator
264 // the next call to next will set the offset.
265 return matchindex;
266 }
267 }
268
269 if (matchlength > 0) {
270 // if matchlength is 0 we are at the start of the iteration
271 if (m_search_->isOverlap) {
272 offset ++;
273 }
274 else {
275 offset += matchlength;
276 }
277 }
278 return handleNext(offset, status);
279 }
280 return USEARCH_DONE;
281 }
282
283 int32_t SearchIterator::previous(UErrorCode &status)
284 {
285 if (U_SUCCESS(status)) {
286 int32_t offset;
287 if (m_search_->reset) {
288 offset = m_search_->textLength;
289 m_search_->isForwardSearching = FALSE;
290 m_search_->reset = FALSE;
291 setOffset(offset, status);
292 }
293 else {
294 offset = getOffset();
295 }
296
297 int32_t matchindex = m_search_->matchedIndex;
298 if (m_search_->isForwardSearching == TRUE) {
299 // switching direction.
300 // if matchedIndex == USEARCH_DONE, it means that either a
301 // setOffset has been called or that next ran off the text
302 // string. the iterator would have been set to offset textLength if
303 // a match is not found.
304 m_search_->isForwardSearching = FALSE;
305 if (matchindex != USEARCH_DONE) {
306 return matchindex;
307 }
308 }
309 else {
310 if (offset == 0 || matchindex == 0) {
311 // not enough characters to match
312 setMatchNotFound();
313 return USEARCH_DONE;
314 }
315 }
316
317 if (matchindex != USEARCH_DONE) {
318 if (m_search_->isOverlap) {
319 matchindex += m_search_->matchedLength - 2;
320 }
321
322 return handlePrev(matchindex, status);
323 }
324
325 return handlePrev(offset, status);
326 }
327
328 return USEARCH_DONE;
329 }
330
331 void SearchIterator::reset()
332 {
333 UErrorCode status = U_ZERO_ERROR;
334 setMatchNotFound();
335 setOffset(0, status);
336 m_search_->isOverlap = FALSE;
337 m_search_->isCanonicalMatch = FALSE;
338 m_search_->elementComparisonType = 0;
339 m_search_->isForwardSearching = TRUE;
340 m_search_->reset = TRUE;
341 }
342
343 // protected constructors and destructors -----------------------------
344
345 SearchIterator::SearchIterator()
346 {
347 m_search_ = (USearch *)uprv_malloc(sizeof(USearch));
348 m_search_->breakIter = NULL;
349 m_search_->isOverlap = FALSE;
350 m_search_->isCanonicalMatch = FALSE;
351 m_search_->elementComparisonType = 0;
352 m_search_->isForwardSearching = TRUE;
353 m_search_->reset = TRUE;
354 m_search_->matchedIndex = USEARCH_DONE;
355 m_search_->matchedLength = 0;
356 m_search_->text = NULL;
357 m_search_->textLength = 0;
358 m_breakiterator_ = NULL;
359 }
360
361 SearchIterator::SearchIterator(const UnicodeString &text,
362 BreakIterator *breakiter) :
363 m_breakiterator_(breakiter),
364 m_text_(text)
365 {
366 m_search_ = (USearch *)uprv_malloc(sizeof(USearch));
367 m_search_->breakIter = NULL;
368 m_search_->isOverlap = FALSE;
369 m_search_->isCanonicalMatch = FALSE;
370 m_search_->elementComparisonType = 0;
371 m_search_->isForwardSearching = TRUE;
372 m_search_->reset = TRUE;
373 m_search_->matchedIndex = USEARCH_DONE;
374 m_search_->matchedLength = 0;
375 m_search_->text = m_text_.getBuffer();
376 m_search_->textLength = text.length();
377 }
378
379 SearchIterator::SearchIterator(CharacterIterator &text,
380 BreakIterator *breakiter) :
381 m_breakiterator_(breakiter)
382 {
383 m_search_ = (USearch *)uprv_malloc(sizeof(USearch));
384 m_search_->breakIter = NULL;
385 m_search_->isOverlap = FALSE;
386 m_search_->isCanonicalMatch = FALSE;
387 m_search_->elementComparisonType = 0;
388 m_search_->isForwardSearching = TRUE;
389 m_search_->reset = TRUE;
390 m_search_->matchedIndex = USEARCH_DONE;
391 m_search_->matchedLength = 0;
392 text.getText(m_text_);
393 m_search_->text = m_text_.getBuffer();
394 m_search_->textLength = m_text_.length();
395 m_breakiterator_ = breakiter;
396 }
397
398 // protected methods ------------------------------------------------------
399
400 SearchIterator & SearchIterator::operator=(const SearchIterator &that)
401 {
402 if (this != &that) {
403 m_breakiterator_ = that.m_breakiterator_;
404 m_text_ = that.m_text_;
405 m_search_->breakIter = that.m_search_->breakIter;
406 m_search_->isCanonicalMatch = that.m_search_->isCanonicalMatch;
407 m_search_->isOverlap = that.m_search_->isOverlap;
408 m_search_->elementComparisonType = that.m_search_->elementComparisonType;
409 m_search_->matchedIndex = that.m_search_->matchedIndex;
410 m_search_->matchedLength = that.m_search_->matchedLength;
411 m_search_->text = that.m_search_->text;
412 m_search_->textLength = that.m_search_->textLength;
413 }
414 return *this;
415 }
416
417 void SearchIterator::setMatchLength(int32_t length)
418 {
419 m_search_->matchedLength = length;
420 }
421
422 void SearchIterator::setMatchStart(int32_t position)
423 {
424 m_search_->matchedIndex = position;
425 }
426
427 void SearchIterator::setMatchNotFound()
428 {
429 setMatchStart(USEARCH_DONE);
430 setMatchLength(0);
431 UErrorCode status = U_ZERO_ERROR;
432 // by default no errors should be returned here since offsets are within
433 // range.
434 if (m_search_->isForwardSearching) {
435 setOffset(m_search_->textLength, status);
436 }
437 else {
438 setOffset(0, status);
439 }
440 }
441
442
443 U_NAMESPACE_END
444
445 #endif /* #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION */