]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/search.cpp
ICU-66108.tar.gz
[apple/icu.git] / icuSources / i18n / search.cpp
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
b75a7d8f
A
3/*
4**********************************************************************
729e4ab9 5* Copyright (C) 2001-2008,2010 IBM and others. All rights reserved.
b75a7d8f
A
6**********************************************************************
7* Date Name Description
8* 03/22/2000 helena Creation.
9**********************************************************************
10*/
11
12#include "unicode/utypes.h"
13
46f4442e 14#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
b75a7d8f
A
15
16#include "unicode/brkiter.h"
17#include "unicode/schriter.h"
18#include "unicode/search.h"
19#include "usrchimp.h"
20#include "cmemory.h"
21
22// public constructors and destructors -----------------------------------
23U_NAMESPACE_BEGIN
24
25SearchIterator::SearchIterator(const SearchIterator &other)
26 : UObject(other)
27{
28 m_breakiterator_ = other.m_breakiterator_;
29 m_text_ = other.m_text_;
30 m_search_ = (USearch *)uprv_malloc(sizeof(USearch));
31 m_search_->breakIter = other.m_search_->breakIter;
32 m_search_->isCanonicalMatch = other.m_search_->isCanonicalMatch;
33 m_search_->isOverlap = other.m_search_->isOverlap;
729e4ab9 34 m_search_->elementComparisonType = other.m_search_->elementComparisonType;
b75a7d8f
A
35 m_search_->matchedIndex = other.m_search_->matchedIndex;
36 m_search_->matchedLength = other.m_search_->matchedLength;
37 m_search_->text = other.m_search_->text;
38 m_search_->textLength = other.m_search_->textLength;
39}
40
41SearchIterator::~SearchIterator()
42{
43 if (m_search_ != NULL) {
44 uprv_free(m_search_);
45 }
46}
47
48// public get and set methods ----------------------------------------
49
50void SearchIterator::setAttribute(USearchAttribute attribute,
51 USearchAttributeValue value,
52 UErrorCode &status)
53{
54 if (U_SUCCESS(status)) {
55 switch (attribute)
56 {
57 case USEARCH_OVERLAP :
58 m_search_->isOverlap = (value == USEARCH_ON ? TRUE : FALSE);
59 break;
60 case USEARCH_CANONICAL_MATCH :
61 m_search_->isCanonicalMatch = (value == USEARCH_ON ? TRUE : FALSE);
62 break;
729e4ab9
A
63 case USEARCH_ELEMENT_COMPARISON :
64 if (value == USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD || value == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD) {
65 m_search_->elementComparisonType = (int16_t)value;
66 } else {
67 m_search_->elementComparisonType = 0;
68 }
69 break;
b75a7d8f
A
70 default:
71 status = U_ILLEGAL_ARGUMENT_ERROR;
72 }
73 }
74 if (value == USEARCH_ATTRIBUTE_VALUE_COUNT) {
75 status = U_ILLEGAL_ARGUMENT_ERROR;
76 }
77}
78
79USearchAttributeValue SearchIterator::getAttribute(
80 USearchAttribute attribute) const
81{
82 switch (attribute) {
83 case USEARCH_OVERLAP :
84 return (m_search_->isOverlap == TRUE ? USEARCH_ON : USEARCH_OFF);
85 case USEARCH_CANONICAL_MATCH :
86 return (m_search_->isCanonicalMatch == TRUE ? USEARCH_ON :
87 USEARCH_OFF);
729e4ab9
A
88 case USEARCH_ELEMENT_COMPARISON :
89 {
90 int16_t value = m_search_->elementComparisonType;
91 if (value == USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD || value == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD) {
92 return (USearchAttributeValue)value;
93 } else {
94 return USEARCH_STANDARD_ELEMENT_COMPARISON;
95 }
96 }
b75a7d8f
A
97 default :
98 return USEARCH_DEFAULT;
99 }
100}
101
102int32_t SearchIterator::getMatchedStart() const
103{
104 return m_search_->matchedIndex;
105}
106
107int32_t SearchIterator::getMatchedLength() const
108{
109 return m_search_->matchedLength;
110}
111
112void SearchIterator::getMatchedText(UnicodeString &result) const
113{
114 int32_t matchedindex = m_search_->matchedIndex;
115 int32_t matchedlength = m_search_->matchedLength;
116 if (matchedindex != USEARCH_DONE && matchedlength != 0) {
117 result.setTo(m_search_->text + matchedindex, matchedlength);
118 }
119 else {
120 result.remove();
121 }
122}
123
124void SearchIterator::setBreakIterator(BreakIterator *breakiter,
125 UErrorCode &status)
126{
127 if (U_SUCCESS(status)) {
46f4442e 128#if 0
b75a7d8f
A
129 m_search_->breakIter = NULL;
130 // the c++ breakiterator may not make use of ubreakiterator.
131 // so we'll have to keep track of it ourselves.
46f4442e
A
132#else
133 // Well, gee... the Constructors that take a BreakIterator
134 // all cast the BreakIterator to a UBreakIterator and
135 // pass it to the corresponding usearch_openFromXXX
136 // routine, so there's no reason not to do this.
137 //
138 // Besides, a UBreakIterator is a BreakIterator, so
139 // any subclass of BreakIterator should work fine here...
140 m_search_->breakIter = (UBreakIterator *) breakiter;
141#endif
142
b75a7d8f
A
143 m_breakiterator_ = breakiter;
144 }
145}
146
147const BreakIterator * SearchIterator::getBreakIterator(void) const
148{
149 return m_breakiterator_;
150}
151
152void SearchIterator::setText(const UnicodeString &text, UErrorCode &status)
153{
154 if (U_SUCCESS(status)) {
155 if (text.length() == 0) {
156 status = U_ILLEGAL_ARGUMENT_ERROR;
157 }
158 else {
159 m_text_ = text;
160 m_search_->text = m_text_.getBuffer();
161 m_search_->textLength = m_text_.length();
162 }
163 }
164}
165
166void SearchIterator::setText(CharacterIterator &text, UErrorCode &status)
167{
168 if (U_SUCCESS(status)) {
169 text.getText(m_text_);
170 setText(m_text_, status);
171 }
172}
173
174const UnicodeString & SearchIterator::getText(void) const
175{
176 return m_text_;
177}
178
179// operator overloading ----------------------------------------------
180
181UBool SearchIterator::operator==(const SearchIterator &that) const
182{
183 if (this == &that) {
184 return TRUE;
185 }
186 return (m_breakiterator_ == that.m_breakiterator_ &&
187 m_search_->isCanonicalMatch == that.m_search_->isCanonicalMatch &&
188 m_search_->isOverlap == that.m_search_->isOverlap &&
729e4ab9 189 m_search_->elementComparisonType == that.m_search_->elementComparisonType &&
b75a7d8f
A
190 m_search_->matchedIndex == that.m_search_->matchedIndex &&
191 m_search_->matchedLength == that.m_search_->matchedLength &&
192 m_search_->textLength == that.m_search_->textLength &&
193 getOffset() == that.getOffset() &&
194 (uprv_memcmp(m_search_->text, that.m_search_->text,
195 m_search_->textLength * sizeof(UChar)) == 0));
196}
197
198// public methods ----------------------------------------------------
199
200int32_t SearchIterator::first(UErrorCode &status)
201{
202 if (U_FAILURE(status)) {
203 return USEARCH_DONE;
204 }
205 setOffset(0, status);
206 return handleNext(0, status);
207}
208
209int32_t SearchIterator::following(int32_t position,
210 UErrorCode &status)
211{
212 if (U_FAILURE(status)) {
213 return USEARCH_DONE;
214 }
215 setOffset(position, status);
216 return handleNext(position, status);
217}
218
219int32_t SearchIterator::last(UErrorCode &status)
220{
221 if (U_FAILURE(status)) {
222 return USEARCH_DONE;
223 }
224 setOffset(m_search_->textLength, status);
225 return handlePrev(m_search_->textLength, status);
226}
227
228int32_t SearchIterator::preceding(int32_t position,
229 UErrorCode &status)
230{
231 if (U_FAILURE(status)) {
232 return USEARCH_DONE;
233 }
234 setOffset(position, status);
235 return handlePrev(position, status);
236}
237
238int32_t SearchIterator::next(UErrorCode &status)
239{
240 if (U_SUCCESS(status)) {
241 int32_t offset = getOffset();
242 int32_t matchindex = m_search_->matchedIndex;
243 int32_t matchlength = m_search_->matchedLength;
244 m_search_->reset = FALSE;
245 if (m_search_->isForwardSearching == TRUE) {
246 int32_t textlength = m_search_->textLength;
247 if (offset == textlength || matchindex == textlength ||
248 (matchindex != USEARCH_DONE &&
249 matchindex + matchlength >= textlength)) {
250 // not enough characters to match
251 setMatchNotFound();
252 return USEARCH_DONE;
253 }
254 }
255 else {
256 // switching direction.
257 // if matchedIndex == USEARCH_DONE, it means that either a
258 // setOffset has been called or that previous ran off the text
259 // string. the iterator would have been set to offset 0 if a
260 // match is not found.
261 m_search_->isForwardSearching = TRUE;
262 if (m_search_->matchedIndex != USEARCH_DONE) {
263 // there's no need to set the collation element iterator
264 // the next call to next will set the offset.
265 return matchindex;
266 }
267 }
268
269 if (matchlength > 0) {
374ca955
A
270 // if matchlength is 0 we are at the start of the iteration
271 if (m_search_->isOverlap) {
272 offset ++;
273 }
274 else {
275 offset += matchlength;
276 }
277 }
b75a7d8f
A
278 return handleNext(offset, status);
279 }
280 return USEARCH_DONE;
281}
282
283int32_t SearchIterator::previous(UErrorCode &status)
284{
285 if (U_SUCCESS(status)) {
286 int32_t offset;
287 if (m_search_->reset) {
288 offset = m_search_->textLength;
289 m_search_->isForwardSearching = FALSE;
290 m_search_->reset = FALSE;
291 setOffset(offset, status);
292 }
293 else {
294 offset = getOffset();
295 }
296
297 int32_t matchindex = m_search_->matchedIndex;
298 if (m_search_->isForwardSearching == TRUE) {
299 // switching direction.
300 // if matchedIndex == USEARCH_DONE, it means that either a
301 // setOffset has been called or that next ran off the text
302 // string. the iterator would have been set to offset textLength if
303 // a match is not found.
304 m_search_->isForwardSearching = FALSE;
305 if (matchindex != USEARCH_DONE) {
306 return matchindex;
307 }
308 }
309 else {
310 if (offset == 0 || matchindex == 0) {
311 // not enough characters to match
312 setMatchNotFound();
313 return USEARCH_DONE;
314 }
315 }
316
317 if (matchindex != USEARCH_DONE) {
46f4442e
A
318 if (m_search_->isOverlap) {
319 matchindex += m_search_->matchedLength - 2;
320 }
321
b75a7d8f
A
322 return handlePrev(matchindex, status);
323 }
46f4442e 324
b75a7d8f
A
325 return handlePrev(offset, status);
326 }
46f4442e 327
b75a7d8f
A
328 return USEARCH_DONE;
329}
330
331void SearchIterator::reset()
332{
333 UErrorCode status = U_ZERO_ERROR;
334 setMatchNotFound();
335 setOffset(0, status);
336 m_search_->isOverlap = FALSE;
337 m_search_->isCanonicalMatch = FALSE;
729e4ab9 338 m_search_->elementComparisonType = 0;
b75a7d8f
A
339 m_search_->isForwardSearching = TRUE;
340 m_search_->reset = TRUE;
341}
342
343// protected constructors and destructors -----------------------------
344
345SearchIterator::SearchIterator()
346{
347 m_search_ = (USearch *)uprv_malloc(sizeof(USearch));
348 m_search_->breakIter = NULL;
349 m_search_->isOverlap = FALSE;
350 m_search_->isCanonicalMatch = FALSE;
729e4ab9 351 m_search_->elementComparisonType = 0;
b75a7d8f
A
352 m_search_->isForwardSearching = TRUE;
353 m_search_->reset = TRUE;
354 m_search_->matchedIndex = USEARCH_DONE;
355 m_search_->matchedLength = 0;
356 m_search_->text = NULL;
357 m_search_->textLength = 0;
73c04bcf 358 m_breakiterator_ = NULL;
b75a7d8f
A
359}
360
361SearchIterator::SearchIterator(const UnicodeString &text,
362 BreakIterator *breakiter) :
363 m_breakiterator_(breakiter),
364 m_text_(text)
365{
366 m_search_ = (USearch *)uprv_malloc(sizeof(USearch));
367 m_search_->breakIter = NULL;
368 m_search_->isOverlap = FALSE;
369 m_search_->isCanonicalMatch = FALSE;
729e4ab9 370 m_search_->elementComparisonType = 0;
b75a7d8f
A
371 m_search_->isForwardSearching = TRUE;
372 m_search_->reset = TRUE;
373 m_search_->matchedIndex = USEARCH_DONE;
374 m_search_->matchedLength = 0;
375 m_search_->text = m_text_.getBuffer();
376 m_search_->textLength = text.length();
377}
378
379SearchIterator::SearchIterator(CharacterIterator &text,
380 BreakIterator *breakiter) :
381 m_breakiterator_(breakiter)
382{
383 m_search_ = (USearch *)uprv_malloc(sizeof(USearch));
384 m_search_->breakIter = NULL;
385 m_search_->isOverlap = FALSE;
386 m_search_->isCanonicalMatch = FALSE;
729e4ab9 387 m_search_->elementComparisonType = 0;
b75a7d8f
A
388 m_search_->isForwardSearching = TRUE;
389 m_search_->reset = TRUE;
390 m_search_->matchedIndex = USEARCH_DONE;
391 m_search_->matchedLength = 0;
392 text.getText(m_text_);
393 m_search_->text = m_text_.getBuffer();
394 m_search_->textLength = m_text_.length();
395 m_breakiterator_ = breakiter;
396}
397
398// protected methods ------------------------------------------------------
399
400SearchIterator & SearchIterator::operator=(const SearchIterator &that)
401{
402 if (this != &that) {
403 m_breakiterator_ = that.m_breakiterator_;
404 m_text_ = that.m_text_;
405 m_search_->breakIter = that.m_search_->breakIter;
406 m_search_->isCanonicalMatch = that.m_search_->isCanonicalMatch;
407 m_search_->isOverlap = that.m_search_->isOverlap;
729e4ab9 408 m_search_->elementComparisonType = that.m_search_->elementComparisonType;
b75a7d8f
A
409 m_search_->matchedIndex = that.m_search_->matchedIndex;
410 m_search_->matchedLength = that.m_search_->matchedLength;
411 m_search_->text = that.m_search_->text;
412 m_search_->textLength = that.m_search_->textLength;
413 }
414 return *this;
415}
416
417void SearchIterator::setMatchLength(int32_t length)
418{
419 m_search_->matchedLength = length;
420}
421
422void SearchIterator::setMatchStart(int32_t position)
423{
424 m_search_->matchedIndex = position;
425}
426
427void SearchIterator::setMatchNotFound()
428{
429 setMatchStart(USEARCH_DONE);
430 setMatchLength(0);
431 UErrorCode status = U_ZERO_ERROR;
432 // by default no errors should be returned here since offsets are within
433 // range.
434 if (m_search_->isForwardSearching) {
435 setOffset(m_search_->textLength, status);
436 }
437 else {
438 setOffset(0, status);
439 }
440}
441
442
443U_NAMESPACE_END
444
#endif /* #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION */