]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/stsearch.cpp
ICU-57131.0.1.tar.gz
[apple/icu.git] / icuSources / i18n / stsearch.cpp
CommitLineData
b75a7d8f
A
1/*
2**********************************************************************
57a6839d 3* Copyright (C) 2001-2014 IBM and others. All rights reserved.
b75a7d8f
A
4**********************************************************************
5* Date Name Description
6* 03/22/2000 helena Creation.
7**********************************************************************
8*/
9
10#include "unicode/utypes.h"
11
46f4442e 12#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
b75a7d8f
A
13
14#include "unicode/stsearch.h"
15#include "usrchimp.h"
16#include "cmemory.h"
17
18U_NAMESPACE_BEGIN
19
374ca955 20UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringSearch)
b75a7d8f
A
21
22// public constructors and destructors -----------------------------------
23
73c04bcf 24StringSearch::StringSearch(const UnicodeString &pattern,
b75a7d8f 25 const UnicodeString &text,
73c04bcf 26 const Locale &locale,
b75a7d8f
A
27 BreakIterator *breakiter,
28 UErrorCode &status) :
73c04bcf 29 SearchIterator(text, breakiter),
b75a7d8f
A
30 m_pattern_(pattern)
31{
32 if (U_FAILURE(status)) {
33 m_strsrch_ = NULL;
34 return;
35 }
36
73c04bcf
A
37 m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
38 m_text_.getBuffer(), m_text_.length(),
39 locale.getName(), (UBreakIterator *)breakiter,
b75a7d8f
A
40 &status);
41 uprv_free(m_search_);
42 m_search_ = NULL;
43
b75a7d8f 44 if (U_SUCCESS(status)) {
b75a7d8f
A
45 // m_search_ has been created by the base SearchIterator class
46 m_search_ = m_strsrch_->search;
47 }
48}
49
73c04bcf 50StringSearch::StringSearch(const UnicodeString &pattern,
b75a7d8f 51 const UnicodeString &text,
73c04bcf 52 RuleBasedCollator *coll,
b75a7d8f
A
53 BreakIterator *breakiter,
54 UErrorCode &status) :
73c04bcf 55 SearchIterator(text, breakiter),
b75a7d8f
A
56 m_pattern_(pattern)
57{
58 if (U_FAILURE(status)) {
59 m_strsrch_ = NULL;
60 return;
61 }
62 if (coll == NULL) {
63 status = U_ILLEGAL_ARGUMENT_ERROR;
64 m_strsrch_ = NULL;
65 return;
66 }
73c04bcf
A
67 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
68 m_pattern_.length(),
69 m_text_.getBuffer(),
57a6839d 70 m_text_.length(), coll->toUCollator(),
73c04bcf 71 (UBreakIterator *)breakiter,
b75a7d8f
A
72 &status);
73 uprv_free(m_search_);
74 m_search_ = NULL;
75
76 if (U_SUCCESS(status)) {
b75a7d8f
A
77 // m_search_ has been created by the base SearchIterator class
78 m_search_ = m_strsrch_->search;
79 }
80}
81
73c04bcf 82StringSearch::StringSearch(const UnicodeString &pattern,
b75a7d8f 83 CharacterIterator &text,
73c04bcf 84 const Locale &locale,
b75a7d8f
A
85 BreakIterator *breakiter,
86 UErrorCode &status) :
73c04bcf 87 SearchIterator(text, breakiter),
b75a7d8f
A
88 m_pattern_(pattern)
89{
90 if (U_FAILURE(status)) {
91 m_strsrch_ = NULL;
92 return;
93 }
73c04bcf
A
94 m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
95 m_text_.getBuffer(), m_text_.length(),
96 locale.getName(), (UBreakIterator *)breakiter,
b75a7d8f
A
97 &status);
98 uprv_free(m_search_);
99 m_search_ = NULL;
100
101 if (U_SUCCESS(status)) {
b75a7d8f
A
102 // m_search_ has been created by the base SearchIterator class
103 m_search_ = m_strsrch_->search;
104 }
105}
106
107StringSearch::StringSearch(const UnicodeString &pattern,
108 CharacterIterator &text,
73c04bcf 109 RuleBasedCollator *coll,
b75a7d8f
A
110 BreakIterator *breakiter,
111 UErrorCode &status) :
73c04bcf 112 SearchIterator(text, breakiter),
b75a7d8f
A
113 m_pattern_(pattern)
114{
115 if (U_FAILURE(status)) {
116 m_strsrch_ = NULL;
117 return;
118 }
119 if (coll == NULL) {
120 status = U_ILLEGAL_ARGUMENT_ERROR;
121 m_strsrch_ = NULL;
122 return;
123 }
73c04bcf
A
124 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
125 m_pattern_.length(),
126 m_text_.getBuffer(),
57a6839d 127 m_text_.length(), coll->toUCollator(),
73c04bcf 128 (UBreakIterator *)breakiter,
b75a7d8f
A
129 &status);
130 uprv_free(m_search_);
131 m_search_ = NULL;
132
133 if (U_SUCCESS(status)) {
b75a7d8f
A
134 // m_search_ has been created by the base SearchIterator class
135 m_search_ = m_strsrch_->search;
136 }
137}
138
139StringSearch::StringSearch(const StringSearch &that) :
140 SearchIterator(that.m_text_, that.m_breakiterator_),
b75a7d8f
A
141 m_pattern_(that.m_pattern_)
142{
143 UErrorCode status = U_ZERO_ERROR;
73c04bcf
A
144
145 // Free m_search_ from the superclass
146 uprv_free(m_search_);
147 m_search_ = NULL;
148
b75a7d8f 149 if (that.m_strsrch_ == NULL) {
73c04bcf 150 // This was not a good copy
b75a7d8f 151 m_strsrch_ = NULL;
b75a7d8f
A
152 }
153 else {
73c04bcf
A
154 // Make a deep copy
155 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
156 m_pattern_.length(),
157 m_text_.getBuffer(),
158 m_text_.length(),
159 that.m_strsrch_->collator,
160 (UBreakIterator *)that.m_breakiterator_,
b75a7d8f 161 &status);
73c04bcf 162 if (U_SUCCESS(status)) {
73c04bcf
A
163 // m_search_ has been created by the base SearchIterator class
164 m_search_ = m_strsrch_->search;
165 }
b75a7d8f
A
166 }
167}
168
169StringSearch::~StringSearch()
170{
171 if (m_strsrch_ != NULL) {
172 usearch_close(m_strsrch_);
173 m_search_ = NULL;
174 }
175}
176
374ca955
A
177StringSearch *
178StringSearch::clone() const {
179 return new StringSearch(*this);
180}
181
b75a7d8f
A
182// operator overloading ---------------------------------------------
183StringSearch & StringSearch::operator=(const StringSearch &that)
184{
185 if ((*this) != that) {
186 UErrorCode status = U_ZERO_ERROR;
187 m_text_ = that.m_text_;
188 m_breakiterator_ = that.m_breakiterator_;
189 m_pattern_ = that.m_pattern_;
190 // all m_search_ in the parent class is linked up with m_strsrch_
191 usearch_close(m_strsrch_);
73c04bcf
A
192 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
193 m_pattern_.length(),
194 m_text_.getBuffer(),
195 m_text_.length(),
196 that.m_strsrch_->collator,
b75a7d8f 197 NULL, &status);
46f4442e
A
198 // Check null pointer
199 if (m_strsrch_ != NULL) {
57a6839d 200 m_search_ = m_strsrch_->search;
46f4442e 201 }
b75a7d8f
A
202 }
203 return *this;
204}
205
206UBool StringSearch::operator==(const SearchIterator &that) const
207{
208 if (this == &that) {
209 return TRUE;
210 }
211 if (SearchIterator::operator ==(that)) {
212 StringSearch &thatsrch = (StringSearch &)that;
213 return (this->m_pattern_ == thatsrch.m_pattern_ &&
214 this->m_strsrch_->collator == thatsrch.m_strsrch_->collator);
215 }
216 return FALSE;
217}
218
219// public get and set methods ----------------------------------------
220
221void StringSearch::setOffset(int32_t position, UErrorCode &status)
222{
223 // status checked in usearch_setOffset
224 usearch_setOffset(m_strsrch_, position, &status);
225}
226
227int32_t StringSearch::getOffset(void) const
228{
229 return usearch_getOffset(m_strsrch_);
230}
231
232void StringSearch::setText(const UnicodeString &text, UErrorCode &status)
233{
234 if (U_SUCCESS(status)) {
235 m_text_ = text;
236 usearch_setText(m_strsrch_, text.getBuffer(), text.length(), &status);
237 }
238}
73c04bcf 239
b75a7d8f
A
240void StringSearch::setText(CharacterIterator &text, UErrorCode &status)
241{
242 if (U_SUCCESS(status)) {
243 text.getText(m_text_);
244 usearch_setText(m_strsrch_, m_text_.getBuffer(), m_text_.length(), &status);
245 }
246}
247
248RuleBasedCollator * StringSearch::getCollator() const
249{
57a6839d
A
250 // Note the const_cast. It would be cleaner if this const method returned a const collator.
251 return RuleBasedCollator::rbcFromUCollator(const_cast<UCollator *>(m_strsrch_->collator));
b75a7d8f 252}
73c04bcf 253
b75a7d8f
A
254void StringSearch::setCollator(RuleBasedCollator *coll, UErrorCode &status)
255{
256 if (U_SUCCESS(status)) {
57a6839d 257 usearch_setCollator(m_strsrch_, coll->toUCollator(), &status);
b75a7d8f
A
258 }
259}
73c04bcf
A
260
261void StringSearch::setPattern(const UnicodeString &pattern,
b75a7d8f
A
262 UErrorCode &status)
263{
264 if (U_SUCCESS(status)) {
265 m_pattern_ = pattern;
266 usearch_setPattern(m_strsrch_, m_pattern_.getBuffer(), m_pattern_.length(),
267 &status);
268 }
269}
73c04bcf 270
b75a7d8f
A
271const UnicodeString & StringSearch::getPattern() const
272{
273 return m_pattern_;
274}
275
276// public methods ----------------------------------------------------
277
278void StringSearch::reset()
279{
280 usearch_reset(m_strsrch_);
281}
282
283SearchIterator * StringSearch::safeClone(void) const
284{
285 UErrorCode status = U_ZERO_ERROR;
73c04bcf 286 StringSearch *result = new StringSearch(m_pattern_, m_text_,
57a6839d 287 getCollator(),
b75a7d8f
A
288 m_breakiterator_,
289 status);
290 /* test for NULL */
291 if (result == 0) {
292 status = U_MEMORY_ALLOCATION_ERROR;
293 return 0;
294 }
295 result->setOffset(getOffset(), status);
296 result->setMatchStart(m_strsrch_->search->matchedIndex);
297 result->setMatchLength(m_strsrch_->search->matchedLength);
298 if (U_FAILURE(status)) {
299 return NULL;
300 }
301 return result;
302}
73c04bcf 303
b75a7d8f
A
304// protected method -------------------------------------------------
305
306int32_t StringSearch::handleNext(int32_t position, UErrorCode &status)
307{
308 // values passed here are already in the pre-shift position
309 if (U_SUCCESS(status)) {
b331163b 310 if (m_strsrch_->pattern.cesLength == 0) {
73c04bcf
A
311 m_search_->matchedIndex =
312 m_search_->matchedIndex == USEARCH_DONE ?
b75a7d8f
A
313 getOffset() : m_search_->matchedIndex + 1;
314 m_search_->matchedLength = 0;
73c04bcf 315 ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
b75a7d8f
A
316 &status);
317 if (m_search_->matchedIndex == m_search_->textLength) {
318 m_search_->matchedIndex = USEARCH_DONE;
319 }
320 }
321 else {
73c04bcf 322 // looking at usearch.cpp, this part is shifted out to
b75a7d8f
A
323 // StringSearch instead of SearchIterator because m_strsrch_ is
324 // not accessible in SearchIterator
46f4442e 325#if 0
73c04bcf 326 if (position + m_strsrch_->pattern.defaultShiftSize
374ca955 327 > m_search_->textLength) {
b75a7d8f
A
328 setMatchNotFound();
329 return USEARCH_DONE;
330 }
46f4442e 331#endif
374ca955 332 if (m_search_->matchedLength <= 0) {
73c04bcf 333 // the flipping direction issue has already been handled
374ca955
A
334 // in next()
335 // for boundary check purposes. this will ensure that the
336 // next match will not preceed the current offset
337 // note search->matchedIndex will always be set to something
338 // in the code
339 m_search_->matchedIndex = position - 1;
340 }
341
342 ucol_setOffset(m_strsrch_->textIter, position, &status);
46f4442e
A
343
344#if 0
345 for (;;) {
b75a7d8f
A
346 if (m_search_->isCanonicalMatch) {
347 // can't use exact here since extra accents are allowed.
348 usearch_handleNextCanonical(m_strsrch_, &status);
349 }
350 else {
351 usearch_handleNextExact(m_strsrch_, &status);
352 }
353 if (U_FAILURE(status)) {
354 return USEARCH_DONE;
355 }
356 if (m_breakiterator_ == NULL
357#if !UCONFIG_NO_BREAK_ITERATION
73c04bcf 358 ||
b75a7d8f
A
359 m_search_->matchedIndex == USEARCH_DONE ||
360 (m_breakiterator_->isBoundary(m_search_->matchedIndex) &&
73c04bcf 361 m_breakiterator_->isBoundary(m_search_->matchedIndex +
b75a7d8f
A
362 m_search_->matchedLength))
363#endif
364 ) {
374ca955 365 if (m_search_->matchedIndex == USEARCH_DONE) {
73c04bcf 366 ucol_setOffset(m_strsrch_->textIter,
374ca955
A
367 m_search_->textLength, &status);
368 }
369 else {
73c04bcf 370 ucol_setOffset(m_strsrch_->textIter,
374ca955
A
371 m_search_->matchedIndex, &status);
372 }
b75a7d8f
A
373 return m_search_->matchedIndex;
374 }
375 }
46f4442e
A
376#else
377 // if m_strsrch_->breakIter is always the same as m_breakiterator_
378 // then we don't need to check the match boundaries here because
379 // usearch_handleNextXXX will already have done it.
380 if (m_search_->isCanonicalMatch) {
381 // *could* actually use exact here 'cause no extra accents allowed...
382 usearch_handleNextCanonical(m_strsrch_, &status);
383 } else {
384 usearch_handleNextExact(m_strsrch_, &status);
385 }
386
387 if (U_FAILURE(status)) {
388 return USEARCH_DONE;
389 }
390
391 if (m_search_->matchedIndex == USEARCH_DONE) {
392 ucol_setOffset(m_strsrch_->textIter, m_search_->textLength, &status);
393 } else {
394 ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, &status);
395 }
396
397 return m_search_->matchedIndex;
398#endif
b75a7d8f
A
399 }
400 }
401 return USEARCH_DONE;
402}
403
404int32_t StringSearch::handlePrev(int32_t position, UErrorCode &status)
405{
406 // values passed here are already in the pre-shift position
407 if (U_SUCCESS(status)) {
b331163b 408 if (m_strsrch_->pattern.cesLength == 0) {
73c04bcf
A
409 m_search_->matchedIndex =
410 (m_search_->matchedIndex == USEARCH_DONE ? getOffset() :
b75a7d8f
A
411 m_search_->matchedIndex);
412 if (m_search_->matchedIndex == 0) {
413 setMatchNotFound();
414 }
415 else {
416 m_search_->matchedIndex --;
73c04bcf 417 ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
b75a7d8f
A
418 &status);
419 m_search_->matchedLength = 0;
420 }
421 }
422 else {
73c04bcf 423 // looking at usearch.cpp, this part is shifted out to
b75a7d8f
A
424 // StringSearch instead of SearchIterator because m_strsrch_ is
425 // not accessible in SearchIterator
46f4442e 426#if 0
73c04bcf 427 if (!m_search_->isOverlap &&
b75a7d8f
A
428 position - m_strsrch_->pattern.defaultShiftSize < 0) {
429 setMatchNotFound();
430 return USEARCH_DONE;
431 }
46f4442e
A
432
433 for (;;) {
b75a7d8f
A
434 if (m_search_->isCanonicalMatch) {
435 // can't use exact here since extra accents are allowed.
436 usearch_handlePreviousCanonical(m_strsrch_, &status);
437 }
438 else {
439 usearch_handlePreviousExact(m_strsrch_, &status);
440 }
441 if (U_FAILURE(status)) {
442 return USEARCH_DONE;
443 }
444 if (m_breakiterator_ == NULL
445#if !UCONFIG_NO_BREAK_ITERATION
73c04bcf 446 ||
b75a7d8f
A
447 m_search_->matchedIndex == USEARCH_DONE ||
448 (m_breakiterator_->isBoundary(m_search_->matchedIndex) &&
73c04bcf 449 m_breakiterator_->isBoundary(m_search_->matchedIndex +
b75a7d8f
A
450 m_search_->matchedLength))
451#endif
452 ) {
453 return m_search_->matchedIndex;
454 }
455 }
46f4442e
A
456#else
457 ucol_setOffset(m_strsrch_->textIter, position, &status);
458
459 if (m_search_->isCanonicalMatch) {
460 // *could* use exact match here since extra accents *not* allowed!
461 usearch_handlePreviousCanonical(m_strsrch_, &status);
462 } else {
463 usearch_handlePreviousExact(m_strsrch_, &status);
464 }
465
466 if (U_FAILURE(status)) {
467 return USEARCH_DONE;
468 }
469
470 return m_search_->matchedIndex;
471#endif
b75a7d8f 472 }
73c04bcf 473
b75a7d8f
A
474 return m_search_->matchedIndex;
475 }
476 return USEARCH_DONE;
477}
478
479U_NAMESPACE_END
480
481#endif /* #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION */