]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/stsearch.cpp
ICU-400.40.tar.gz
[apple/icu.git] / icuSources / i18n / stsearch.cpp
CommitLineData
b75a7d8f
A
1/*
2**********************************************************************
46f4442e 3* Copyright (C) 2001-2008 IBM and others. All rights reserved.
b75a7d8f
A
4**********************************************************************
5* Date Name Description
6* 03/22/2000 helena Creation.
7**********************************************************************
8*/
9
10#include "unicode/utypes.h"
11
46f4442e 12#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
b75a7d8f
A
13
14#include "unicode/stsearch.h"
15#include "usrchimp.h"
16#include "cmemory.h"
17
18U_NAMESPACE_BEGIN
19
374ca955 20UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringSearch)
b75a7d8f
A
21
22// public constructors and destructors -----------------------------------
23
73c04bcf 24StringSearch::StringSearch(const UnicodeString &pattern,
b75a7d8f 25 const UnicodeString &text,
73c04bcf 26 const Locale &locale,
b75a7d8f
A
27 BreakIterator *breakiter,
28 UErrorCode &status) :
73c04bcf 29 SearchIterator(text, breakiter),
b75a7d8f
A
30 m_collator_(),
31 m_pattern_(pattern)
32{
33 if (U_FAILURE(status)) {
34 m_strsrch_ = NULL;
35 return;
36 }
37
73c04bcf
A
38 m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
39 m_text_.getBuffer(), m_text_.length(),
40 locale.getName(), (UBreakIterator *)breakiter,
b75a7d8f
A
41 &status);
42 uprv_free(m_search_);
43 m_search_ = NULL;
44
374ca955
A
45 // !!! dlf m_collator_ is an odd beast. basically it is an aliasing
46 // wrapper around the internal collator and rules, which (here) are
47 // owned by this stringsearch object. this means 1) it's destructor
48 // _should not_ delete the ucollator or rules, and 2) changes made
73c04bcf 49 // to the exposed collator (setStrength etc) _should_ modify the
374ca955
A
50 // ucollator. thus the collator is not a copy-on-write alias, and it
51 // needs to distinguish itself not merely from 'stand alone' colators
52 // but also from copy-on-write ones. it needs additional state, which
53 // setUCollator should set.
b75a7d8f
A
54
55 if (U_SUCCESS(status)) {
73c04bcf
A
56 // Alias the collator
57 m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
b75a7d8f
A
58 // m_search_ has been created by the base SearchIterator class
59 m_search_ = m_strsrch_->search;
60 }
61}
62
73c04bcf 63StringSearch::StringSearch(const UnicodeString &pattern,
b75a7d8f 64 const UnicodeString &text,
73c04bcf 65 RuleBasedCollator *coll,
b75a7d8f
A
66 BreakIterator *breakiter,
67 UErrorCode &status) :
73c04bcf 68 SearchIterator(text, breakiter),
b75a7d8f
A
69 m_collator_(),
70 m_pattern_(pattern)
71{
72 if (U_FAILURE(status)) {
73 m_strsrch_ = NULL;
74 return;
75 }
76 if (coll == NULL) {
77 status = U_ILLEGAL_ARGUMENT_ERROR;
78 m_strsrch_ = NULL;
79 return;
80 }
73c04bcf
A
81 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
82 m_pattern_.length(),
83 m_text_.getBuffer(),
84 m_text_.length(), coll->ucollator,
85 (UBreakIterator *)breakiter,
b75a7d8f
A
86 &status);
87 uprv_free(m_search_);
88 m_search_ = NULL;
89
90 if (U_SUCCESS(status)) {
73c04bcf
A
91 // Alias the collator
92 m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
b75a7d8f
A
93 // m_search_ has been created by the base SearchIterator class
94 m_search_ = m_strsrch_->search;
95 }
96}
97
73c04bcf 98StringSearch::StringSearch(const UnicodeString &pattern,
b75a7d8f 99 CharacterIterator &text,
73c04bcf 100 const Locale &locale,
b75a7d8f
A
101 BreakIterator *breakiter,
102 UErrorCode &status) :
73c04bcf 103 SearchIterator(text, breakiter),
b75a7d8f
A
104 m_collator_(),
105 m_pattern_(pattern)
106{
107 if (U_FAILURE(status)) {
108 m_strsrch_ = NULL;
109 return;
110 }
73c04bcf
A
111 m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
112 m_text_.getBuffer(), m_text_.length(),
113 locale.getName(), (UBreakIterator *)breakiter,
b75a7d8f
A
114 &status);
115 uprv_free(m_search_);
116 m_search_ = NULL;
117
118 if (U_SUCCESS(status)) {
73c04bcf
A
119 // Alias the collator
120 m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
b75a7d8f
A
121 // m_search_ has been created by the base SearchIterator class
122 m_search_ = m_strsrch_->search;
123 }
124}
125
126StringSearch::StringSearch(const UnicodeString &pattern,
127 CharacterIterator &text,
73c04bcf 128 RuleBasedCollator *coll,
b75a7d8f
A
129 BreakIterator *breakiter,
130 UErrorCode &status) :
73c04bcf 131 SearchIterator(text, breakiter),
b75a7d8f
A
132 m_collator_(),
133 m_pattern_(pattern)
134{
135 if (U_FAILURE(status)) {
136 m_strsrch_ = NULL;
137 return;
138 }
139 if (coll == NULL) {
140 status = U_ILLEGAL_ARGUMENT_ERROR;
141 m_strsrch_ = NULL;
142 return;
143 }
73c04bcf
A
144 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
145 m_pattern_.length(),
146 m_text_.getBuffer(),
147 m_text_.length(), coll->ucollator,
148 (UBreakIterator *)breakiter,
b75a7d8f
A
149 &status);
150 uprv_free(m_search_);
151 m_search_ = NULL;
152
153 if (U_SUCCESS(status)) {
73c04bcf
A
154 // Alias the collator
155 m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
b75a7d8f
A
156 // m_search_ has been created by the base SearchIterator class
157 m_search_ = m_strsrch_->search;
158 }
159}
160
161StringSearch::StringSearch(const StringSearch &that) :
162 SearchIterator(that.m_text_, that.m_breakiterator_),
163 m_collator_(),
164 m_pattern_(that.m_pattern_)
165{
166 UErrorCode status = U_ZERO_ERROR;
73c04bcf
A
167
168 // Free m_search_ from the superclass
169 uprv_free(m_search_);
170 m_search_ = NULL;
171
b75a7d8f 172 if (that.m_strsrch_ == NULL) {
73c04bcf 173 // This was not a good copy
b75a7d8f 174 m_strsrch_ = NULL;
b75a7d8f
A
175 }
176 else {
73c04bcf
A
177 // Make a deep copy
178 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
179 m_pattern_.length(),
180 m_text_.getBuffer(),
181 m_text_.length(),
182 that.m_strsrch_->collator,
183 (UBreakIterator *)that.m_breakiterator_,
b75a7d8f 184 &status);
73c04bcf
A
185 if (U_SUCCESS(status)) {
186 // Alias the collator
187 m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
188 // m_search_ has been created by the base SearchIterator class
189 m_search_ = m_strsrch_->search;
190 }
b75a7d8f
A
191 }
192}
193
194StringSearch::~StringSearch()
195{
196 if (m_strsrch_ != NULL) {
197 usearch_close(m_strsrch_);
198 m_search_ = NULL;
199 }
200}
201
374ca955
A
202StringSearch *
203StringSearch::clone() const {
204 return new StringSearch(*this);
205}
206
b75a7d8f
A
207// operator overloading ---------------------------------------------
208StringSearch & StringSearch::operator=(const StringSearch &that)
209{
210 if ((*this) != that) {
211 UErrorCode status = U_ZERO_ERROR;
212 m_text_ = that.m_text_;
213 m_breakiterator_ = that.m_breakiterator_;
214 m_pattern_ = that.m_pattern_;
215 // all m_search_ in the parent class is linked up with m_strsrch_
216 usearch_close(m_strsrch_);
73c04bcf
A
217 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
218 m_pattern_.length(),
219 m_text_.getBuffer(),
220 m_text_.length(),
221 that.m_strsrch_->collator,
b75a7d8f 222 NULL, &status);
46f4442e
A
223 // Check null pointer
224 if (m_strsrch_ != NULL) {
225 // Alias the collator
226 m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
227 m_search_ = m_strsrch_->search;
228 }
b75a7d8f
A
229 }
230 return *this;
231}
232
233UBool StringSearch::operator==(const SearchIterator &that) const
234{
235 if (this == &that) {
236 return TRUE;
237 }
238 if (SearchIterator::operator ==(that)) {
239 StringSearch &thatsrch = (StringSearch &)that;
240 return (this->m_pattern_ == thatsrch.m_pattern_ &&
241 this->m_strsrch_->collator == thatsrch.m_strsrch_->collator);
242 }
243 return FALSE;
244}
245
246// public get and set methods ----------------------------------------
247
248void StringSearch::setOffset(int32_t position, UErrorCode &status)
249{
250 // status checked in usearch_setOffset
251 usearch_setOffset(m_strsrch_, position, &status);
252}
253
254int32_t StringSearch::getOffset(void) const
255{
256 return usearch_getOffset(m_strsrch_);
257}
258
259void StringSearch::setText(const UnicodeString &text, UErrorCode &status)
260{
261 if (U_SUCCESS(status)) {
262 m_text_ = text;
263 usearch_setText(m_strsrch_, text.getBuffer(), text.length(), &status);
264 }
265}
73c04bcf 266
b75a7d8f
A
267void StringSearch::setText(CharacterIterator &text, UErrorCode &status)
268{
269 if (U_SUCCESS(status)) {
270 text.getText(m_text_);
271 usearch_setText(m_strsrch_, m_text_.getBuffer(), m_text_.length(), &status);
272 }
273}
274
275RuleBasedCollator * StringSearch::getCollator() const
276{
277 return (RuleBasedCollator *)&m_collator_;
278}
73c04bcf 279
b75a7d8f
A
280void StringSearch::setCollator(RuleBasedCollator *coll, UErrorCode &status)
281{
282 if (U_SUCCESS(status)) {
283 usearch_setCollator(m_strsrch_, coll->getUCollator(), &status);
73c04bcf
A
284 // Alias the collator
285 m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
b75a7d8f
A
286 }
287}
73c04bcf
A
288
289void StringSearch::setPattern(const UnicodeString &pattern,
b75a7d8f
A
290 UErrorCode &status)
291{
292 if (U_SUCCESS(status)) {
293 m_pattern_ = pattern;
294 usearch_setPattern(m_strsrch_, m_pattern_.getBuffer(), m_pattern_.length(),
295 &status);
296 }
297}
73c04bcf 298
b75a7d8f
A
299const UnicodeString & StringSearch::getPattern() const
300{
301 return m_pattern_;
302}
303
304// public methods ----------------------------------------------------
305
306void StringSearch::reset()
307{
308 usearch_reset(m_strsrch_);
309}
310
311SearchIterator * StringSearch::safeClone(void) const
312{
313 UErrorCode status = U_ZERO_ERROR;
73c04bcf
A
314 StringSearch *result = new StringSearch(m_pattern_, m_text_,
315 (RuleBasedCollator *)&m_collator_,
b75a7d8f
A
316 m_breakiterator_,
317 status);
318 /* test for NULL */
319 if (result == 0) {
320 status = U_MEMORY_ALLOCATION_ERROR;
321 return 0;
322 }
323 result->setOffset(getOffset(), status);
324 result->setMatchStart(m_strsrch_->search->matchedIndex);
325 result->setMatchLength(m_strsrch_->search->matchedLength);
326 if (U_FAILURE(status)) {
327 return NULL;
328 }
329 return result;
330}
73c04bcf 331
b75a7d8f
A
332// protected method -------------------------------------------------
333
334int32_t StringSearch::handleNext(int32_t position, UErrorCode &status)
335{
336 // values passed here are already in the pre-shift position
337 if (U_SUCCESS(status)) {
338 if (m_strsrch_->pattern.CELength == 0) {
73c04bcf
A
339 m_search_->matchedIndex =
340 m_search_->matchedIndex == USEARCH_DONE ?
b75a7d8f
A
341 getOffset() : m_search_->matchedIndex + 1;
342 m_search_->matchedLength = 0;
73c04bcf 343 ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
b75a7d8f
A
344 &status);
345 if (m_search_->matchedIndex == m_search_->textLength) {
346 m_search_->matchedIndex = USEARCH_DONE;
347 }
348 }
349 else {
73c04bcf 350 // looking at usearch.cpp, this part is shifted out to
b75a7d8f
A
351 // StringSearch instead of SearchIterator because m_strsrch_ is
352 // not accessible in SearchIterator
46f4442e 353#if 0
73c04bcf 354 if (position + m_strsrch_->pattern.defaultShiftSize
374ca955 355 > m_search_->textLength) {
b75a7d8f
A
356 setMatchNotFound();
357 return USEARCH_DONE;
358 }
46f4442e 359#endif
374ca955 360 if (m_search_->matchedLength <= 0) {
73c04bcf 361 // the flipping direction issue has already been handled
374ca955
A
362 // in next()
363 // for boundary check purposes. this will ensure that the
364 // next match will not preceed the current offset
365 // note search->matchedIndex will always be set to something
366 // in the code
367 m_search_->matchedIndex = position - 1;
368 }
369
370 ucol_setOffset(m_strsrch_->textIter, position, &status);
46f4442e
A
371
372#if 0
373 for (;;) {
b75a7d8f
A
374 if (m_search_->isCanonicalMatch) {
375 // can't use exact here since extra accents are allowed.
376 usearch_handleNextCanonical(m_strsrch_, &status);
377 }
378 else {
379 usearch_handleNextExact(m_strsrch_, &status);
380 }
381 if (U_FAILURE(status)) {
382 return USEARCH_DONE;
383 }
384 if (m_breakiterator_ == NULL
385#if !UCONFIG_NO_BREAK_ITERATION
73c04bcf 386 ||
b75a7d8f
A
387 m_search_->matchedIndex == USEARCH_DONE ||
388 (m_breakiterator_->isBoundary(m_search_->matchedIndex) &&
73c04bcf 389 m_breakiterator_->isBoundary(m_search_->matchedIndex +
b75a7d8f
A
390 m_search_->matchedLength))
391#endif
392 ) {
374ca955 393 if (m_search_->matchedIndex == USEARCH_DONE) {
73c04bcf 394 ucol_setOffset(m_strsrch_->textIter,
374ca955
A
395 m_search_->textLength, &status);
396 }
397 else {
73c04bcf 398 ucol_setOffset(m_strsrch_->textIter,
374ca955
A
399 m_search_->matchedIndex, &status);
400 }
b75a7d8f
A
401 return m_search_->matchedIndex;
402 }
403 }
46f4442e
A
404#else
405 // if m_strsrch_->breakIter is always the same as m_breakiterator_
406 // then we don't need to check the match boundaries here because
407 // usearch_handleNextXXX will already have done it.
408 if (m_search_->isCanonicalMatch) {
409 // *could* actually use exact here 'cause no extra accents allowed...
410 usearch_handleNextCanonical(m_strsrch_, &status);
411 } else {
412 usearch_handleNextExact(m_strsrch_, &status);
413 }
414
415 if (U_FAILURE(status)) {
416 return USEARCH_DONE;
417 }
418
419 if (m_search_->matchedIndex == USEARCH_DONE) {
420 ucol_setOffset(m_strsrch_->textIter, m_search_->textLength, &status);
421 } else {
422 ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, &status);
423 }
424
425 return m_search_->matchedIndex;
426#endif
b75a7d8f
A
427 }
428 }
429 return USEARCH_DONE;
430}
431
432int32_t StringSearch::handlePrev(int32_t position, UErrorCode &status)
433{
434 // values passed here are already in the pre-shift position
435 if (U_SUCCESS(status)) {
436 if (m_strsrch_->pattern.CELength == 0) {
73c04bcf
A
437 m_search_->matchedIndex =
438 (m_search_->matchedIndex == USEARCH_DONE ? getOffset() :
b75a7d8f
A
439 m_search_->matchedIndex);
440 if (m_search_->matchedIndex == 0) {
441 setMatchNotFound();
442 }
443 else {
444 m_search_->matchedIndex --;
73c04bcf 445 ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
b75a7d8f
A
446 &status);
447 m_search_->matchedLength = 0;
448 }
449 }
450 else {
73c04bcf 451 // looking at usearch.cpp, this part is shifted out to
b75a7d8f
A
452 // StringSearch instead of SearchIterator because m_strsrch_ is
453 // not accessible in SearchIterator
46f4442e 454#if 0
73c04bcf 455 if (!m_search_->isOverlap &&
b75a7d8f
A
456 position - m_strsrch_->pattern.defaultShiftSize < 0) {
457 setMatchNotFound();
458 return USEARCH_DONE;
459 }
46f4442e
A
460
461 for (;;) {
b75a7d8f
A
462 if (m_search_->isCanonicalMatch) {
463 // can't use exact here since extra accents are allowed.
464 usearch_handlePreviousCanonical(m_strsrch_, &status);
465 }
466 else {
467 usearch_handlePreviousExact(m_strsrch_, &status);
468 }
469 if (U_FAILURE(status)) {
470 return USEARCH_DONE;
471 }
472 if (m_breakiterator_ == NULL
473#if !UCONFIG_NO_BREAK_ITERATION
73c04bcf 474 ||
b75a7d8f
A
475 m_search_->matchedIndex == USEARCH_DONE ||
476 (m_breakiterator_->isBoundary(m_search_->matchedIndex) &&
73c04bcf 477 m_breakiterator_->isBoundary(m_search_->matchedIndex +
b75a7d8f
A
478 m_search_->matchedLength))
479#endif
480 ) {
481 return m_search_->matchedIndex;
482 }
483 }
46f4442e
A
484#else
485 ucol_setOffset(m_strsrch_->textIter, position, &status);
486
487 if (m_search_->isCanonicalMatch) {
488 // *could* use exact match here since extra accents *not* allowed!
489 usearch_handlePreviousCanonical(m_strsrch_, &status);
490 } else {
491 usearch_handlePreviousExact(m_strsrch_, &status);
492 }
493
494 if (U_FAILURE(status)) {
495 return USEARCH_DONE;
496 }
497
498 return m_search_->matchedIndex;
499#endif
b75a7d8f 500 }
73c04bcf 501
b75a7d8f
A
502 return m_search_->matchedIndex;
503 }
504 return USEARCH_DONE;
505}
506
507U_NAMESPACE_END
508
509#endif /* #if !UCONFIG_NO_COLLATION */