]> git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/stsearch.cpp
ICU-8.11.1.tar.gz
[apple/icu.git] / icuSources / i18n / stsearch.cpp
1 /*
2 **********************************************************************
3 * Copyright (C) 2001-2006 IBM and others. All rights reserved.
4 **********************************************************************
5 * Date Name Description
6 * 03/22/2000 helena Creation.
7 **********************************************************************
8 */
9
10 #include "unicode/utypes.h"
11
12 #if !UCONFIG_NO_COLLATION
13
14 #include "unicode/stsearch.h"
15 #include "usrchimp.h"
16 #include "cmemory.h"
17
18 U_NAMESPACE_BEGIN
19
20 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringSearch)
21
22 // public constructors and destructors -----------------------------------
23
24 StringSearch::StringSearch(const UnicodeString &pattern,
25 const UnicodeString &text,
26 const Locale &locale,
27 BreakIterator *breakiter,
28 UErrorCode &status) :
29 SearchIterator(text, breakiter),
30 m_collator_(),
31 m_pattern_(pattern)
32 {
33 if (U_FAILURE(status)) {
34 m_strsrch_ = NULL;
35 return;
36 }
37
38 m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
39 m_text_.getBuffer(), m_text_.length(),
40 locale.getName(), (UBreakIterator *)breakiter,
41 &status);
42 uprv_free(m_search_);
43 m_search_ = NULL;
44
45 // !!! dlf m_collator_ is an odd beast. basically it is an aliasing
46 // wrapper around the internal collator and rules, which (here) are
47 // owned by this stringsearch object. this means 1) it's destructor
48 // _should not_ delete the ucollator or rules, and 2) changes made
49 // to the exposed collator (setStrength etc) _should_ modify the
50 // ucollator. thus the collator is not a copy-on-write alias, and it
51 // needs to distinguish itself not merely from 'stand alone' colators
52 // but also from copy-on-write ones. it needs additional state, which
53 // setUCollator should set.
54
55 if (U_SUCCESS(status)) {
56 // Alias the collator
57 m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
58 // m_search_ has been created by the base SearchIterator class
59 m_search_ = m_strsrch_->search;
60 }
61 }
62
63 StringSearch::StringSearch(const UnicodeString &pattern,
64 const UnicodeString &text,
65 RuleBasedCollator *coll,
66 BreakIterator *breakiter,
67 UErrorCode &status) :
68 SearchIterator(text, breakiter),
69 m_collator_(),
70 m_pattern_(pattern)
71 {
72 if (U_FAILURE(status)) {
73 m_strsrch_ = NULL;
74 return;
75 }
76 if (coll == NULL) {
77 status = U_ILLEGAL_ARGUMENT_ERROR;
78 m_strsrch_ = NULL;
79 return;
80 }
81 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
82 m_pattern_.length(),
83 m_text_.getBuffer(),
84 m_text_.length(), coll->ucollator,
85 (UBreakIterator *)breakiter,
86 &status);
87 uprv_free(m_search_);
88 m_search_ = NULL;
89
90 if (U_SUCCESS(status)) {
91 // Alias the collator
92 m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
93 // m_search_ has been created by the base SearchIterator class
94 m_search_ = m_strsrch_->search;
95 }
96 }
97
98 StringSearch::StringSearch(const UnicodeString &pattern,
99 CharacterIterator &text,
100 const Locale &locale,
101 BreakIterator *breakiter,
102 UErrorCode &status) :
103 SearchIterator(text, breakiter),
104 m_collator_(),
105 m_pattern_(pattern)
106 {
107 if (U_FAILURE(status)) {
108 m_strsrch_ = NULL;
109 return;
110 }
111 m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
112 m_text_.getBuffer(), m_text_.length(),
113 locale.getName(), (UBreakIterator *)breakiter,
114 &status);
115 uprv_free(m_search_);
116 m_search_ = NULL;
117
118 if (U_SUCCESS(status)) {
119 // Alias the collator
120 m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
121 // m_search_ has been created by the base SearchIterator class
122 m_search_ = m_strsrch_->search;
123 }
124 }
125
126 StringSearch::StringSearch(const UnicodeString &pattern,
127 CharacterIterator &text,
128 RuleBasedCollator *coll,
129 BreakIterator *breakiter,
130 UErrorCode &status) :
131 SearchIterator(text, breakiter),
132 m_collator_(),
133 m_pattern_(pattern)
134 {
135 if (U_FAILURE(status)) {
136 m_strsrch_ = NULL;
137 return;
138 }
139 if (coll == NULL) {
140 status = U_ILLEGAL_ARGUMENT_ERROR;
141 m_strsrch_ = NULL;
142 return;
143 }
144 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
145 m_pattern_.length(),
146 m_text_.getBuffer(),
147 m_text_.length(), coll->ucollator,
148 (UBreakIterator *)breakiter,
149 &status);
150 uprv_free(m_search_);
151 m_search_ = NULL;
152
153 if (U_SUCCESS(status)) {
154 // Alias the collator
155 m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
156 // m_search_ has been created by the base SearchIterator class
157 m_search_ = m_strsrch_->search;
158 }
159 }
160
161 StringSearch::StringSearch(const StringSearch &that) :
162 SearchIterator(that.m_text_, that.m_breakiterator_),
163 m_collator_(),
164 m_pattern_(that.m_pattern_)
165 {
166 UErrorCode status = U_ZERO_ERROR;
167
168 // Free m_search_ from the superclass
169 uprv_free(m_search_);
170 m_search_ = NULL;
171
172 if (that.m_strsrch_ == NULL) {
173 // This was not a good copy
174 m_strsrch_ = NULL;
175 }
176 else {
177 // Make a deep copy
178 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
179 m_pattern_.length(),
180 m_text_.getBuffer(),
181 m_text_.length(),
182 that.m_strsrch_->collator,
183 (UBreakIterator *)that.m_breakiterator_,
184 &status);
185 if (U_SUCCESS(status)) {
186 // Alias the collator
187 m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
188 // m_search_ has been created by the base SearchIterator class
189 m_search_ = m_strsrch_->search;
190 }
191 }
192 }
193
194 StringSearch::~StringSearch()
195 {
196 if (m_strsrch_ != NULL) {
197 usearch_close(m_strsrch_);
198 m_search_ = NULL;
199 }
200 }
201
202 StringSearch *
203 StringSearch::clone() const {
204 return new StringSearch(*this);
205 }
206
207 // operator overloading ---------------------------------------------
208 StringSearch & StringSearch::operator=(const StringSearch &that)
209 {
210 if ((*this) != that) {
211 UErrorCode status = U_ZERO_ERROR;
212 m_text_ = that.m_text_;
213 m_breakiterator_ = that.m_breakiterator_;
214 m_pattern_ = that.m_pattern_;
215 // all m_search_ in the parent class is linked up with m_strsrch_
216 usearch_close(m_strsrch_);
217 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
218 m_pattern_.length(),
219 m_text_.getBuffer(),
220 m_text_.length(),
221 that.m_strsrch_->collator,
222 NULL, &status);
223 // Alias the collator
224 m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
225 m_search_ = m_strsrch_->search;
226 }
227 return *this;
228 }
229
230 UBool StringSearch::operator==(const SearchIterator &that) const
231 {
232 if (this == &that) {
233 return TRUE;
234 }
235 if (SearchIterator::operator ==(that)) {
236 StringSearch &thatsrch = (StringSearch &)that;
237 return (this->m_pattern_ == thatsrch.m_pattern_ &&
238 this->m_strsrch_->collator == thatsrch.m_strsrch_->collator);
239 }
240 return FALSE;
241 }
242
243 // public get and set methods ----------------------------------------
244
245 void StringSearch::setOffset(int32_t position, UErrorCode &status)
246 {
247 // status checked in usearch_setOffset
248 usearch_setOffset(m_strsrch_, position, &status);
249 }
250
251 int32_t StringSearch::getOffset(void) const
252 {
253 return usearch_getOffset(m_strsrch_);
254 }
255
256 void StringSearch::setText(const UnicodeString &text, UErrorCode &status)
257 {
258 if (U_SUCCESS(status)) {
259 m_text_ = text;
260 usearch_setText(m_strsrch_, text.getBuffer(), text.length(), &status);
261 }
262 }
263
264 void StringSearch::setText(CharacterIterator &text, UErrorCode &status)
265 {
266 if (U_SUCCESS(status)) {
267 text.getText(m_text_);
268 usearch_setText(m_strsrch_, m_text_.getBuffer(), m_text_.length(), &status);
269 }
270 }
271
272 RuleBasedCollator * StringSearch::getCollator() const
273 {
274 return (RuleBasedCollator *)&m_collator_;
275 }
276
277 void StringSearch::setCollator(RuleBasedCollator *coll, UErrorCode &status)
278 {
279 if (U_SUCCESS(status)) {
280 usearch_setCollator(m_strsrch_, coll->getUCollator(), &status);
281 // Alias the collator
282 m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
283 }
284 }
285
286 void StringSearch::setPattern(const UnicodeString &pattern,
287 UErrorCode &status)
288 {
289 if (U_SUCCESS(status)) {
290 m_pattern_ = pattern;
291 usearch_setPattern(m_strsrch_, m_pattern_.getBuffer(), m_pattern_.length(),
292 &status);
293 }
294 }
295
296 const UnicodeString & StringSearch::getPattern() const
297 {
298 return m_pattern_;
299 }
300
301 // public methods ----------------------------------------------------
302
303 void StringSearch::reset()
304 {
305 usearch_reset(m_strsrch_);
306 }
307
308 SearchIterator * StringSearch::safeClone(void) const
309 {
310 UErrorCode status = U_ZERO_ERROR;
311 StringSearch *result = new StringSearch(m_pattern_, m_text_,
312 (RuleBasedCollator *)&m_collator_,
313 m_breakiterator_,
314 status);
315 /* test for NULL */
316 if (result == 0) {
317 status = U_MEMORY_ALLOCATION_ERROR;
318 return 0;
319 }
320 result->setOffset(getOffset(), status);
321 result->setMatchStart(m_strsrch_->search->matchedIndex);
322 result->setMatchLength(m_strsrch_->search->matchedLength);
323 if (U_FAILURE(status)) {
324 return NULL;
325 }
326 return result;
327 }
328
329 // protected method -------------------------------------------------
330
331 int32_t StringSearch::handleNext(int32_t position, UErrorCode &status)
332 {
333 // values passed here are already in the pre-shift position
334 if (U_SUCCESS(status)) {
335 if (m_strsrch_->pattern.CELength == 0) {
336 m_search_->matchedIndex =
337 m_search_->matchedIndex == USEARCH_DONE ?
338 getOffset() : m_search_->matchedIndex + 1;
339 m_search_->matchedLength = 0;
340 ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
341 &status);
342 if (m_search_->matchedIndex == m_search_->textLength) {
343 m_search_->matchedIndex = USEARCH_DONE;
344 }
345 }
346 else {
347 // looking at usearch.cpp, this part is shifted out to
348 // StringSearch instead of SearchIterator because m_strsrch_ is
349 // not accessible in SearchIterator
350 if (position + m_strsrch_->pattern.defaultShiftSize
351 > m_search_->textLength) {
352 setMatchNotFound();
353 return USEARCH_DONE;
354 }
355 if (m_search_->matchedLength <= 0) {
356 // the flipping direction issue has already been handled
357 // in next()
358 // for boundary check purposes. this will ensure that the
359 // next match will not preceed the current offset
360 // note search->matchedIndex will always be set to something
361 // in the code
362 m_search_->matchedIndex = position - 1;
363 }
364
365 ucol_setOffset(m_strsrch_->textIter, position, &status);
366 while (TRUE) {
367 if (m_search_->isCanonicalMatch) {
368 // can't use exact here since extra accents are allowed.
369 usearch_handleNextCanonical(m_strsrch_, &status);
370 }
371 else {
372 usearch_handleNextExact(m_strsrch_, &status);
373 }
374 if (U_FAILURE(status)) {
375 return USEARCH_DONE;
376 }
377 if (m_breakiterator_ == NULL
378 #if !UCONFIG_NO_BREAK_ITERATION
379 ||
380 m_search_->matchedIndex == USEARCH_DONE ||
381 (m_breakiterator_->isBoundary(m_search_->matchedIndex) &&
382 m_breakiterator_->isBoundary(m_search_->matchedIndex +
383 m_search_->matchedLength))
384 #endif
385 ) {
386 if (m_search_->matchedIndex == USEARCH_DONE) {
387 ucol_setOffset(m_strsrch_->textIter,
388 m_search_->textLength, &status);
389 }
390 else {
391 ucol_setOffset(m_strsrch_->textIter,
392 m_search_->matchedIndex, &status);
393 }
394 return m_search_->matchedIndex;
395 }
396 }
397 }
398 }
399 return USEARCH_DONE;
400 }
401
402 int32_t StringSearch::handlePrev(int32_t position, UErrorCode &status)
403 {
404 // values passed here are already in the pre-shift position
405 if (U_SUCCESS(status)) {
406 if (m_strsrch_->pattern.CELength == 0) {
407 m_search_->matchedIndex =
408 (m_search_->matchedIndex == USEARCH_DONE ? getOffset() :
409 m_search_->matchedIndex);
410 if (m_search_->matchedIndex == 0) {
411 setMatchNotFound();
412 }
413 else {
414 m_search_->matchedIndex --;
415 ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
416 &status);
417 m_search_->matchedLength = 0;
418 }
419 }
420 else {
421 // looking at usearch.cpp, this part is shifted out to
422 // StringSearch instead of SearchIterator because m_strsrch_ is
423 // not accessible in SearchIterator
424 if (!m_search_->isOverlap &&
425 position - m_strsrch_->pattern.defaultShiftSize < 0) {
426 setMatchNotFound();
427 return USEARCH_DONE;
428 }
429 while (TRUE) {
430 if (m_search_->isCanonicalMatch) {
431 // can't use exact here since extra accents are allowed.
432 usearch_handlePreviousCanonical(m_strsrch_, &status);
433 }
434 else {
435 usearch_handlePreviousExact(m_strsrch_, &status);
436 }
437 if (U_FAILURE(status)) {
438 return USEARCH_DONE;
439 }
440 if (m_breakiterator_ == NULL
441 #if !UCONFIG_NO_BREAK_ITERATION
442 ||
443 m_search_->matchedIndex == USEARCH_DONE ||
444 (m_breakiterator_->isBoundary(m_search_->matchedIndex) &&
445 m_breakiterator_->isBoundary(m_search_->matchedIndex +
446 m_search_->matchedLength))
447 #endif
448 ) {
449 return m_search_->matchedIndex;
450 }
451 }
452 }
453
454 return m_search_->matchedIndex;
455 }
456 return USEARCH_DONE;
457 }
458
459 U_NAMESPACE_END
460
461 #endif /* #if !UCONFIG_NO_COLLATION */