]> git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/stsearch.cpp
ICU-6.2.14.tar.gz
[apple/icu.git] / icuSources / i18n / stsearch.cpp
1 /*
2 **********************************************************************
3 * Copyright (C) 2001-2003 IBM and others. All rights reserved.
4 **********************************************************************
5 * Date Name Description
6 * 03/22/2000 helena Creation.
7 **********************************************************************
8 */
9
10 #include "unicode/utypes.h"
11
12 #if !UCONFIG_NO_COLLATION
13
14 #include "unicode/stsearch.h"
15 #include "usrchimp.h"
16 #include "cmemory.h"
17
18 U_NAMESPACE_BEGIN
19
20 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringSearch)
21
22 // public constructors and destructors -----------------------------------
23
24 StringSearch::StringSearch(const UnicodeString &pattern,
25 const UnicodeString &text,
26 const Locale &locale,
27 BreakIterator *breakiter,
28 UErrorCode &status) :
29 SearchIterator(text, breakiter),
30 m_collator_(),
31 m_pattern_(pattern)
32 {
33 if (U_FAILURE(status)) {
34 m_strsrch_ = NULL;
35 return;
36 }
37
38 m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
39 m_text_.getBuffer(), m_text_.length(),
40 locale.getName(), (UBreakIterator *)breakiter,
41 &status);
42 uprv_free(m_search_);
43 m_search_ = NULL;
44
45 // !!! dlf m_collator_ is an odd beast. basically it is an aliasing
46 // wrapper around the internal collator and rules, which (here) are
47 // owned by this stringsearch object. this means 1) it's destructor
48 // _should not_ delete the ucollator or rules, and 2) changes made
49 // to the exposed collator (setStrength etc) _should_ modify the
50 // ucollator. thus the collator is not a copy-on-write alias, and it
51 // needs to distinguish itself not merely from 'stand alone' colators
52 // but also from copy-on-write ones. it needs additional state, which
53 // setUCollator should set.
54
55 if (U_SUCCESS(status)) {
56 int32_t length;
57 const UChar *rules = ucol_getRules(m_strsrch_->collator, &length);
58 m_collation_rules_.setTo(rules, length);
59 m_collator_.setUCollator((UCollator *)m_strsrch_->collator,
60 &m_collation_rules_);
61 // m_search_ has been created by the base SearchIterator class
62 m_search_ = m_strsrch_->search;
63 }
64 }
65
66 StringSearch::StringSearch(const UnicodeString &pattern,
67 const UnicodeString &text,
68 RuleBasedCollator *coll,
69 BreakIterator *breakiter,
70 UErrorCode &status) :
71 SearchIterator(text, breakiter),
72 m_collator_(),
73 m_pattern_(pattern)
74 {
75 if (U_FAILURE(status)) {
76 m_strsrch_ = NULL;
77 return;
78 }
79 if (coll == NULL) {
80 status = U_ILLEGAL_ARGUMENT_ERROR;
81 m_strsrch_ = NULL;
82 return;
83 }
84 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
85 m_pattern_.length(),
86 m_text_.getBuffer(),
87 m_text_.length(), coll->ucollator,
88 (UBreakIterator *)breakiter,
89 &status);
90 uprv_free(m_search_);
91 m_search_ = NULL;
92
93 if (U_SUCCESS(status)) {
94 int32_t length;
95 const UChar *rules = ucol_getRules(m_strsrch_->collator, &length);
96 m_collation_rules_.setTo(rules, length);
97 m_collator_.setUCollator((UCollator *)m_strsrch_->collator,
98 &m_collation_rules_);
99 // m_search_ has been created by the base SearchIterator class
100 m_search_ = m_strsrch_->search;
101 }
102 }
103
104 StringSearch::StringSearch(const UnicodeString &pattern,
105 CharacterIterator &text,
106 const Locale &locale,
107 BreakIterator *breakiter,
108 UErrorCode &status) :
109 SearchIterator(text, breakiter),
110 m_collator_(),
111 m_pattern_(pattern)
112 {
113 if (U_FAILURE(status)) {
114 m_strsrch_ = NULL;
115 return;
116 }
117 m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
118 m_text_.getBuffer(), m_text_.length(),
119 locale.getName(), (UBreakIterator *)breakiter,
120 &status);
121 uprv_free(m_search_);
122 m_search_ = NULL;
123
124 if (U_SUCCESS(status)) {
125 int32_t length;
126 const UChar *rules = ucol_getRules(m_strsrch_->collator, &length);
127 m_collation_rules_.setTo(rules, length);
128 m_collator_.setUCollator((UCollator *)m_strsrch_->collator,
129 &m_collation_rules_);
130 // m_search_ has been created by the base SearchIterator class
131 m_search_ = m_strsrch_->search;
132 }
133 }
134
135 StringSearch::StringSearch(const UnicodeString &pattern,
136 CharacterIterator &text,
137 RuleBasedCollator *coll,
138 BreakIterator *breakiter,
139 UErrorCode &status) :
140 SearchIterator(text, breakiter),
141 m_collator_(),
142 m_pattern_(pattern)
143 {
144 if (U_FAILURE(status)) {
145 m_strsrch_ = NULL;
146 return;
147 }
148 if (coll == NULL) {
149 status = U_ILLEGAL_ARGUMENT_ERROR;
150 m_strsrch_ = NULL;
151 return;
152 }
153 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
154 m_pattern_.length(),
155 m_text_.getBuffer(),
156 m_text_.length(), coll->ucollator,
157 (UBreakIterator *)breakiter,
158 &status);
159 uprv_free(m_search_);
160 m_search_ = NULL;
161
162 if (U_SUCCESS(status)) {
163 int32_t length;
164 const UChar *rules = ucol_getRules(m_strsrch_->collator, &length);
165 m_collation_rules_.setTo(rules, length);
166 m_collator_.setUCollator((UCollator *)m_strsrch_->collator,
167 &m_collation_rules_);
168 // m_search_ has been created by the base SearchIterator class
169 m_search_ = m_strsrch_->search;
170 }
171 }
172
173 StringSearch::StringSearch(const StringSearch &that) :
174 SearchIterator(that.m_text_, that.m_breakiterator_),
175 m_collator_(),
176 m_pattern_(that.m_pattern_)
177 {
178 UErrorCode status = U_ZERO_ERROR;
179 if (that.m_strsrch_ == NULL) {
180 m_strsrch_ = NULL;
181 status = U_ILLEGAL_ARGUMENT_ERROR;
182 }
183 else {
184 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
185 m_pattern_.length(),
186 m_text_.getBuffer(),
187 m_text_.length(),
188 that.m_strsrch_->collator,
189 (UBreakIterator *)that.m_breakiterator_,
190 &status);
191 }
192 uprv_free(m_search_);
193 m_search_ = NULL;
194
195 if (U_SUCCESS(status)) {
196 int32_t length;
197 const UChar *rules = ucol_getRules(m_strsrch_->collator, &length);
198 m_collation_rules_.setTo(rules, length);
199 m_collator_.setUCollator((UCollator *)m_strsrch_->collator,
200 &m_collation_rules_);
201 // m_search_ has been created by the base SearchIterator class
202 m_search_ = m_strsrch_->search;
203 m_breakiterator_ = that.m_breakiterator_;
204 }
205 }
206
207 StringSearch::~StringSearch()
208 {
209 if (m_strsrch_ != NULL) {
210 usearch_close(m_strsrch_);
211 m_search_ = NULL;
212 }
213 }
214
215 StringSearch *
216 StringSearch::clone() const {
217 return new StringSearch(*this);
218 }
219
220 // operator overloading ---------------------------------------------
221 StringSearch & StringSearch::operator=(const StringSearch &that)
222 {
223 if ((*this) != that) {
224 UErrorCode status = U_ZERO_ERROR;
225 m_text_ = that.m_text_;
226 m_breakiterator_ = that.m_breakiterator_;
227 m_pattern_ = that.m_pattern_;
228 // all m_search_ in the parent class is linked up with m_strsrch_
229 usearch_close(m_strsrch_);
230 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
231 m_pattern_.length(),
232 m_text_.getBuffer(),
233 m_text_.length(),
234 that.m_strsrch_->collator,
235 NULL, &status);
236 int32_t length;
237 const UChar *rules = ucol_getRules(m_strsrch_->collator, &length);
238 m_collation_rules_.setTo(rules, length);
239 m_collator_.setUCollator((UCollator *)m_strsrch_->collator,
240 &m_collation_rules_);
241 m_search_ = m_strsrch_->search;
242 }
243 return *this;
244 }
245
246 UBool StringSearch::operator==(const SearchIterator &that) const
247 {
248 if (this == &that) {
249 return TRUE;
250 }
251 if (SearchIterator::operator ==(that)) {
252 StringSearch &thatsrch = (StringSearch &)that;
253 return (this->m_pattern_ == thatsrch.m_pattern_ &&
254 this->m_strsrch_->collator == thatsrch.m_strsrch_->collator);
255 }
256 return FALSE;
257 }
258
259 // public get and set methods ----------------------------------------
260
261 void StringSearch::setOffset(int32_t position, UErrorCode &status)
262 {
263 // status checked in usearch_setOffset
264 usearch_setOffset(m_strsrch_, position, &status);
265 }
266
267 int32_t StringSearch::getOffset(void) const
268 {
269 return usearch_getOffset(m_strsrch_);
270 }
271
272 void StringSearch::setText(const UnicodeString &text, UErrorCode &status)
273 {
274 if (U_SUCCESS(status)) {
275 m_text_ = text;
276 usearch_setText(m_strsrch_, text.getBuffer(), text.length(), &status);
277 }
278 }
279
280 void StringSearch::setText(CharacterIterator &text, UErrorCode &status)
281 {
282 if (U_SUCCESS(status)) {
283 text.getText(m_text_);
284 usearch_setText(m_strsrch_, m_text_.getBuffer(), m_text_.length(), &status);
285 }
286 }
287
288 RuleBasedCollator * StringSearch::getCollator() const
289 {
290 return (RuleBasedCollator *)&m_collator_;
291 }
292
293 void StringSearch::setCollator(RuleBasedCollator *coll, UErrorCode &status)
294 {
295 if (U_SUCCESS(status)) {
296 usearch_setCollator(m_strsrch_, coll->getUCollator(), &status);
297 m_collation_rules_.setTo(coll->getRules());
298 m_collator_.setUCollator((UCollator *)m_strsrch_->collator,
299 &m_collation_rules_);
300 }
301 }
302
303 void StringSearch::setPattern(const UnicodeString &pattern,
304 UErrorCode &status)
305 {
306 if (U_SUCCESS(status)) {
307 m_pattern_ = pattern;
308 usearch_setPattern(m_strsrch_, m_pattern_.getBuffer(), m_pattern_.length(),
309 &status);
310 }
311 }
312
313 const UnicodeString & StringSearch::getPattern() const
314 {
315 return m_pattern_;
316 }
317
318 // public methods ----------------------------------------------------
319
320 void StringSearch::reset()
321 {
322 usearch_reset(m_strsrch_);
323 }
324
325 SearchIterator * StringSearch::safeClone(void) const
326 {
327 UErrorCode status = U_ZERO_ERROR;
328 StringSearch *result = new StringSearch(m_pattern_, m_text_,
329 (RuleBasedCollator *)&m_collator_,
330 m_breakiterator_,
331 status);
332 /* test for NULL */
333 if (result == 0) {
334 status = U_MEMORY_ALLOCATION_ERROR;
335 return 0;
336 }
337 result->setOffset(getOffset(), status);
338 result->setMatchStart(m_strsrch_->search->matchedIndex);
339 result->setMatchLength(m_strsrch_->search->matchedLength);
340 if (U_FAILURE(status)) {
341 return NULL;
342 }
343 return result;
344 }
345
346 // protected method -------------------------------------------------
347
348 int32_t StringSearch::handleNext(int32_t position, UErrorCode &status)
349 {
350 // values passed here are already in the pre-shift position
351 if (U_SUCCESS(status)) {
352 if (m_strsrch_->pattern.CELength == 0) {
353 m_search_->matchedIndex =
354 m_search_->matchedIndex == USEARCH_DONE ?
355 getOffset() : m_search_->matchedIndex + 1;
356 m_search_->matchedLength = 0;
357 ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
358 &status);
359 if (m_search_->matchedIndex == m_search_->textLength) {
360 m_search_->matchedIndex = USEARCH_DONE;
361 }
362 }
363 else {
364 // looking at usearch.cpp, this part is shifted out to
365 // StringSearch instead of SearchIterator because m_strsrch_ is
366 // not accessible in SearchIterator
367 if (position + m_strsrch_->pattern.defaultShiftSize
368 > m_search_->textLength) {
369 setMatchNotFound();
370 return USEARCH_DONE;
371 }
372 if (m_search_->matchedLength <= 0) {
373 // the flipping direction issue has already been handled
374 // in next()
375 // for boundary check purposes. this will ensure that the
376 // next match will not preceed the current offset
377 // note search->matchedIndex will always be set to something
378 // in the code
379 m_search_->matchedIndex = position - 1;
380 }
381
382 ucol_setOffset(m_strsrch_->textIter, position, &status);
383 while (TRUE) {
384 if (m_search_->isCanonicalMatch) {
385 // can't use exact here since extra accents are allowed.
386 usearch_handleNextCanonical(m_strsrch_, &status);
387 }
388 else {
389 usearch_handleNextExact(m_strsrch_, &status);
390 }
391 if (U_FAILURE(status)) {
392 return USEARCH_DONE;
393 }
394 if (m_breakiterator_ == NULL
395 #if !UCONFIG_NO_BREAK_ITERATION
396 ||
397 m_search_->matchedIndex == USEARCH_DONE ||
398 (m_breakiterator_->isBoundary(m_search_->matchedIndex) &&
399 m_breakiterator_->isBoundary(m_search_->matchedIndex +
400 m_search_->matchedLength))
401 #endif
402 ) {
403 if (m_search_->matchedIndex == USEARCH_DONE) {
404 ucol_setOffset(m_strsrch_->textIter,
405 m_search_->textLength, &status);
406 }
407 else {
408 ucol_setOffset(m_strsrch_->textIter,
409 m_search_->matchedIndex, &status);
410 }
411 return m_search_->matchedIndex;
412 }
413 }
414 }
415 }
416 return USEARCH_DONE;
417 }
418
419 int32_t StringSearch::handlePrev(int32_t position, UErrorCode &status)
420 {
421 // values passed here are already in the pre-shift position
422 if (U_SUCCESS(status)) {
423 if (m_strsrch_->pattern.CELength == 0) {
424 m_search_->matchedIndex =
425 (m_search_->matchedIndex == USEARCH_DONE ? getOffset() :
426 m_search_->matchedIndex);
427 if (m_search_->matchedIndex == 0) {
428 setMatchNotFound();
429 }
430 else {
431 m_search_->matchedIndex --;
432 ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
433 &status);
434 m_search_->matchedLength = 0;
435 }
436 }
437 else {
438 // looking at usearch.cpp, this part is shifted out to
439 // StringSearch instead of SearchIterator because m_strsrch_ is
440 // not accessible in SearchIterator
441 if (!m_search_->isOverlap &&
442 position - m_strsrch_->pattern.defaultShiftSize < 0) {
443 setMatchNotFound();
444 return USEARCH_DONE;
445 }
446 while (TRUE) {
447 if (m_search_->isCanonicalMatch) {
448 // can't use exact here since extra accents are allowed.
449 usearch_handlePreviousCanonical(m_strsrch_, &status);
450 }
451 else {
452 usearch_handlePreviousExact(m_strsrch_, &status);
453 }
454 if (U_FAILURE(status)) {
455 return USEARCH_DONE;
456 }
457 if (m_breakiterator_ == NULL
458 #if !UCONFIG_NO_BREAK_ITERATION
459 ||
460 m_search_->matchedIndex == USEARCH_DONE ||
461 (m_breakiterator_->isBoundary(m_search_->matchedIndex) &&
462 m_breakiterator_->isBoundary(m_search_->matchedIndex +
463 m_search_->matchedLength))
464 #endif
465 ) {
466 return m_search_->matchedIndex;
467 }
468 }
469 }
470
471 return m_search_->matchedIndex;
472 }
473 return USEARCH_DONE;
474 }
475
476 U_NAMESPACE_END
477
478 #endif /* #if !UCONFIG_NO_COLLATION */