]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/stsearch.cpp
ICU-6.2.14.tar.gz
[apple/icu.git] / icuSources / i18n / stsearch.cpp
CommitLineData
b75a7d8f
A
/*
**********************************************************************
*   Copyright (C) 2001-2003 IBM and others. All rights reserved.
**********************************************************************
*   Date        Name        Description
*  03/22/2000   helena      Creation.
**********************************************************************
*/
9
10#include "unicode/utypes.h"
11
12#if !UCONFIG_NO_COLLATION
13
14#include "unicode/stsearch.h"
15#include "usrchimp.h"
16#include "cmemory.h"
17
18U_NAMESPACE_BEGIN
19
374ca955 20UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringSearch)
b75a7d8f
A
21
// public constructors and destructors -----------------------------------
23
24StringSearch::StringSearch(const UnicodeString &pattern,
25 const UnicodeString &text,
26 const Locale &locale,
27 BreakIterator *breakiter,
28 UErrorCode &status) :
29 SearchIterator(text, breakiter),
30 m_collator_(),
31 m_pattern_(pattern)
32{
33 if (U_FAILURE(status)) {
34 m_strsrch_ = NULL;
35 return;
36 }
37
38 m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
39 m_text_.getBuffer(), m_text_.length(),
40 locale.getName(), (UBreakIterator *)breakiter,
41 &status);
42 uprv_free(m_search_);
43 m_search_ = NULL;
44
374ca955
A
45 // !!! dlf m_collator_ is an odd beast. basically it is an aliasing
46 // wrapper around the internal collator and rules, which (here) are
47 // owned by this stringsearch object. this means 1) it's destructor
48 // _should not_ delete the ucollator or rules, and 2) changes made
49 // to the exposed collator (setStrength etc) _should_ modify the
50 // ucollator. thus the collator is not a copy-on-write alias, and it
51 // needs to distinguish itself not merely from 'stand alone' colators
52 // but also from copy-on-write ones. it needs additional state, which
53 // setUCollator should set.
b75a7d8f
A
54
55 if (U_SUCCESS(status)) {
56 int32_t length;
57 const UChar *rules = ucol_getRules(m_strsrch_->collator, &length);
58 m_collation_rules_.setTo(rules, length);
59 m_collator_.setUCollator((UCollator *)m_strsrch_->collator,
60 &m_collation_rules_);
61 // m_search_ has been created by the base SearchIterator class
62 m_search_ = m_strsrch_->search;
63 }
64}
65
66StringSearch::StringSearch(const UnicodeString &pattern,
67 const UnicodeString &text,
68 RuleBasedCollator *coll,
69 BreakIterator *breakiter,
70 UErrorCode &status) :
71 SearchIterator(text, breakiter),
72 m_collator_(),
73 m_pattern_(pattern)
74{
75 if (U_FAILURE(status)) {
76 m_strsrch_ = NULL;
77 return;
78 }
79 if (coll == NULL) {
80 status = U_ILLEGAL_ARGUMENT_ERROR;
81 m_strsrch_ = NULL;
82 return;
83 }
84 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
85 m_pattern_.length(),
86 m_text_.getBuffer(),
87 m_text_.length(), coll->ucollator,
88 (UBreakIterator *)breakiter,
89 &status);
90 uprv_free(m_search_);
91 m_search_ = NULL;
92
93 if (U_SUCCESS(status)) {
94 int32_t length;
95 const UChar *rules = ucol_getRules(m_strsrch_->collator, &length);
96 m_collation_rules_.setTo(rules, length);
97 m_collator_.setUCollator((UCollator *)m_strsrch_->collator,
98 &m_collation_rules_);
99 // m_search_ has been created by the base SearchIterator class
100 m_search_ = m_strsrch_->search;
101 }
102}
103
104StringSearch::StringSearch(const UnicodeString &pattern,
105 CharacterIterator &text,
106 const Locale &locale,
107 BreakIterator *breakiter,
108 UErrorCode &status) :
109 SearchIterator(text, breakiter),
110 m_collator_(),
111 m_pattern_(pattern)
112{
113 if (U_FAILURE(status)) {
114 m_strsrch_ = NULL;
115 return;
116 }
117 m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
118 m_text_.getBuffer(), m_text_.length(),
119 locale.getName(), (UBreakIterator *)breakiter,
120 &status);
121 uprv_free(m_search_);
122 m_search_ = NULL;
123
124 if (U_SUCCESS(status)) {
125 int32_t length;
126 const UChar *rules = ucol_getRules(m_strsrch_->collator, &length);
127 m_collation_rules_.setTo(rules, length);
128 m_collator_.setUCollator((UCollator *)m_strsrch_->collator,
129 &m_collation_rules_);
130 // m_search_ has been created by the base SearchIterator class
131 m_search_ = m_strsrch_->search;
132 }
133}
134
135StringSearch::StringSearch(const UnicodeString &pattern,
136 CharacterIterator &text,
137 RuleBasedCollator *coll,
138 BreakIterator *breakiter,
139 UErrorCode &status) :
140 SearchIterator(text, breakiter),
141 m_collator_(),
142 m_pattern_(pattern)
143{
144 if (U_FAILURE(status)) {
145 m_strsrch_ = NULL;
146 return;
147 }
148 if (coll == NULL) {
149 status = U_ILLEGAL_ARGUMENT_ERROR;
150 m_strsrch_ = NULL;
151 return;
152 }
153 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
154 m_pattern_.length(),
155 m_text_.getBuffer(),
156 m_text_.length(), coll->ucollator,
157 (UBreakIterator *)breakiter,
158 &status);
159 uprv_free(m_search_);
160 m_search_ = NULL;
161
162 if (U_SUCCESS(status)) {
163 int32_t length;
164 const UChar *rules = ucol_getRules(m_strsrch_->collator, &length);
165 m_collation_rules_.setTo(rules, length);
166 m_collator_.setUCollator((UCollator *)m_strsrch_->collator,
167 &m_collation_rules_);
168 // m_search_ has been created by the base SearchIterator class
169 m_search_ = m_strsrch_->search;
170 }
171}
172
173StringSearch::StringSearch(const StringSearch &that) :
174 SearchIterator(that.m_text_, that.m_breakiterator_),
175 m_collator_(),
176 m_pattern_(that.m_pattern_)
177{
178 UErrorCode status = U_ZERO_ERROR;
179 if (that.m_strsrch_ == NULL) {
180 m_strsrch_ = NULL;
181 status = U_ILLEGAL_ARGUMENT_ERROR;
182 }
183 else {
184 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
185 m_pattern_.length(),
186 m_text_.getBuffer(),
187 m_text_.length(),
188 that.m_strsrch_->collator,
189 (UBreakIterator *)that.m_breakiterator_,
190 &status);
191 }
192 uprv_free(m_search_);
193 m_search_ = NULL;
194
195 if (U_SUCCESS(status)) {
196 int32_t length;
197 const UChar *rules = ucol_getRules(m_strsrch_->collator, &length);
198 m_collation_rules_.setTo(rules, length);
199 m_collator_.setUCollator((UCollator *)m_strsrch_->collator,
200 &m_collation_rules_);
201 // m_search_ has been created by the base SearchIterator class
202 m_search_ = m_strsrch_->search;
203 m_breakiterator_ = that.m_breakiterator_;
204 }
205}
206
207StringSearch::~StringSearch()
208{
209 if (m_strsrch_ != NULL) {
210 usearch_close(m_strsrch_);
211 m_search_ = NULL;
212 }
213}
214
374ca955
A
215StringSearch *
216StringSearch::clone() const {
217 return new StringSearch(*this);
218}
219
b75a7d8f
A
// operator overloading ---------------------------------------------
221StringSearch & StringSearch::operator=(const StringSearch &that)
222{
223 if ((*this) != that) {
224 UErrorCode status = U_ZERO_ERROR;
225 m_text_ = that.m_text_;
226 m_breakiterator_ = that.m_breakiterator_;
227 m_pattern_ = that.m_pattern_;
228 // all m_search_ in the parent class is linked up with m_strsrch_
229 usearch_close(m_strsrch_);
230 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
231 m_pattern_.length(),
232 m_text_.getBuffer(),
233 m_text_.length(),
234 that.m_strsrch_->collator,
235 NULL, &status);
236 int32_t length;
237 const UChar *rules = ucol_getRules(m_strsrch_->collator, &length);
238 m_collation_rules_.setTo(rules, length);
239 m_collator_.setUCollator((UCollator *)m_strsrch_->collator,
240 &m_collation_rules_);
241 m_search_ = m_strsrch_->search;
242 }
243 return *this;
244}
245
246UBool StringSearch::operator==(const SearchIterator &that) const
247{
248 if (this == &that) {
249 return TRUE;
250 }
251 if (SearchIterator::operator ==(that)) {
252 StringSearch &thatsrch = (StringSearch &)that;
253 return (this->m_pattern_ == thatsrch.m_pattern_ &&
254 this->m_strsrch_->collator == thatsrch.m_strsrch_->collator);
255 }
256 return FALSE;
257}
258
// public get and set methods ----------------------------------------
260
261void StringSearch::setOffset(int32_t position, UErrorCode &status)
262{
263 // status checked in usearch_setOffset
264 usearch_setOffset(m_strsrch_, position, &status);
265}
266
267int32_t StringSearch::getOffset(void) const
268{
269 return usearch_getOffset(m_strsrch_);
270}
271
272void StringSearch::setText(const UnicodeString &text, UErrorCode &status)
273{
274 if (U_SUCCESS(status)) {
275 m_text_ = text;
276 usearch_setText(m_strsrch_, text.getBuffer(), text.length(), &status);
277 }
278}
279
280void StringSearch::setText(CharacterIterator &text, UErrorCode &status)
281{
282 if (U_SUCCESS(status)) {
283 text.getText(m_text_);
284 usearch_setText(m_strsrch_, m_text_.getBuffer(), m_text_.length(), &status);
285 }
286}
287
288RuleBasedCollator * StringSearch::getCollator() const
289{
290 return (RuleBasedCollator *)&m_collator_;
291}
292
293void StringSearch::setCollator(RuleBasedCollator *coll, UErrorCode &status)
294{
295 if (U_SUCCESS(status)) {
296 usearch_setCollator(m_strsrch_, coll->getUCollator(), &status);
297 m_collation_rules_.setTo(coll->getRules());
298 m_collator_.setUCollator((UCollator *)m_strsrch_->collator,
299 &m_collation_rules_);
300 }
301}
302
303void StringSearch::setPattern(const UnicodeString &pattern,
304 UErrorCode &status)
305{
306 if (U_SUCCESS(status)) {
307 m_pattern_ = pattern;
308 usearch_setPattern(m_strsrch_, m_pattern_.getBuffer(), m_pattern_.length(),
309 &status);
310 }
311}
312
313const UnicodeString & StringSearch::getPattern() const
314{
315 return m_pattern_;
316}
317
// public methods ----------------------------------------------------
319
320void StringSearch::reset()
321{
322 usearch_reset(m_strsrch_);
323}
324
325SearchIterator * StringSearch::safeClone(void) const
326{
327 UErrorCode status = U_ZERO_ERROR;
328 StringSearch *result = new StringSearch(m_pattern_, m_text_,
329 (RuleBasedCollator *)&m_collator_,
330 m_breakiterator_,
331 status);
332 /* test for NULL */
333 if (result == 0) {
334 status = U_MEMORY_ALLOCATION_ERROR;
335 return 0;
336 }
337 result->setOffset(getOffset(), status);
338 result->setMatchStart(m_strsrch_->search->matchedIndex);
339 result->setMatchLength(m_strsrch_->search->matchedLength);
340 if (U_FAILURE(status)) {
341 return NULL;
342 }
343 return result;
344}
345
// protected method -------------------------------------------------
347
348int32_t StringSearch::handleNext(int32_t position, UErrorCode &status)
349{
350 // values passed here are already in the pre-shift position
351 if (U_SUCCESS(status)) {
352 if (m_strsrch_->pattern.CELength == 0) {
353 m_search_->matchedIndex =
354 m_search_->matchedIndex == USEARCH_DONE ?
355 getOffset() : m_search_->matchedIndex + 1;
356 m_search_->matchedLength = 0;
357 ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
358 &status);
359 if (m_search_->matchedIndex == m_search_->textLength) {
360 m_search_->matchedIndex = USEARCH_DONE;
361 }
362 }
363 else {
364 // looking at usearch.cpp, this part is shifted out to
365 // StringSearch instead of SearchIterator because m_strsrch_ is
366 // not accessible in SearchIterator
367 if (position + m_strsrch_->pattern.defaultShiftSize
374ca955 368 > m_search_->textLength) {
b75a7d8f
A
369 setMatchNotFound();
370 return USEARCH_DONE;
371 }
374ca955
A
372 if (m_search_->matchedLength <= 0) {
373 // the flipping direction issue has already been handled
374 // in next()
375 // for boundary check purposes. this will ensure that the
376 // next match will not preceed the current offset
377 // note search->matchedIndex will always be set to something
378 // in the code
379 m_search_->matchedIndex = position - 1;
380 }
381
382 ucol_setOffset(m_strsrch_->textIter, position, &status);
b75a7d8f
A
383 while (TRUE) {
384 if (m_search_->isCanonicalMatch) {
385 // can't use exact here since extra accents are allowed.
386 usearch_handleNextCanonical(m_strsrch_, &status);
387 }
388 else {
389 usearch_handleNextExact(m_strsrch_, &status);
390 }
391 if (U_FAILURE(status)) {
392 return USEARCH_DONE;
393 }
394 if (m_breakiterator_ == NULL
395#if !UCONFIG_NO_BREAK_ITERATION
396 ||
397 m_search_->matchedIndex == USEARCH_DONE ||
398 (m_breakiterator_->isBoundary(m_search_->matchedIndex) &&
399 m_breakiterator_->isBoundary(m_search_->matchedIndex +
400 m_search_->matchedLength))
401#endif
402 ) {
374ca955
A
403 if (m_search_->matchedIndex == USEARCH_DONE) {
404 ucol_setOffset(m_strsrch_->textIter,
405 m_search_->textLength, &status);
406 }
407 else {
408 ucol_setOffset(m_strsrch_->textIter,
409 m_search_->matchedIndex, &status);
410 }
b75a7d8f
A
411 return m_search_->matchedIndex;
412 }
413 }
414 }
415 }
416 return USEARCH_DONE;
417}
418
419int32_t StringSearch::handlePrev(int32_t position, UErrorCode &status)
420{
421 // values passed here are already in the pre-shift position
422 if (U_SUCCESS(status)) {
423 if (m_strsrch_->pattern.CELength == 0) {
424 m_search_->matchedIndex =
425 (m_search_->matchedIndex == USEARCH_DONE ? getOffset() :
426 m_search_->matchedIndex);
427 if (m_search_->matchedIndex == 0) {
428 setMatchNotFound();
429 }
430 else {
431 m_search_->matchedIndex --;
432 ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
433 &status);
434 m_search_->matchedLength = 0;
435 }
436 }
437 else {
438 // looking at usearch.cpp, this part is shifted out to
439 // StringSearch instead of SearchIterator because m_strsrch_ is
440 // not accessible in SearchIterator
441 if (!m_search_->isOverlap &&
442 position - m_strsrch_->pattern.defaultShiftSize < 0) {
443 setMatchNotFound();
444 return USEARCH_DONE;
445 }
446 while (TRUE) {
447 if (m_search_->isCanonicalMatch) {
448 // can't use exact here since extra accents are allowed.
449 usearch_handlePreviousCanonical(m_strsrch_, &status);
450 }
451 else {
452 usearch_handlePreviousExact(m_strsrch_, &status);
453 }
454 if (U_FAILURE(status)) {
455 return USEARCH_DONE;
456 }
457 if (m_breakiterator_ == NULL
458#if !UCONFIG_NO_BREAK_ITERATION
459 ||
460 m_search_->matchedIndex == USEARCH_DONE ||
461 (m_breakiterator_->isBoundary(m_search_->matchedIndex) &&
462 m_breakiterator_->isBoundary(m_search_->matchedIndex +
463 m_search_->matchedLength))
464#endif
465 ) {
466 return m_search_->matchedIndex;
467 }
468 }
469 }
470
471 return m_search_->matchedIndex;
472 }
473 return USEARCH_DONE;
474}
475
476U_NAMESPACE_END
477
478#endif /* #if !UCONFIG_NO_COLLATION */