]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/stsearch.cpp
ICU-3.13.tar.gz
[apple/icu.git] / icuSources / i18n / stsearch.cpp
CommitLineData
b75a7d8f
A
1/*
2**********************************************************************
3* Copyright (C) 2001-2003 IBM and others. All rights reserved.
4**********************************************************************
5* Date Name Description
6* 03/22/2000 helena Creation.
7**********************************************************************
8*/
9
10#include "unicode/utypes.h"
11
12#if !UCONFIG_NO_COLLATION
13
14#include "unicode/stsearch.h"
15#include "usrchimp.h"
16#include "cmemory.h"
17
18U_NAMESPACE_BEGIN
19
20const char StringSearch::fgClassID=0;
21
22// public constructors and destructors -----------------------------------
23
24StringSearch::StringSearch(const UnicodeString &pattern,
25 const UnicodeString &text,
26 const Locale &locale,
27 BreakIterator *breakiter,
28 UErrorCode &status) :
29 SearchIterator(text, breakiter),
30 m_collator_(),
31 m_pattern_(pattern)
32{
33 if (U_FAILURE(status)) {
34 m_strsrch_ = NULL;
35 return;
36 }
37
38 m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
39 m_text_.getBuffer(), m_text_.length(),
40 locale.getName(), (UBreakIterator *)breakiter,
41 &status);
42 uprv_free(m_search_);
43 m_search_ = NULL;
44
45 // !!! dlf m_collator_ is an odd beast. basically it is an aliasing
46 // wrapper around the internal collator and rules, which (here) are
47 // owned by this stringsearch object. this means 1) it's destructor
48 // _should not_ delete the ucollator or rules, and 2) changes made
49 // to the exposed collator (setStrength etc) _should_ modify the
50 // ucollator. thus the collator is not a copy-on-write alias, and it
51 // needs to distinguish itself not merely from 'stand alone' colators
52 // but also from copy-on-write ones. it needs additional state, which
53 // setUCollator should set.
54
55 if (U_SUCCESS(status)) {
56 int32_t length;
57 const UChar *rules = ucol_getRules(m_strsrch_->collator, &length);
58 m_collation_rules_.setTo(rules, length);
59 m_collator_.setUCollator((UCollator *)m_strsrch_->collator,
60 &m_collation_rules_);
61 // m_search_ has been created by the base SearchIterator class
62 m_search_ = m_strsrch_->search;
63 }
64}
65
66StringSearch::StringSearch(const UnicodeString &pattern,
67 const UnicodeString &text,
68 RuleBasedCollator *coll,
69 BreakIterator *breakiter,
70 UErrorCode &status) :
71 SearchIterator(text, breakiter),
72 m_collator_(),
73 m_pattern_(pattern)
74{
75 if (U_FAILURE(status)) {
76 m_strsrch_ = NULL;
77 return;
78 }
79 if (coll == NULL) {
80 status = U_ILLEGAL_ARGUMENT_ERROR;
81 m_strsrch_ = NULL;
82 return;
83 }
84 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
85 m_pattern_.length(),
86 m_text_.getBuffer(),
87 m_text_.length(), coll->ucollator,
88 (UBreakIterator *)breakiter,
89 &status);
90 uprv_free(m_search_);
91 m_search_ = NULL;
92
93 if (U_SUCCESS(status)) {
94 int32_t length;
95 const UChar *rules = ucol_getRules(m_strsrch_->collator, &length);
96 m_collation_rules_.setTo(rules, length);
97 m_collator_.setUCollator((UCollator *)m_strsrch_->collator,
98 &m_collation_rules_);
99 // m_search_ has been created by the base SearchIterator class
100 m_search_ = m_strsrch_->search;
101 }
102}
103
104StringSearch::StringSearch(const UnicodeString &pattern,
105 CharacterIterator &text,
106 const Locale &locale,
107 BreakIterator *breakiter,
108 UErrorCode &status) :
109 SearchIterator(text, breakiter),
110 m_collator_(),
111 m_pattern_(pattern)
112{
113 if (U_FAILURE(status)) {
114 m_strsrch_ = NULL;
115 return;
116 }
117 m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
118 m_text_.getBuffer(), m_text_.length(),
119 locale.getName(), (UBreakIterator *)breakiter,
120 &status);
121 uprv_free(m_search_);
122 m_search_ = NULL;
123
124 if (U_SUCCESS(status)) {
125 int32_t length;
126 const UChar *rules = ucol_getRules(m_strsrch_->collator, &length);
127 m_collation_rules_.setTo(rules, length);
128 m_collator_.setUCollator((UCollator *)m_strsrch_->collator,
129 &m_collation_rules_);
130 // m_search_ has been created by the base SearchIterator class
131 m_search_ = m_strsrch_->search;
132 }
133}
134
135StringSearch::StringSearch(const UnicodeString &pattern,
136 CharacterIterator &text,
137 RuleBasedCollator *coll,
138 BreakIterator *breakiter,
139 UErrorCode &status) :
140 SearchIterator(text, breakiter),
141 m_collator_(),
142 m_pattern_(pattern)
143{
144 if (U_FAILURE(status)) {
145 m_strsrch_ = NULL;
146 return;
147 }
148 if (coll == NULL) {
149 status = U_ILLEGAL_ARGUMENT_ERROR;
150 m_strsrch_ = NULL;
151 return;
152 }
153 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
154 m_pattern_.length(),
155 m_text_.getBuffer(),
156 m_text_.length(), coll->ucollator,
157 (UBreakIterator *)breakiter,
158 &status);
159 uprv_free(m_search_);
160 m_search_ = NULL;
161
162 if (U_SUCCESS(status)) {
163 int32_t length;
164 const UChar *rules = ucol_getRules(m_strsrch_->collator, &length);
165 m_collation_rules_.setTo(rules, length);
166 m_collator_.setUCollator((UCollator *)m_strsrch_->collator,
167 &m_collation_rules_);
168 // m_search_ has been created by the base SearchIterator class
169 m_search_ = m_strsrch_->search;
170 }
171}
172
173StringSearch::StringSearch(const StringSearch &that) :
174 SearchIterator(that.m_text_, that.m_breakiterator_),
175 m_collator_(),
176 m_pattern_(that.m_pattern_)
177{
178 UErrorCode status = U_ZERO_ERROR;
179 if (that.m_strsrch_ == NULL) {
180 m_strsrch_ = NULL;
181 status = U_ILLEGAL_ARGUMENT_ERROR;
182 }
183 else {
184 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
185 m_pattern_.length(),
186 m_text_.getBuffer(),
187 m_text_.length(),
188 that.m_strsrch_->collator,
189 (UBreakIterator *)that.m_breakiterator_,
190 &status);
191 }
192 uprv_free(m_search_);
193 m_search_ = NULL;
194
195 if (U_SUCCESS(status)) {
196 int32_t length;
197 const UChar *rules = ucol_getRules(m_strsrch_->collator, &length);
198 m_collation_rules_.setTo(rules, length);
199 m_collator_.setUCollator((UCollator *)m_strsrch_->collator,
200 &m_collation_rules_);
201 // m_search_ has been created by the base SearchIterator class
202 m_search_ = m_strsrch_->search;
203 m_breakiterator_ = that.m_breakiterator_;
204 }
205}
206
207StringSearch::~StringSearch()
208{
209 if (m_strsrch_ != NULL) {
210 usearch_close(m_strsrch_);
211 m_search_ = NULL;
212 }
213}
214
215// operator overloading ---------------------------------------------
216StringSearch & StringSearch::operator=(const StringSearch &that)
217{
218 if ((*this) != that) {
219 UErrorCode status = U_ZERO_ERROR;
220 m_text_ = that.m_text_;
221 m_breakiterator_ = that.m_breakiterator_;
222 m_pattern_ = that.m_pattern_;
223 // all m_search_ in the parent class is linked up with m_strsrch_
224 usearch_close(m_strsrch_);
225 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
226 m_pattern_.length(),
227 m_text_.getBuffer(),
228 m_text_.length(),
229 that.m_strsrch_->collator,
230 NULL, &status);
231 int32_t length;
232 const UChar *rules = ucol_getRules(m_strsrch_->collator, &length);
233 m_collation_rules_.setTo(rules, length);
234 m_collator_.setUCollator((UCollator *)m_strsrch_->collator,
235 &m_collation_rules_);
236 m_search_ = m_strsrch_->search;
237 }
238 return *this;
239}
240
241UBool StringSearch::operator==(const SearchIterator &that) const
242{
243 if (this == &that) {
244 return TRUE;
245 }
246 if (SearchIterator::operator ==(that)) {
247 StringSearch &thatsrch = (StringSearch &)that;
248 return (this->m_pattern_ == thatsrch.m_pattern_ &&
249 this->m_strsrch_->collator == thatsrch.m_strsrch_->collator);
250 }
251 return FALSE;
252}
253
254// public get and set methods ----------------------------------------
255
256void StringSearch::setOffset(int32_t position, UErrorCode &status)
257{
258 // status checked in usearch_setOffset
259 usearch_setOffset(m_strsrch_, position, &status);
260}
261
262int32_t StringSearch::getOffset(void) const
263{
264 return usearch_getOffset(m_strsrch_);
265}
266
267void StringSearch::setText(const UnicodeString &text, UErrorCode &status)
268{
269 if (U_SUCCESS(status)) {
270 m_text_ = text;
271 usearch_setText(m_strsrch_, text.getBuffer(), text.length(), &status);
272 }
273}
274
275void StringSearch::setText(CharacterIterator &text, UErrorCode &status)
276{
277 if (U_SUCCESS(status)) {
278 text.getText(m_text_);
279 usearch_setText(m_strsrch_, m_text_.getBuffer(), m_text_.length(), &status);
280 }
281}
282
283RuleBasedCollator * StringSearch::getCollator() const
284{
285 return (RuleBasedCollator *)&m_collator_;
286}
287
288void StringSearch::setCollator(RuleBasedCollator *coll, UErrorCode &status)
289{
290 if (U_SUCCESS(status)) {
291 usearch_setCollator(m_strsrch_, coll->getUCollator(), &status);
292 m_collation_rules_.setTo(coll->getRules());
293 m_collator_.setUCollator((UCollator *)m_strsrch_->collator,
294 &m_collation_rules_);
295 }
296}
297
298void StringSearch::setPattern(const UnicodeString &pattern,
299 UErrorCode &status)
300{
301 if (U_SUCCESS(status)) {
302 m_pattern_ = pattern;
303 usearch_setPattern(m_strsrch_, m_pattern_.getBuffer(), m_pattern_.length(),
304 &status);
305 }
306}
307
308const UnicodeString & StringSearch::getPattern() const
309{
310 return m_pattern_;
311}
312
313// public methods ----------------------------------------------------
314
315void StringSearch::reset()
316{
317 usearch_reset(m_strsrch_);
318}
319
320SearchIterator * StringSearch::safeClone(void) const
321{
322 UErrorCode status = U_ZERO_ERROR;
323 StringSearch *result = new StringSearch(m_pattern_, m_text_,
324 (RuleBasedCollator *)&m_collator_,
325 m_breakiterator_,
326 status);
327 /* test for NULL */
328 if (result == 0) {
329 status = U_MEMORY_ALLOCATION_ERROR;
330 return 0;
331 }
332 result->setOffset(getOffset(), status);
333 result->setMatchStart(m_strsrch_->search->matchedIndex);
334 result->setMatchLength(m_strsrch_->search->matchedLength);
335 if (U_FAILURE(status)) {
336 return NULL;
337 }
338 return result;
339}
340
341// protected method -------------------------------------------------
342
343int32_t StringSearch::handleNext(int32_t position, UErrorCode &status)
344{
345 // values passed here are already in the pre-shift position
346 if (U_SUCCESS(status)) {
347 if (m_strsrch_->pattern.CELength == 0) {
348 m_search_->matchedIndex =
349 m_search_->matchedIndex == USEARCH_DONE ?
350 getOffset() : m_search_->matchedIndex + 1;
351 m_search_->matchedLength = 0;
352 ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
353 &status);
354 if (m_search_->matchedIndex == m_search_->textLength) {
355 m_search_->matchedIndex = USEARCH_DONE;
356 }
357 }
358 else {
359 // looking at usearch.cpp, this part is shifted out to
360 // StringSearch instead of SearchIterator because m_strsrch_ is
361 // not accessible in SearchIterator
362 if (position + m_strsrch_->pattern.defaultShiftSize
363 > m_search_->textLength) {
364 setMatchNotFound();
365 return USEARCH_DONE;
366 }
367 ucol_setOffset(m_strsrch_->textIter, position, &status);
368 while (TRUE) {
369 if (m_search_->isCanonicalMatch) {
370 // can't use exact here since extra accents are allowed.
371 usearch_handleNextCanonical(m_strsrch_, &status);
372 }
373 else {
374 usearch_handleNextExact(m_strsrch_, &status);
375 }
376 if (U_FAILURE(status)) {
377 return USEARCH_DONE;
378 }
379 if (m_breakiterator_ == NULL
380#if !UCONFIG_NO_BREAK_ITERATION
381 ||
382 m_search_->matchedIndex == USEARCH_DONE ||
383 (m_breakiterator_->isBoundary(m_search_->matchedIndex) &&
384 m_breakiterator_->isBoundary(m_search_->matchedIndex +
385 m_search_->matchedLength))
386#endif
387 ) {
388 if (m_search_->matchedIndex == USEARCH_DONE) {
389 ucol_setOffset(m_strsrch_->textIter,
390 m_search_->textLength, &status);
391 }
392 else {
393 ucol_setOffset(m_strsrch_->textIter,
394 m_search_->matchedIndex, &status);
395 }
396 return m_search_->matchedIndex;
397 }
398 }
399 }
400 }
401 return USEARCH_DONE;
402}
403
404int32_t StringSearch::handlePrev(int32_t position, UErrorCode &status)
405{
406 // values passed here are already in the pre-shift position
407 if (U_SUCCESS(status)) {
408 if (m_strsrch_->pattern.CELength == 0) {
409 m_search_->matchedIndex =
410 (m_search_->matchedIndex == USEARCH_DONE ? getOffset() :
411 m_search_->matchedIndex);
412 if (m_search_->matchedIndex == 0) {
413 setMatchNotFound();
414 }
415 else {
416 m_search_->matchedIndex --;
417 ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
418 &status);
419 m_search_->matchedLength = 0;
420 }
421 }
422 else {
423 // looking at usearch.cpp, this part is shifted out to
424 // StringSearch instead of SearchIterator because m_strsrch_ is
425 // not accessible in SearchIterator
426 if (!m_search_->isOverlap &&
427 position - m_strsrch_->pattern.defaultShiftSize < 0) {
428 setMatchNotFound();
429 return USEARCH_DONE;
430 }
431 while (TRUE) {
432 if (m_search_->isCanonicalMatch) {
433 // can't use exact here since extra accents are allowed.
434 usearch_handlePreviousCanonical(m_strsrch_, &status);
435 }
436 else {
437 usearch_handlePreviousExact(m_strsrch_, &status);
438 }
439 if (U_FAILURE(status)) {
440 return USEARCH_DONE;
441 }
442 if (m_breakiterator_ == NULL
443#if !UCONFIG_NO_BREAK_ITERATION
444 ||
445 m_search_->matchedIndex == USEARCH_DONE ||
446 (m_breakiterator_->isBoundary(m_search_->matchedIndex) &&
447 m_breakiterator_->isBoundary(m_search_->matchedIndex +
448 m_search_->matchedLength))
449#endif
450 ) {
451 return m_search_->matchedIndex;
452 }
453 }
454 }
455
456 return m_search_->matchedIndex;
457 }
458 return USEARCH_DONE;
459}
460
461U_NAMESPACE_END
462
463#endif /* #if !UCONFIG_NO_COLLATION */