]>
Commit | Line | Data |
---|---|---|
b75a7d8f A |
1 | /* |
2 | ********************************************************************** | |
3 | * Copyright (C) 2001 IBM and others. All rights reserved. | |
4 | ********************************************************************** | |
5 | * Date Name Description | |
6 | * 03/22/2000 helena Creation. | |
7 | ********************************************************************** | |
8 | */ | |
9 | ||
10 | #include "unicode/utypes.h" | |
11 | ||
12 | #if !UCONFIG_NO_COLLATION | |
13 | ||
14 | #include "unicode/brkiter.h" | |
15 | #include "unicode/schriter.h" | |
16 | #include "unicode/search.h" | |
17 | #include "usrchimp.h" | |
18 | #include "cmemory.h" | |
19 | ||
20 | // public constructors and destructors ----------------------------------- | |
21 | U_NAMESPACE_BEGIN | |
22 | ||
23 | SearchIterator::SearchIterator(const SearchIterator &other) | |
24 | : UObject(other) | |
25 | { | |
26 | m_breakiterator_ = other.m_breakiterator_; | |
27 | m_text_ = other.m_text_; | |
28 | m_search_ = (USearch *)uprv_malloc(sizeof(USearch)); | |
29 | m_search_->breakIter = other.m_search_->breakIter; | |
30 | m_search_->isCanonicalMatch = other.m_search_->isCanonicalMatch; | |
31 | m_search_->isOverlap = other.m_search_->isOverlap; | |
32 | m_search_->matchedIndex = other.m_search_->matchedIndex; | |
33 | m_search_->matchedLength = other.m_search_->matchedLength; | |
34 | m_search_->text = other.m_search_->text; | |
35 | m_search_->textLength = other.m_search_->textLength; | |
36 | } | |
37 | ||
38 | SearchIterator::~SearchIterator() | |
39 | { | |
40 | if (m_search_ != NULL) { | |
41 | uprv_free(m_search_); | |
42 | } | |
43 | } | |
44 | ||
45 | // public get and set methods ---------------------------------------- | |
46 | ||
47 | void SearchIterator::setAttribute(USearchAttribute attribute, | |
48 | USearchAttributeValue value, | |
49 | UErrorCode &status) | |
50 | { | |
51 | if (U_SUCCESS(status)) { | |
52 | switch (attribute) | |
53 | { | |
54 | case USEARCH_OVERLAP : | |
55 | m_search_->isOverlap = (value == USEARCH_ON ? TRUE : FALSE); | |
56 | break; | |
57 | case USEARCH_CANONICAL_MATCH : | |
58 | m_search_->isCanonicalMatch = (value == USEARCH_ON ? TRUE : FALSE); | |
59 | break; | |
60 | default: | |
61 | status = U_ILLEGAL_ARGUMENT_ERROR; | |
62 | } | |
63 | } | |
64 | if (value == USEARCH_ATTRIBUTE_VALUE_COUNT) { | |
65 | status = U_ILLEGAL_ARGUMENT_ERROR; | |
66 | } | |
67 | } | |
68 | ||
69 | USearchAttributeValue SearchIterator::getAttribute( | |
70 | USearchAttribute attribute) const | |
71 | { | |
72 | switch (attribute) { | |
73 | case USEARCH_OVERLAP : | |
74 | return (m_search_->isOverlap == TRUE ? USEARCH_ON : USEARCH_OFF); | |
75 | case USEARCH_CANONICAL_MATCH : | |
76 | return (m_search_->isCanonicalMatch == TRUE ? USEARCH_ON : | |
77 | USEARCH_OFF); | |
78 | default : | |
79 | return USEARCH_DEFAULT; | |
80 | } | |
81 | } | |
82 | ||
83 | int32_t SearchIterator::getMatchedStart() const | |
84 | { | |
85 | return m_search_->matchedIndex; | |
86 | } | |
87 | ||
88 | int32_t SearchIterator::getMatchedLength() const | |
89 | { | |
90 | return m_search_->matchedLength; | |
91 | } | |
92 | ||
93 | void SearchIterator::getMatchedText(UnicodeString &result) const | |
94 | { | |
95 | int32_t matchedindex = m_search_->matchedIndex; | |
96 | int32_t matchedlength = m_search_->matchedLength; | |
97 | if (matchedindex != USEARCH_DONE && matchedlength != 0) { | |
98 | result.setTo(m_search_->text + matchedindex, matchedlength); | |
99 | } | |
100 | else { | |
101 | result.remove(); | |
102 | } | |
103 | } | |
104 | ||
105 | void SearchIterator::setBreakIterator(BreakIterator *breakiter, | |
106 | UErrorCode &status) | |
107 | { | |
108 | if (U_SUCCESS(status)) { | |
109 | m_search_->breakIter = NULL; | |
110 | // the c++ breakiterator may not make use of ubreakiterator. | |
111 | // so we'll have to keep track of it ourselves. | |
112 | m_breakiterator_ = breakiter; | |
113 | } | |
114 | } | |
115 | ||
116 | const BreakIterator * SearchIterator::getBreakIterator(void) const | |
117 | { | |
118 | return m_breakiterator_; | |
119 | } | |
120 | ||
121 | void SearchIterator::setText(const UnicodeString &text, UErrorCode &status) | |
122 | { | |
123 | if (U_SUCCESS(status)) { | |
124 | if (text.length() == 0) { | |
125 | status = U_ILLEGAL_ARGUMENT_ERROR; | |
126 | } | |
127 | else { | |
128 | m_text_ = text; | |
129 | m_search_->text = m_text_.getBuffer(); | |
130 | m_search_->textLength = m_text_.length(); | |
131 | } | |
132 | } | |
133 | } | |
134 | ||
135 | void SearchIterator::setText(CharacterIterator &text, UErrorCode &status) | |
136 | { | |
137 | if (U_SUCCESS(status)) { | |
138 | text.getText(m_text_); | |
139 | setText(m_text_, status); | |
140 | } | |
141 | } | |
142 | ||
143 | const UnicodeString & SearchIterator::getText(void) const | |
144 | { | |
145 | return m_text_; | |
146 | } | |
147 | ||
148 | // operator overloading ---------------------------------------------- | |
149 | ||
150 | UBool SearchIterator::operator==(const SearchIterator &that) const | |
151 | { | |
152 | if (this == &that) { | |
153 | return TRUE; | |
154 | } | |
155 | return (m_breakiterator_ == that.m_breakiterator_ && | |
156 | m_search_->isCanonicalMatch == that.m_search_->isCanonicalMatch && | |
157 | m_search_->isOverlap == that.m_search_->isOverlap && | |
158 | m_search_->matchedIndex == that.m_search_->matchedIndex && | |
159 | m_search_->matchedLength == that.m_search_->matchedLength && | |
160 | m_search_->textLength == that.m_search_->textLength && | |
161 | getOffset() == that.getOffset() && | |
162 | (uprv_memcmp(m_search_->text, that.m_search_->text, | |
163 | m_search_->textLength * sizeof(UChar)) == 0)); | |
164 | } | |
165 | ||
166 | // public methods ---------------------------------------------------- | |
167 | ||
168 | int32_t SearchIterator::first(UErrorCode &status) | |
169 | { | |
170 | if (U_FAILURE(status)) { | |
171 | return USEARCH_DONE; | |
172 | } | |
173 | setOffset(0, status); | |
174 | return handleNext(0, status); | |
175 | } | |
176 | ||
177 | int32_t SearchIterator::following(int32_t position, | |
178 | UErrorCode &status) | |
179 | { | |
180 | if (U_FAILURE(status)) { | |
181 | return USEARCH_DONE; | |
182 | } | |
183 | setOffset(position, status); | |
184 | return handleNext(position, status); | |
185 | } | |
186 | ||
187 | int32_t SearchIterator::last(UErrorCode &status) | |
188 | { | |
189 | if (U_FAILURE(status)) { | |
190 | return USEARCH_DONE; | |
191 | } | |
192 | setOffset(m_search_->textLength, status); | |
193 | return handlePrev(m_search_->textLength, status); | |
194 | } | |
195 | ||
196 | int32_t SearchIterator::preceding(int32_t position, | |
197 | UErrorCode &status) | |
198 | { | |
199 | if (U_FAILURE(status)) { | |
200 | return USEARCH_DONE; | |
201 | } | |
202 | setOffset(position, status); | |
203 | return handlePrev(position, status); | |
204 | } | |
205 | ||
206 | int32_t SearchIterator::next(UErrorCode &status) | |
207 | { | |
208 | if (U_SUCCESS(status)) { | |
209 | int32_t offset = getOffset(); | |
210 | int32_t matchindex = m_search_->matchedIndex; | |
211 | int32_t matchlength = m_search_->matchedLength; | |
212 | m_search_->reset = FALSE; | |
213 | if (m_search_->isForwardSearching == TRUE) { | |
214 | int32_t textlength = m_search_->textLength; | |
215 | if (offset == textlength || matchindex == textlength || | |
216 | (matchindex != USEARCH_DONE && | |
217 | matchindex + matchlength >= textlength)) { | |
218 | // not enough characters to match | |
219 | setMatchNotFound(); | |
220 | return USEARCH_DONE; | |
221 | } | |
222 | } | |
223 | else { | |
224 | // switching direction. | |
225 | // if matchedIndex == USEARCH_DONE, it means that either a | |
226 | // setOffset has been called or that previous ran off the text | |
227 | // string. the iterator would have been set to offset 0 if a | |
228 | // match is not found. | |
229 | m_search_->isForwardSearching = TRUE; | |
230 | if (m_search_->matchedIndex != USEARCH_DONE) { | |
231 | // there's no need to set the collation element iterator | |
232 | // the next call to next will set the offset. | |
233 | return matchindex; | |
234 | } | |
235 | } | |
236 | ||
237 | if (matchlength > 0) { | |
374ca955 A |
238 | // if matchlength is 0 we are at the start of the iteration |
239 | if (m_search_->isOverlap) { | |
240 | offset ++; | |
241 | } | |
242 | else { | |
243 | offset += matchlength; | |
244 | } | |
245 | } | |
b75a7d8f A |
246 | return handleNext(offset, status); |
247 | } | |
248 | return USEARCH_DONE; | |
249 | } | |
250 | ||
251 | int32_t SearchIterator::previous(UErrorCode &status) | |
252 | { | |
253 | if (U_SUCCESS(status)) { | |
254 | int32_t offset; | |
255 | if (m_search_->reset) { | |
256 | offset = m_search_->textLength; | |
257 | m_search_->isForwardSearching = FALSE; | |
258 | m_search_->reset = FALSE; | |
259 | setOffset(offset, status); | |
260 | } | |
261 | else { | |
262 | offset = getOffset(); | |
263 | } | |
264 | ||
265 | int32_t matchindex = m_search_->matchedIndex; | |
266 | if (m_search_->isForwardSearching == TRUE) { | |
267 | // switching direction. | |
268 | // if matchedIndex == USEARCH_DONE, it means that either a | |
269 | // setOffset has been called or that next ran off the text | |
270 | // string. the iterator would have been set to offset textLength if | |
271 | // a match is not found. | |
272 | m_search_->isForwardSearching = FALSE; | |
273 | if (matchindex != USEARCH_DONE) { | |
274 | return matchindex; | |
275 | } | |
276 | } | |
277 | else { | |
278 | if (offset == 0 || matchindex == 0) { | |
279 | // not enough characters to match | |
280 | setMatchNotFound(); | |
281 | return USEARCH_DONE; | |
282 | } | |
283 | } | |
284 | ||
285 | if (matchindex != USEARCH_DONE) { | |
286 | return handlePrev(matchindex, status); | |
287 | } | |
288 | return handlePrev(offset, status); | |
289 | } | |
290 | return USEARCH_DONE; | |
291 | } | |
292 | ||
293 | void SearchIterator::reset() | |
294 | { | |
295 | UErrorCode status = U_ZERO_ERROR; | |
296 | setMatchNotFound(); | |
297 | setOffset(0, status); | |
298 | m_search_->isOverlap = FALSE; | |
299 | m_search_->isCanonicalMatch = FALSE; | |
300 | m_search_->isForwardSearching = TRUE; | |
301 | m_search_->reset = TRUE; | |
302 | } | |
303 | ||
304 | // protected constructors and destructors ----------------------------- | |
305 | ||
306 | SearchIterator::SearchIterator() | |
307 | { | |
308 | m_search_ = (USearch *)uprv_malloc(sizeof(USearch)); | |
309 | m_search_->breakIter = NULL; | |
310 | m_search_->isOverlap = FALSE; | |
311 | m_search_->isCanonicalMatch = FALSE; | |
312 | m_search_->isForwardSearching = TRUE; | |
313 | m_search_->reset = TRUE; | |
314 | m_search_->matchedIndex = USEARCH_DONE; | |
315 | m_search_->matchedLength = 0; | |
316 | m_search_->text = NULL; | |
317 | m_search_->textLength = 0; | |
318 | } | |
319 | ||
320 | SearchIterator::SearchIterator(const UnicodeString &text, | |
321 | BreakIterator *breakiter) : | |
322 | m_breakiterator_(breakiter), | |
323 | m_text_(text) | |
324 | { | |
325 | m_search_ = (USearch *)uprv_malloc(sizeof(USearch)); | |
326 | m_search_->breakIter = NULL; | |
327 | m_search_->isOverlap = FALSE; | |
328 | m_search_->isCanonicalMatch = FALSE; | |
329 | m_search_->isForwardSearching = TRUE; | |
330 | m_search_->reset = TRUE; | |
331 | m_search_->matchedIndex = USEARCH_DONE; | |
332 | m_search_->matchedLength = 0; | |
333 | m_search_->text = m_text_.getBuffer(); | |
334 | m_search_->textLength = text.length(); | |
335 | } | |
336 | ||
337 | SearchIterator::SearchIterator(CharacterIterator &text, | |
338 | BreakIterator *breakiter) : | |
339 | m_breakiterator_(breakiter) | |
340 | { | |
341 | m_search_ = (USearch *)uprv_malloc(sizeof(USearch)); | |
342 | m_search_->breakIter = NULL; | |
343 | m_search_->isOverlap = FALSE; | |
344 | m_search_->isCanonicalMatch = FALSE; | |
345 | m_search_->isForwardSearching = TRUE; | |
346 | m_search_->reset = TRUE; | |
347 | m_search_->matchedIndex = USEARCH_DONE; | |
348 | m_search_->matchedLength = 0; | |
349 | text.getText(m_text_); | |
350 | m_search_->text = m_text_.getBuffer(); | |
351 | m_search_->textLength = m_text_.length(); | |
352 | m_breakiterator_ = breakiter; | |
353 | } | |
354 | ||
355 | // protected methods ------------------------------------------------------ | |
356 | ||
357 | SearchIterator & SearchIterator::operator=(const SearchIterator &that) | |
358 | { | |
359 | if (this != &that) { | |
360 | m_breakiterator_ = that.m_breakiterator_; | |
361 | m_text_ = that.m_text_; | |
362 | m_search_->breakIter = that.m_search_->breakIter; | |
363 | m_search_->isCanonicalMatch = that.m_search_->isCanonicalMatch; | |
364 | m_search_->isOverlap = that.m_search_->isOverlap; | |
365 | m_search_->matchedIndex = that.m_search_->matchedIndex; | |
366 | m_search_->matchedLength = that.m_search_->matchedLength; | |
367 | m_search_->text = that.m_search_->text; | |
368 | m_search_->textLength = that.m_search_->textLength; | |
369 | } | |
370 | return *this; | |
371 | } | |
372 | ||
373 | void SearchIterator::setMatchLength(int32_t length) | |
374 | { | |
375 | m_search_->matchedLength = length; | |
376 | } | |
377 | ||
378 | void SearchIterator::setMatchStart(int32_t position) | |
379 | { | |
380 | m_search_->matchedIndex = position; | |
381 | } | |
382 | ||
383 | void SearchIterator::setMatchNotFound() | |
384 | { | |
385 | setMatchStart(USEARCH_DONE); | |
386 | setMatchLength(0); | |
387 | UErrorCode status = U_ZERO_ERROR; | |
388 | // by default no errors should be returned here since offsets are within | |
389 | // range. | |
390 | if (m_search_->isForwardSearching) { | |
391 | setOffset(m_search_->textLength, status); | |
392 | } | |
393 | else { | |
394 | setOffset(0, status); | |
395 | } | |
396 | } | |
397 | ||
398 | ||
399 | U_NAMESPACE_END | |
400 | ||
401 | #endif /* #if !UCONFIG_NO_COLLATION */ |