]>
Commit | Line | Data |
---|---|---|
b75a7d8f A |
1 | /* |
2 | ******************************************************************************* | |
729e4ab9 | 3 | * Copyright (C) 1996-2010, International Business Machines Corporation and * |
b75a7d8f A |
4 | * others. All Rights Reserved. * |
5 | ******************************************************************************* | |
6 | */ | |
7 | ||
8 | /* | |
9 | * File coleitr.cpp | |
10 | * | |
11 | * | |
12 | * | |
13 | * Created by: Helena Shih | |
14 | * | |
15 | * Modification History: | |
16 | * | |
17 | * Date Name Description | |
18 | * | |
19 | * 6/23/97 helena Adding comments to make code more readable. | |
20 | * 08/03/98 erm Synched with 1.2 version of CollationElementIterator.java | |
21 | * 12/10/99 aliu Ported Thai collation support from Java. | |
22 | * 01/25/01 swquek Modified to a C++ wrapper calling C APIs (ucoliter.h) | |
23 | * 02/19/01 swquek Removed CollationElementsIterator() since it is | |
24 | * private constructor and no calls are made to it | |
25 | */ | |
26 | ||
27 | #include "unicode/utypes.h" | |
28 | ||
29 | #if !UCONFIG_NO_COLLATION | |
30 | ||
31 | #include "unicode/coleitr.h" | |
32 | #include "unicode/ustring.h" | |
33 | #include "ucol_imp.h" | |
34 | #include "cmemory.h" | |
35 | ||
36 | ||
37 | /* Constants --------------------------------------------------------------- */ | |
38 | ||
39 | U_NAMESPACE_BEGIN | |
40 | ||
374ca955 | 41 | UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationElementIterator) |
b75a7d8f | 42 | |
b75a7d8f A |
43 | /* CollationElementIterator public constructor/destructor ------------------ */ |
44 | ||
45 | CollationElementIterator::CollationElementIterator( | |
46 | const CollationElementIterator& other) | |
47 | : UObject(other), isDataOwned_(TRUE) | |
48 | { | |
73c04bcf A |
49 | UErrorCode status = U_ZERO_ERROR; |
50 | m_data_ = ucol_openElements(other.m_data_->iteratordata_.coll, NULL, 0, | |
51 | &status); | |
b75a7d8f | 52 | |
73c04bcf | 53 | *this = other; |
b75a7d8f A |
54 | } |
55 | ||
56 | CollationElementIterator::~CollationElementIterator() | |
57 | { | |
73c04bcf A |
58 | if (isDataOwned_) { |
59 | ucol_closeElements(m_data_); | |
60 | } | |
b75a7d8f A |
61 | } |
62 | ||
63 | /* CollationElementIterator public methods --------------------------------- */ | |
64 | ||
65 | int32_t CollationElementIterator::getOffset() const | |
66 | { | |
73c04bcf | 67 | return ucol_getOffset(m_data_); |
b75a7d8f A |
68 | } |
69 | ||
70 | /** | |
71 | * Get the ordering priority of the next character in the string. | |
72 | * @return the next character's ordering. Returns NULLORDER if an error has | |
73 | * occured or if the end of string has been reached | |
74 | */ | |
75 | int32_t CollationElementIterator::next(UErrorCode& status) | |
76 | { | |
73c04bcf | 77 | return ucol_next(m_data_, &status); |
b75a7d8f A |
78 | } |
79 | ||
80 | UBool CollationElementIterator::operator!=( | |
81 | const CollationElementIterator& other) const | |
82 | { | |
73c04bcf | 83 | return !(*this == other); |
b75a7d8f A |
84 | } |
85 | ||
86 | UBool CollationElementIterator::operator==( | |
87 | const CollationElementIterator& that) const | |
88 | { | |
46f4442e | 89 | if (this == &that || m_data_ == that.m_data_) { |
b75a7d8f A |
90 | return TRUE; |
91 | } | |
92 | ||
93 | // option comparison | |
374ca955 | 94 | if (m_data_->iteratordata_.coll != that.m_data_->iteratordata_.coll) |
b75a7d8f A |
95 | { |
96 | return FALSE; | |
97 | } | |
98 | ||
99 | // the constructor and setText always sets a length | |
100 | // and we only compare the string not the contents of the normalization | |
101 | // buffer | |
729e4ab9 A |
102 | int thislength = (int)(m_data_->iteratordata_.endp - m_data_->iteratordata_.string); |
103 | int thatlength = (int)(that.m_data_->iteratordata_.endp - that.m_data_->iteratordata_.string); | |
b75a7d8f A |
104 | |
105 | if (thislength != thatlength) { | |
106 | return FALSE; | |
107 | } | |
108 | ||
109 | if (uprv_memcmp(m_data_->iteratordata_.string, | |
110 | that.m_data_->iteratordata_.string, | |
111 | thislength * U_SIZEOF_UCHAR) != 0) { | |
112 | return FALSE; | |
113 | } | |
114 | if (getOffset() != that.getOffset()) { | |
115 | return FALSE; | |
116 | } | |
117 | ||
118 | // checking normalization buffer | |
119 | if ((m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) { | |
46f4442e | 120 | if ((that.m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) != 0) { |
b75a7d8f A |
121 | return FALSE; |
122 | } | |
123 | // both are in the normalization buffer | |
124 | if (m_data_->iteratordata_.pos | |
729e4ab9 | 125 | - m_data_->iteratordata_.writableBuffer.getBuffer() |
b75a7d8f | 126 | != that.m_data_->iteratordata_.pos |
729e4ab9 | 127 | - that.m_data_->iteratordata_.writableBuffer.getBuffer()) { |
b75a7d8f A |
128 | // not in the same position in the normalization buffer |
129 | return FALSE; | |
130 | } | |
131 | } | |
46f4442e | 132 | else if ((that.m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) { |
b75a7d8f A |
133 | return FALSE; |
134 | } | |
135 | // checking ce position | |
136 | return (m_data_->iteratordata_.CEpos - m_data_->iteratordata_.CEs) | |
137 | == (that.m_data_->iteratordata_.CEpos | |
138 | - that.m_data_->iteratordata_.CEs); | |
139 | } | |
140 | ||
141 | /** | |
142 | * Get the ordering priority of the previous collation element in the string. | |
143 | * @param status the error code status. | |
144 | * @return the previous element's ordering. Returns NULLORDER if an error has | |
145 | * occured or if the start of string has been reached. | |
146 | */ | |
147 | int32_t CollationElementIterator::previous(UErrorCode& status) | |
148 | { | |
73c04bcf | 149 | return ucol_previous(m_data_, &status); |
b75a7d8f A |
150 | } |
151 | ||
152 | /** | |
153 | * Resets the cursor to the beginning of the string. | |
154 | */ | |
155 | void CollationElementIterator::reset() | |
156 | { | |
73c04bcf | 157 | ucol_reset(m_data_); |
b75a7d8f A |
158 | } |
159 | ||
160 | void CollationElementIterator::setOffset(int32_t newOffset, | |
161 | UErrorCode& status) | |
162 | { | |
73c04bcf | 163 | ucol_setOffset(m_data_, newOffset, &status); |
b75a7d8f A |
164 | } |
165 | ||
166 | /** | |
167 | * Sets the source to the new source string. | |
168 | */ | |
169 | void CollationElementIterator::setText(const UnicodeString& source, | |
170 | UErrorCode& status) | |
171 | { | |
73c04bcf | 172 | if (U_FAILURE(status)) { |
b75a7d8f A |
173 | return; |
174 | } | |
73c04bcf A |
175 | |
176 | int32_t length = source.length(); | |
177 | UChar *string = NULL; | |
178 | if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) { | |
729e4ab9 | 179 | uprv_free((UChar *)m_data_->iteratordata_.string); |
73c04bcf A |
180 | } |
181 | m_data_->isWritable = TRUE; | |
182 | if (length > 0) { | |
183 | string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length); | |
184 | /* test for NULL */ | |
185 | if (string == NULL) { | |
186 | status = U_MEMORY_ALLOCATION_ERROR; | |
187 | return; | |
188 | } | |
189 | u_memcpy(string, source.getBuffer(), length); | |
190 | } | |
191 | else { | |
192 | string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR); | |
193 | /* test for NULL */ | |
194 | if (string == NULL) { | |
195 | status = U_MEMORY_ALLOCATION_ERROR; | |
196 | return; | |
197 | } | |
198 | *string = 0; | |
b75a7d8f | 199 | } |
46f4442e | 200 | /* Free offsetBuffer before initializing it. */ |
729e4ab9 | 201 | ucol_freeOffsetBuffer(&(m_data_->iteratordata_)); |
73c04bcf | 202 | uprv_init_collIterate(m_data_->iteratordata_.coll, string, length, |
729e4ab9 | 203 | &m_data_->iteratordata_, &status); |
b75a7d8f | 204 | |
73c04bcf | 205 | m_data_->reset_ = TRUE; |
b75a7d8f A |
206 | } |
207 | ||
208 | // Sets the source to the new character iterator. | |
209 | void CollationElementIterator::setText(CharacterIterator& source, | |
210 | UErrorCode& status) | |
211 | { | |
73c04bcf | 212 | if (U_FAILURE(status)) |
b75a7d8f | 213 | return; |
73c04bcf A |
214 | |
215 | int32_t length = source.getLength(); | |
216 | UChar *buffer = NULL; | |
217 | ||
218 | if (length == 0) { | |
219 | buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR); | |
220 | /* test for NULL */ | |
221 | if (buffer == NULL) { | |
222 | status = U_MEMORY_ALLOCATION_ERROR; | |
223 | return; | |
224 | } | |
225 | *buffer = 0; | |
b75a7d8f | 226 | } |
73c04bcf A |
227 | else { |
228 | buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length); | |
229 | /* test for NULL */ | |
230 | if (buffer == NULL) { | |
231 | status = U_MEMORY_ALLOCATION_ERROR; | |
232 | return; | |
233 | } | |
234 | /* | |
235 | Using this constructor will prevent buffer from being removed when | |
236 | string gets removed | |
237 | */ | |
238 | UnicodeString string; | |
239 | source.getText(string); | |
240 | u_memcpy(buffer, string.getBuffer(), length); | |
241 | } | |
242 | ||
243 | if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) { | |
729e4ab9 | 244 | uprv_free((UChar *)m_data_->iteratordata_.string); |
73c04bcf A |
245 | } |
246 | m_data_->isWritable = TRUE; | |
46f4442e | 247 | /* Free offsetBuffer before initializing it. */ |
729e4ab9 | 248 | ucol_freeOffsetBuffer(&(m_data_->iteratordata_)); |
73c04bcf | 249 | uprv_init_collIterate(m_data_->iteratordata_.coll, buffer, length, |
729e4ab9 | 250 | &m_data_->iteratordata_, &status); |
73c04bcf | 251 | m_data_->reset_ = TRUE; |
b75a7d8f A |
252 | } |
253 | ||
254 | int32_t CollationElementIterator::strengthOrder(int32_t order) const | |
255 | { | |
73c04bcf A |
256 | UCollationStrength s = ucol_getStrength(m_data_->iteratordata_.coll); |
257 | // Mask off the unwanted differences. | |
258 | if (s == UCOL_PRIMARY) { | |
259 | order &= RuleBasedCollator::PRIMARYDIFFERENCEONLY; | |
260 | } | |
261 | else if (s == UCOL_SECONDARY) { | |
262 | order &= RuleBasedCollator::SECONDARYDIFFERENCEONLY; | |
263 | } | |
264 | ||
265 | return order; | |
b75a7d8f A |
266 | } |
267 | ||
268 | /* CollationElementIterator private constructors/destructors --------------- */ | |
269 | ||
270 | /** | |
271 | * This is the "real" constructor for this class; it constructs an iterator | |
272 | * over the source text using the specified collator | |
273 | */ | |
274 | CollationElementIterator::CollationElementIterator( | |
275 | const UnicodeString& sourceText, | |
276 | const RuleBasedCollator* order, | |
277 | UErrorCode& status) | |
278 | : isDataOwned_(TRUE) | |
279 | { | |
73c04bcf | 280 | if (U_FAILURE(status)) { |
b75a7d8f | 281 | return; |
73c04bcf A |
282 | } |
283 | ||
284 | int32_t length = sourceText.length(); | |
285 | UChar *string = NULL; | |
286 | ||
287 | if (length > 0) { | |
288 | string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length); | |
289 | /* test for NULL */ | |
290 | if (string == NULL) { | |
291 | status = U_MEMORY_ALLOCATION_ERROR; | |
292 | return; | |
293 | } | |
294 | /* | |
295 | Using this constructor will prevent buffer from being removed when | |
296 | string gets removed | |
297 | */ | |
298 | u_memcpy(string, sourceText.getBuffer(), length); | |
299 | } | |
300 | else { | |
301 | string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR); | |
302 | /* test for NULL */ | |
303 | if (string == NULL) { | |
304 | status = U_MEMORY_ALLOCATION_ERROR; | |
305 | return; | |
306 | } | |
307 | *string = 0; | |
308 | } | |
309 | m_data_ = ucol_openElements(order->ucollator, string, length, &status); | |
310 | ||
311 | /* Test for buffer overflows */ | |
312 | if (U_FAILURE(status)) { | |
313 | return; | |
314 | } | |
315 | m_data_->isWritable = TRUE; | |
b75a7d8f A |
316 | } |
317 | ||
318 | /** | |
319 | * This is the "real" constructor for this class; it constructs an iterator over | |
320 | * the source text using the specified collator | |
321 | */ | |
322 | CollationElementIterator::CollationElementIterator( | |
323 | const CharacterIterator& sourceText, | |
324 | const RuleBasedCollator* order, | |
325 | UErrorCode& status) | |
326 | : isDataOwned_(TRUE) | |
327 | { | |
73c04bcf | 328 | if (U_FAILURE(status)) |
b75a7d8f | 329 | return; |
73c04bcf A |
330 | |
331 | // **** should I just drop this test? **** | |
332 | /* | |
333 | if ( sourceText.endIndex() != 0 ) | |
334 | { | |
335 | // A CollationElementIterator is really a two-layered beast. | |
336 | // Internally it uses a Normalizer to munge the source text into a form | |
337 | // where all "composed" Unicode characters (such as \u00FC) are split into a | |
338 | // normal character and a combining accent character. | |
339 | // Afterward, CollationElementIterator does its own processing to handle | |
340 | // expanding and contracting collation sequences, ignorables, and so on. | |
341 | ||
342 | Normalizer::EMode decomp = order->getStrength() == Collator::IDENTICAL | |
343 | ? Normalizer::NO_OP : order->getDecomposition(); | |
344 | ||
345 | text = new Normalizer(sourceText, decomp); | |
346 | if (text == NULL) | |
347 | status = U_MEMORY_ALLOCATION_ERROR; | |
348 | } | |
349 | */ | |
350 | int32_t length = sourceText.getLength(); | |
351 | UChar *buffer; | |
352 | if (length > 0) { | |
353 | buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length); | |
354 | /* test for NULL */ | |
355 | if (buffer == NULL) { | |
356 | status = U_MEMORY_ALLOCATION_ERROR; | |
357 | return; | |
358 | } | |
359 | /* | |
360 | Using this constructor will prevent buffer from being removed when | |
361 | string gets removed | |
362 | */ | |
363 | UnicodeString string(buffer, length, length); | |
364 | ((CharacterIterator &)sourceText).getText(string); | |
365 | const UChar *temp = string.getBuffer(); | |
366 | u_memcpy(buffer, temp, length); | |
367 | } | |
368 | else { | |
369 | buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR); | |
370 | /* test for NULL */ | |
371 | if (buffer == NULL) { | |
372 | status = U_MEMORY_ALLOCATION_ERROR; | |
373 | return; | |
374 | } | |
375 | *buffer = 0; | |
376 | } | |
377 | m_data_ = ucol_openElements(order->ucollator, buffer, length, &status); | |
378 | ||
379 | /* Test for buffer overflows */ | |
380 | if (U_FAILURE(status)) { | |
b75a7d8f | 381 | return; |
73c04bcf A |
382 | } |
383 | m_data_->isWritable = TRUE; | |
b75a7d8f A |
384 | } |
385 | ||
386 | /* CollationElementIterator protected methods ----------------------------- */ | |
387 | ||
388 | const CollationElementIterator& CollationElementIterator::operator=( | |
389 | const CollationElementIterator& other) | |
390 | { | |
73c04bcf A |
391 | if (this != &other) |
392 | { | |
393 | UCollationElements *ucolelem = this->m_data_; | |
394 | UCollationElements *otherucolelem = other.m_data_; | |
395 | collIterate *coliter = &(ucolelem->iteratordata_); | |
396 | collIterate *othercoliter = &(otherucolelem->iteratordata_); | |
397 | int length = 0; | |
398 | ||
399 | // checking only UCOL_ITER_HASLEN is not enough here as we may be in | |
400 | // the normalization buffer | |
729e4ab9 | 401 | length = (int)(othercoliter->endp - othercoliter->string); |
73c04bcf A |
402 | |
403 | ucolelem->reset_ = otherucolelem->reset_; | |
404 | ucolelem->isWritable = TRUE; | |
405 | ||
406 | /* create a duplicate of string */ | |
407 | if (length > 0) { | |
408 | coliter->string = (UChar *)uprv_malloc(length * U_SIZEOF_UCHAR); | |
409 | if(coliter->string != NULL) { | |
729e4ab9 | 410 | uprv_memcpy((UChar *)coliter->string, othercoliter->string, |
73c04bcf A |
411 | length * U_SIZEOF_UCHAR); |
412 | } else { // Error: couldn't allocate memory. No copying should be done | |
413 | length = 0; | |
414 | } | |
415 | } | |
416 | else { | |
417 | coliter->string = NULL; | |
418 | } | |
419 | ||
420 | /* start and end of string */ | |
421 | coliter->endp = coliter->string + length; | |
422 | ||
423 | /* handle writable buffer here */ | |
424 | ||
425 | if (othercoliter->flags & UCOL_ITER_INNORMBUF) { | |
729e4ab9 A |
426 | coliter->writableBuffer = othercoliter->writableBuffer; |
427 | coliter->writableBuffer.getTerminatedBuffer(); | |
73c04bcf A |
428 | } |
429 | ||
430 | /* current position */ | |
431 | if (othercoliter->pos >= othercoliter->string && | |
432 | othercoliter->pos <= othercoliter->endp) | |
433 | { | |
434 | coliter->pos = coliter->string + | |
435 | (othercoliter->pos - othercoliter->string); | |
436 | } | |
46f4442e | 437 | else { |
729e4ab9 A |
438 | coliter->pos = coliter->writableBuffer.getTerminatedBuffer() + |
439 | (othercoliter->pos - othercoliter->writableBuffer.getBuffer()); | |
46f4442e | 440 | } |
73c04bcf A |
441 | |
442 | /* CE buffer */ | |
46f4442e A |
443 | int32_t CEsize; |
444 | if (coliter->extendCEs) { | |
445 | uprv_memcpy(coliter->CEs, othercoliter->CEs, sizeof(uint32_t) * UCOL_EXPAND_CE_BUFFER_SIZE); | |
446 | CEsize = sizeof(othercoliter->extendCEs); | |
447 | if (CEsize > 0) { | |
448 | othercoliter->extendCEs = (uint32_t *)uprv_malloc(CEsize); | |
449 | uprv_memcpy(coliter->extendCEs, othercoliter->extendCEs, CEsize); | |
450 | } | |
451 | coliter->toReturn = coliter->extendCEs + | |
452 | (othercoliter->toReturn - othercoliter->extendCEs); | |
453 | coliter->CEpos = coliter->extendCEs + CEsize; | |
454 | } else { | |
455 | CEsize = (int32_t)(othercoliter->CEpos - othercoliter->CEs); | |
456 | if (CEsize > 0) { | |
457 | uprv_memcpy(coliter->CEs, othercoliter->CEs, CEsize); | |
458 | } | |
459 | coliter->toReturn = coliter->CEs + | |
460 | (othercoliter->toReturn - othercoliter->CEs); | |
461 | coliter->CEpos = coliter->CEs + CEsize; | |
73c04bcf | 462 | } |
73c04bcf A |
463 | |
464 | if (othercoliter->fcdPosition != NULL) { | |
465 | coliter->fcdPosition = coliter->string + | |
466 | (othercoliter->fcdPosition | |
467 | - othercoliter->string); | |
468 | } | |
469 | else { | |
470 | coliter->fcdPosition = NULL; | |
471 | } | |
472 | coliter->flags = othercoliter->flags/*| UCOL_ITER_HASLEN*/; | |
473 | coliter->origFlags = othercoliter->origFlags; | |
474 | coliter->coll = othercoliter->coll; | |
475 | this->isDataOwned_ = TRUE; | |
476 | } | |
477 | ||
478 | return *this; | |
b75a7d8f A |
479 | } |
480 | ||
481 | U_NAMESPACE_END | |
482 | ||
483 | #endif /* #if !UCONFIG_NO_COLLATION */ | |
484 | ||
485 | /* eof */ |