]>
Commit | Line | Data |
---|---|---|
b75a7d8f A |
1 | /* |
2 | ******************************************************************************* | |
4388f060 | 3 | * Copyright (C) 1996-2011, International Business Machines Corporation and * |
b75a7d8f A |
4 | * others. All Rights Reserved. * |
5 | ******************************************************************************* | |
6 | */ | |
7 | ||
8 | /* | |
9 | * File coleitr.cpp | |
10 | * | |
11 | * | |
12 | * | |
13 | * Created by: Helena Shih | |
14 | * | |
15 | * Modification History: | |
16 | * | |
17 | * Date Name Description | |
18 | * | |
19 | * 6/23/97 helena Adding comments to make code more readable. | |
20 | * 08/03/98 erm Synched with 1.2 version of CollationElementIterator.java | |
21 | * 12/10/99 aliu Ported Thai collation support from Java. | |
22 | * 01/25/01 swquek Modified to a C++ wrapper calling C APIs (ucoliter.h) | |
23 | * 02/19/01 swquek Removed CollationElementsIterator() since it is | |
24 | * private constructor and no calls are made to it | |
25 | */ | |
26 | ||
27 | #include "unicode/utypes.h" | |
28 | ||
29 | #if !UCONFIG_NO_COLLATION | |
30 | ||
31 | #include "unicode/coleitr.h" | |
32 | #include "unicode/ustring.h" | |
33 | #include "ucol_imp.h" | |
4388f060 | 34 | #include "uassert.h" |
b75a7d8f A |
35 | #include "cmemory.h" |
36 | ||
37 | ||
38 | /* Constants --------------------------------------------------------------- */ | |
39 | ||
40 | U_NAMESPACE_BEGIN | |
41 | ||
374ca955 | 42 | UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationElementIterator) |
b75a7d8f | 43 | |
b75a7d8f A |
44 | /* CollationElementIterator public constructor/destructor ------------------ */ |
45 | ||
46 | CollationElementIterator::CollationElementIterator( | |
47 | const CollationElementIterator& other) | |
48 | : UObject(other), isDataOwned_(TRUE) | |
49 | { | |
73c04bcf A |
50 | UErrorCode status = U_ZERO_ERROR; |
51 | m_data_ = ucol_openElements(other.m_data_->iteratordata_.coll, NULL, 0, | |
52 | &status); | |
b75a7d8f | 53 | |
73c04bcf | 54 | *this = other; |
b75a7d8f A |
55 | } |
56 | ||
57 | CollationElementIterator::~CollationElementIterator() | |
58 | { | |
73c04bcf A |
59 | if (isDataOwned_) { |
60 | ucol_closeElements(m_data_); | |
61 | } | |
b75a7d8f A |
62 | } |
63 | ||
64 | /* CollationElementIterator public methods --------------------------------- */ | |
65 | ||
66 | int32_t CollationElementIterator::getOffset() const | |
67 | { | |
73c04bcf | 68 | return ucol_getOffset(m_data_); |
b75a7d8f A |
69 | } |
70 | ||
71 | /** | |
72 | * Get the ordering priority of the next character in the string. | |
73 | * @return the next character's ordering. Returns NULLORDER if an error has | |
74 | * occured or if the end of string has been reached | |
75 | */ | |
76 | int32_t CollationElementIterator::next(UErrorCode& status) | |
77 | { | |
73c04bcf | 78 | return ucol_next(m_data_, &status); |
b75a7d8f A |
79 | } |
80 | ||
81 | UBool CollationElementIterator::operator!=( | |
82 | const CollationElementIterator& other) const | |
83 | { | |
73c04bcf | 84 | return !(*this == other); |
b75a7d8f A |
85 | } |
86 | ||
87 | UBool CollationElementIterator::operator==( | |
88 | const CollationElementIterator& that) const | |
89 | { | |
46f4442e | 90 | if (this == &that || m_data_ == that.m_data_) { |
b75a7d8f A |
91 | return TRUE; |
92 | } | |
93 | ||
94 | // option comparison | |
374ca955 | 95 | if (m_data_->iteratordata_.coll != that.m_data_->iteratordata_.coll) |
b75a7d8f A |
96 | { |
97 | return FALSE; | |
98 | } | |
99 | ||
100 | // the constructor and setText always sets a length | |
101 | // and we only compare the string not the contents of the normalization | |
102 | // buffer | |
729e4ab9 A |
103 | int thislength = (int)(m_data_->iteratordata_.endp - m_data_->iteratordata_.string); |
104 | int thatlength = (int)(that.m_data_->iteratordata_.endp - that.m_data_->iteratordata_.string); | |
b75a7d8f A |
105 | |
106 | if (thislength != thatlength) { | |
107 | return FALSE; | |
108 | } | |
109 | ||
110 | if (uprv_memcmp(m_data_->iteratordata_.string, | |
111 | that.m_data_->iteratordata_.string, | |
112 | thislength * U_SIZEOF_UCHAR) != 0) { | |
113 | return FALSE; | |
114 | } | |
115 | if (getOffset() != that.getOffset()) { | |
116 | return FALSE; | |
117 | } | |
118 | ||
119 | // checking normalization buffer | |
120 | if ((m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) { | |
46f4442e | 121 | if ((that.m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) != 0) { |
b75a7d8f A |
122 | return FALSE; |
123 | } | |
124 | // both are in the normalization buffer | |
125 | if (m_data_->iteratordata_.pos | |
729e4ab9 | 126 | - m_data_->iteratordata_.writableBuffer.getBuffer() |
b75a7d8f | 127 | != that.m_data_->iteratordata_.pos |
729e4ab9 | 128 | - that.m_data_->iteratordata_.writableBuffer.getBuffer()) { |
b75a7d8f A |
129 | // not in the same position in the normalization buffer |
130 | return FALSE; | |
131 | } | |
132 | } | |
46f4442e | 133 | else if ((that.m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) { |
b75a7d8f A |
134 | return FALSE; |
135 | } | |
136 | // checking ce position | |
137 | return (m_data_->iteratordata_.CEpos - m_data_->iteratordata_.CEs) | |
138 | == (that.m_data_->iteratordata_.CEpos | |
139 | - that.m_data_->iteratordata_.CEs); | |
140 | } | |
141 | ||
142 | /** | |
143 | * Get the ordering priority of the previous collation element in the string. | |
144 | * @param status the error code status. | |
145 | * @return the previous element's ordering. Returns NULLORDER if an error has | |
146 | * occured or if the start of string has been reached. | |
147 | */ | |
148 | int32_t CollationElementIterator::previous(UErrorCode& status) | |
149 | { | |
73c04bcf | 150 | return ucol_previous(m_data_, &status); |
b75a7d8f A |
151 | } |
152 | ||
153 | /** | |
154 | * Resets the cursor to the beginning of the string. | |
155 | */ | |
156 | void CollationElementIterator::reset() | |
157 | { | |
73c04bcf | 158 | ucol_reset(m_data_); |
b75a7d8f A |
159 | } |
160 | ||
161 | void CollationElementIterator::setOffset(int32_t newOffset, | |
162 | UErrorCode& status) | |
163 | { | |
73c04bcf | 164 | ucol_setOffset(m_data_, newOffset, &status); |
b75a7d8f A |
165 | } |
166 | ||
167 | /** | |
168 | * Sets the source to the new source string. | |
169 | */ | |
170 | void CollationElementIterator::setText(const UnicodeString& source, | |
171 | UErrorCode& status) | |
172 | { | |
73c04bcf | 173 | if (U_FAILURE(status)) { |
b75a7d8f A |
174 | return; |
175 | } | |
73c04bcf A |
176 | |
177 | int32_t length = source.length(); | |
178 | UChar *string = NULL; | |
179 | if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) { | |
729e4ab9 | 180 | uprv_free((UChar *)m_data_->iteratordata_.string); |
73c04bcf A |
181 | } |
182 | m_data_->isWritable = TRUE; | |
183 | if (length > 0) { | |
184 | string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length); | |
185 | /* test for NULL */ | |
186 | if (string == NULL) { | |
187 | status = U_MEMORY_ALLOCATION_ERROR; | |
188 | return; | |
189 | } | |
190 | u_memcpy(string, source.getBuffer(), length); | |
191 | } | |
192 | else { | |
193 | string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR); | |
194 | /* test for NULL */ | |
195 | if (string == NULL) { | |
196 | status = U_MEMORY_ALLOCATION_ERROR; | |
197 | return; | |
198 | } | |
199 | *string = 0; | |
b75a7d8f | 200 | } |
46f4442e | 201 | /* Free offsetBuffer before initializing it. */ |
729e4ab9 | 202 | ucol_freeOffsetBuffer(&(m_data_->iteratordata_)); |
73c04bcf | 203 | uprv_init_collIterate(m_data_->iteratordata_.coll, string, length, |
729e4ab9 | 204 | &m_data_->iteratordata_, &status); |
b75a7d8f | 205 | |
73c04bcf | 206 | m_data_->reset_ = TRUE; |
b75a7d8f A |
207 | } |
208 | ||
209 | // Sets the source to the new character iterator. | |
210 | void CollationElementIterator::setText(CharacterIterator& source, | |
211 | UErrorCode& status) | |
212 | { | |
73c04bcf | 213 | if (U_FAILURE(status)) |
b75a7d8f | 214 | return; |
73c04bcf A |
215 | |
216 | int32_t length = source.getLength(); | |
217 | UChar *buffer = NULL; | |
218 | ||
219 | if (length == 0) { | |
220 | buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR); | |
221 | /* test for NULL */ | |
222 | if (buffer == NULL) { | |
223 | status = U_MEMORY_ALLOCATION_ERROR; | |
224 | return; | |
225 | } | |
226 | *buffer = 0; | |
b75a7d8f | 227 | } |
73c04bcf A |
228 | else { |
229 | buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length); | |
230 | /* test for NULL */ | |
231 | if (buffer == NULL) { | |
232 | status = U_MEMORY_ALLOCATION_ERROR; | |
233 | return; | |
234 | } | |
235 | /* | |
236 | Using this constructor will prevent buffer from being removed when | |
237 | string gets removed | |
238 | */ | |
239 | UnicodeString string; | |
240 | source.getText(string); | |
241 | u_memcpy(buffer, string.getBuffer(), length); | |
242 | } | |
243 | ||
244 | if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) { | |
729e4ab9 | 245 | uprv_free((UChar *)m_data_->iteratordata_.string); |
73c04bcf A |
246 | } |
247 | m_data_->isWritable = TRUE; | |
46f4442e | 248 | /* Free offsetBuffer before initializing it. */ |
729e4ab9 | 249 | ucol_freeOffsetBuffer(&(m_data_->iteratordata_)); |
73c04bcf | 250 | uprv_init_collIterate(m_data_->iteratordata_.coll, buffer, length, |
729e4ab9 | 251 | &m_data_->iteratordata_, &status); |
73c04bcf | 252 | m_data_->reset_ = TRUE; |
b75a7d8f A |
253 | } |
254 | ||
255 | int32_t CollationElementIterator::strengthOrder(int32_t order) const | |
256 | { | |
73c04bcf A |
257 | UCollationStrength s = ucol_getStrength(m_data_->iteratordata_.coll); |
258 | // Mask off the unwanted differences. | |
259 | if (s == UCOL_PRIMARY) { | |
260 | order &= RuleBasedCollator::PRIMARYDIFFERENCEONLY; | |
261 | } | |
262 | else if (s == UCOL_SECONDARY) { | |
263 | order &= RuleBasedCollator::SECONDARYDIFFERENCEONLY; | |
264 | } | |
265 | ||
266 | return order; | |
b75a7d8f A |
267 | } |
268 | ||
269 | /* CollationElementIterator private constructors/destructors --------------- */ | |
270 | ||
271 | /** | |
272 | * This is the "real" constructor for this class; it constructs an iterator | |
273 | * over the source text using the specified collator | |
274 | */ | |
275 | CollationElementIterator::CollationElementIterator( | |
276 | const UnicodeString& sourceText, | |
277 | const RuleBasedCollator* order, | |
278 | UErrorCode& status) | |
279 | : isDataOwned_(TRUE) | |
280 | { | |
73c04bcf | 281 | if (U_FAILURE(status)) { |
b75a7d8f | 282 | return; |
73c04bcf A |
283 | } |
284 | ||
285 | int32_t length = sourceText.length(); | |
286 | UChar *string = NULL; | |
287 | ||
288 | if (length > 0) { | |
289 | string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length); | |
290 | /* test for NULL */ | |
291 | if (string == NULL) { | |
292 | status = U_MEMORY_ALLOCATION_ERROR; | |
293 | return; | |
294 | } | |
295 | /* | |
296 | Using this constructor will prevent buffer from being removed when | |
297 | string gets removed | |
298 | */ | |
299 | u_memcpy(string, sourceText.getBuffer(), length); | |
300 | } | |
301 | else { | |
302 | string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR); | |
303 | /* test for NULL */ | |
304 | if (string == NULL) { | |
305 | status = U_MEMORY_ALLOCATION_ERROR; | |
306 | return; | |
307 | } | |
308 | *string = 0; | |
309 | } | |
310 | m_data_ = ucol_openElements(order->ucollator, string, length, &status); | |
311 | ||
312 | /* Test for buffer overflows */ | |
313 | if (U_FAILURE(status)) { | |
314 | return; | |
315 | } | |
316 | m_data_->isWritable = TRUE; | |
b75a7d8f A |
317 | } |
318 | ||
319 | /** | |
320 | * This is the "real" constructor for this class; it constructs an iterator over | |
321 | * the source text using the specified collator | |
322 | */ | |
323 | CollationElementIterator::CollationElementIterator( | |
324 | const CharacterIterator& sourceText, | |
325 | const RuleBasedCollator* order, | |
326 | UErrorCode& status) | |
327 | : isDataOwned_(TRUE) | |
328 | { | |
73c04bcf | 329 | if (U_FAILURE(status)) |
b75a7d8f | 330 | return; |
73c04bcf A |
331 | |
332 | // **** should I just drop this test? **** | |
333 | /* | |
334 | if ( sourceText.endIndex() != 0 ) | |
335 | { | |
336 | // A CollationElementIterator is really a two-layered beast. | |
337 | // Internally it uses a Normalizer to munge the source text into a form | |
338 | // where all "composed" Unicode characters (such as \u00FC) are split into a | |
339 | // normal character and a combining accent character. | |
340 | // Afterward, CollationElementIterator does its own processing to handle | |
341 | // expanding and contracting collation sequences, ignorables, and so on. | |
342 | ||
343 | Normalizer::EMode decomp = order->getStrength() == Collator::IDENTICAL | |
344 | ? Normalizer::NO_OP : order->getDecomposition(); | |
345 | ||
346 | text = new Normalizer(sourceText, decomp); | |
347 | if (text == NULL) | |
348 | status = U_MEMORY_ALLOCATION_ERROR; | |
349 | } | |
350 | */ | |
351 | int32_t length = sourceText.getLength(); | |
352 | UChar *buffer; | |
353 | if (length > 0) { | |
354 | buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length); | |
355 | /* test for NULL */ | |
356 | if (buffer == NULL) { | |
357 | status = U_MEMORY_ALLOCATION_ERROR; | |
358 | return; | |
359 | } | |
360 | /* | |
361 | Using this constructor will prevent buffer from being removed when | |
362 | string gets removed | |
363 | */ | |
364 | UnicodeString string(buffer, length, length); | |
365 | ((CharacterIterator &)sourceText).getText(string); | |
366 | const UChar *temp = string.getBuffer(); | |
367 | u_memcpy(buffer, temp, length); | |
368 | } | |
369 | else { | |
370 | buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR); | |
371 | /* test for NULL */ | |
372 | if (buffer == NULL) { | |
373 | status = U_MEMORY_ALLOCATION_ERROR; | |
374 | return; | |
375 | } | |
376 | *buffer = 0; | |
377 | } | |
378 | m_data_ = ucol_openElements(order->ucollator, buffer, length, &status); | |
379 | ||
380 | /* Test for buffer overflows */ | |
381 | if (U_FAILURE(status)) { | |
b75a7d8f | 382 | return; |
73c04bcf A |
383 | } |
384 | m_data_->isWritable = TRUE; | |
b75a7d8f A |
385 | } |
386 | ||
387 | /* CollationElementIterator protected methods ----------------------------- */ | |
388 | ||
389 | const CollationElementIterator& CollationElementIterator::operator=( | |
390 | const CollationElementIterator& other) | |
391 | { | |
73c04bcf A |
392 | if (this != &other) |
393 | { | |
394 | UCollationElements *ucolelem = this->m_data_; | |
395 | UCollationElements *otherucolelem = other.m_data_; | |
396 | collIterate *coliter = &(ucolelem->iteratordata_); | |
397 | collIterate *othercoliter = &(otherucolelem->iteratordata_); | |
398 | int length = 0; | |
399 | ||
400 | // checking only UCOL_ITER_HASLEN is not enough here as we may be in | |
401 | // the normalization buffer | |
729e4ab9 | 402 | length = (int)(othercoliter->endp - othercoliter->string); |
73c04bcf A |
403 | |
404 | ucolelem->reset_ = otherucolelem->reset_; | |
405 | ucolelem->isWritable = TRUE; | |
406 | ||
407 | /* create a duplicate of string */ | |
408 | if (length > 0) { | |
409 | coliter->string = (UChar *)uprv_malloc(length * U_SIZEOF_UCHAR); | |
410 | if(coliter->string != NULL) { | |
729e4ab9 | 411 | uprv_memcpy((UChar *)coliter->string, othercoliter->string, |
73c04bcf A |
412 | length * U_SIZEOF_UCHAR); |
413 | } else { // Error: couldn't allocate memory. No copying should be done | |
414 | length = 0; | |
415 | } | |
416 | } | |
417 | else { | |
418 | coliter->string = NULL; | |
419 | } | |
420 | ||
421 | /* start and end of string */ | |
4388f060 | 422 | coliter->endp = coliter->string == NULL ? NULL : coliter->string + length; |
73c04bcf A |
423 | |
424 | /* handle writable buffer here */ | |
425 | ||
426 | if (othercoliter->flags & UCOL_ITER_INNORMBUF) { | |
729e4ab9 A |
427 | coliter->writableBuffer = othercoliter->writableBuffer; |
428 | coliter->writableBuffer.getTerminatedBuffer(); | |
73c04bcf A |
429 | } |
430 | ||
431 | /* current position */ | |
432 | if (othercoliter->pos >= othercoliter->string && | |
433 | othercoliter->pos <= othercoliter->endp) | |
434 | { | |
4388f060 | 435 | U_ASSERT(coliter->string != NULL); |
73c04bcf A |
436 | coliter->pos = coliter->string + |
437 | (othercoliter->pos - othercoliter->string); | |
438 | } | |
46f4442e | 439 | else { |
729e4ab9 A |
440 | coliter->pos = coliter->writableBuffer.getTerminatedBuffer() + |
441 | (othercoliter->pos - othercoliter->writableBuffer.getBuffer()); | |
46f4442e | 442 | } |
73c04bcf A |
443 | |
444 | /* CE buffer */ | |
46f4442e A |
445 | int32_t CEsize; |
446 | if (coliter->extendCEs) { | |
447 | uprv_memcpy(coliter->CEs, othercoliter->CEs, sizeof(uint32_t) * UCOL_EXPAND_CE_BUFFER_SIZE); | |
448 | CEsize = sizeof(othercoliter->extendCEs); | |
449 | if (CEsize > 0) { | |
450 | othercoliter->extendCEs = (uint32_t *)uprv_malloc(CEsize); | |
451 | uprv_memcpy(coliter->extendCEs, othercoliter->extendCEs, CEsize); | |
452 | } | |
453 | coliter->toReturn = coliter->extendCEs + | |
454 | (othercoliter->toReturn - othercoliter->extendCEs); | |
455 | coliter->CEpos = coliter->extendCEs + CEsize; | |
456 | } else { | |
457 | CEsize = (int32_t)(othercoliter->CEpos - othercoliter->CEs); | |
458 | if (CEsize > 0) { | |
459 | uprv_memcpy(coliter->CEs, othercoliter->CEs, CEsize); | |
460 | } | |
461 | coliter->toReturn = coliter->CEs + | |
462 | (othercoliter->toReturn - othercoliter->CEs); | |
463 | coliter->CEpos = coliter->CEs + CEsize; | |
73c04bcf | 464 | } |
73c04bcf A |
465 | |
466 | if (othercoliter->fcdPosition != NULL) { | |
4388f060 | 467 | U_ASSERT(coliter->string != NULL); |
73c04bcf A |
468 | coliter->fcdPosition = coliter->string + |
469 | (othercoliter->fcdPosition | |
470 | - othercoliter->string); | |
471 | } | |
472 | else { | |
473 | coliter->fcdPosition = NULL; | |
474 | } | |
475 | coliter->flags = othercoliter->flags/*| UCOL_ITER_HASLEN*/; | |
476 | coliter->origFlags = othercoliter->origFlags; | |
477 | coliter->coll = othercoliter->coll; | |
478 | this->isDataOwned_ = TRUE; | |
479 | } | |
480 | ||
481 | return *this; | |
b75a7d8f A |
482 | } |
483 | ||
484 | U_NAMESPACE_END | |
485 | ||
486 | #endif /* #if !UCONFIG_NO_COLLATION */ | |
487 | ||
488 | /* eof */ |