]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/coleitr.cpp
ICU-400.39.tar.gz
[apple/icu.git] / icuSources / i18n / coleitr.cpp
CommitLineData
b75a7d8f
A
1/*
2*******************************************************************************
46f4442e 3* Copyright (C) 1996-2008, International Business Machines Corporation and *
b75a7d8f
A
4* others. All Rights Reserved. *
5*******************************************************************************
6*/
7
8/*
9* File coleitr.cpp
10*
11*
12*
13* Created by: Helena Shih
14*
15* Modification History:
16*
17* Date Name Description
18*
19* 6/23/97 helena Adding comments to make code more readable.
20* 08/03/98 erm Synched with 1.2 version of CollationElementIterator.java
21* 12/10/99 aliu Ported Thai collation support from Java.
22* 01/25/01 swquek Modified to a C++ wrapper calling C APIs (ucoliter.h)
23* 02/19/01 swquek Removed CollationElementsIterator() since it is
24* private constructor and no calls are made to it
25*/
26
27#include "unicode/utypes.h"
28
29#if !UCONFIG_NO_COLLATION
30
31#include "unicode/coleitr.h"
32#include "unicode/ustring.h"
33#include "ucol_imp.h"
34#include "cmemory.h"
35
36
37/* Constants --------------------------------------------------------------- */
38
39U_NAMESPACE_BEGIN
40
374ca955 41UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationElementIterator)
b75a7d8f 42
b75a7d8f
A
43/* CollationElementIterator public constructor/destructor ------------------ */
44
45CollationElementIterator::CollationElementIterator(
46 const CollationElementIterator& other)
47 : UObject(other), isDataOwned_(TRUE)
48{
73c04bcf
A
49 UErrorCode status = U_ZERO_ERROR;
50 m_data_ = ucol_openElements(other.m_data_->iteratordata_.coll, NULL, 0,
51 &status);
b75a7d8f 52
73c04bcf 53 *this = other;
b75a7d8f
A
54}
55
56CollationElementIterator::~CollationElementIterator()
57{
73c04bcf
A
58 if (isDataOwned_) {
59 ucol_closeElements(m_data_);
60 }
b75a7d8f
A
61}
62
63/* CollationElementIterator public methods --------------------------------- */
64
65int32_t CollationElementIterator::getOffset() const
66{
73c04bcf 67 return ucol_getOffset(m_data_);
b75a7d8f
A
68}
69
70/**
71* Get the ordering priority of the next character in the string.
72* @return the next character's ordering. Returns NULLORDER if an error has
73* occured or if the end of string has been reached
74*/
75int32_t CollationElementIterator::next(UErrorCode& status)
76{
73c04bcf 77 return ucol_next(m_data_, &status);
b75a7d8f
A
78}
79
80UBool CollationElementIterator::operator!=(
81 const CollationElementIterator& other) const
82{
73c04bcf 83 return !(*this == other);
b75a7d8f
A
84}
85
86UBool CollationElementIterator::operator==(
87 const CollationElementIterator& that) const
88{
46f4442e 89 if (this == &that || m_data_ == that.m_data_) {
b75a7d8f
A
90 return TRUE;
91 }
92
93 // option comparison
374ca955 94 if (m_data_->iteratordata_.coll != that.m_data_->iteratordata_.coll)
b75a7d8f
A
95 {
96 return FALSE;
97 }
98
99 // the constructor and setText always sets a length
100 // and we only compare the string not the contents of the normalization
101 // buffer
102 int thislength = m_data_->iteratordata_.endp -
103 m_data_->iteratordata_.string;
104 int thatlength = that.m_data_->iteratordata_.endp -
105 that.m_data_->iteratordata_.string;
106
107 if (thislength != thatlength) {
108 return FALSE;
109 }
110
111 if (uprv_memcmp(m_data_->iteratordata_.string,
112 that.m_data_->iteratordata_.string,
113 thislength * U_SIZEOF_UCHAR) != 0) {
114 return FALSE;
115 }
116 if (getOffset() != that.getOffset()) {
117 return FALSE;
118 }
119
120 // checking normalization buffer
121 if ((m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) {
46f4442e 122 if ((that.m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) != 0) {
b75a7d8f
A
123 return FALSE;
124 }
125 // both are in the normalization buffer
126 if (m_data_->iteratordata_.pos
127 - m_data_->iteratordata_.writableBuffer
128 != that.m_data_->iteratordata_.pos
129 - that.m_data_->iteratordata_.writableBuffer) {
130 // not in the same position in the normalization buffer
131 return FALSE;
132 }
133 }
46f4442e 134 else if ((that.m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) {
b75a7d8f
A
135 return FALSE;
136 }
137 // checking ce position
138 return (m_data_->iteratordata_.CEpos - m_data_->iteratordata_.CEs)
139 == (that.m_data_->iteratordata_.CEpos
140 - that.m_data_->iteratordata_.CEs);
141}
142
143/**
144* Get the ordering priority of the previous collation element in the string.
145* @param status the error code status.
146* @return the previous element's ordering. Returns NULLORDER if an error has
147* occured or if the start of string has been reached.
148*/
149int32_t CollationElementIterator::previous(UErrorCode& status)
150{
73c04bcf 151 return ucol_previous(m_data_, &status);
b75a7d8f
A
152}
153
154/**
155* Resets the cursor to the beginning of the string.
156*/
157void CollationElementIterator::reset()
158{
73c04bcf 159 ucol_reset(m_data_);
b75a7d8f
A
160}
161
162void CollationElementIterator::setOffset(int32_t newOffset,
163 UErrorCode& status)
164{
73c04bcf 165 ucol_setOffset(m_data_, newOffset, &status);
b75a7d8f
A
166}
167
168/**
169* Sets the source to the new source string.
170*/
171void CollationElementIterator::setText(const UnicodeString& source,
172 UErrorCode& status)
173{
73c04bcf 174 if (U_FAILURE(status)) {
b75a7d8f
A
175 return;
176 }
73c04bcf
A
177
178 int32_t length = source.length();
179 UChar *string = NULL;
180 if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) {
181 uprv_free(m_data_->iteratordata_.string);
182 }
183 m_data_->isWritable = TRUE;
184 if (length > 0) {
185 string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
186 /* test for NULL */
187 if (string == NULL) {
188 status = U_MEMORY_ALLOCATION_ERROR;
189 return;
190 }
191 u_memcpy(string, source.getBuffer(), length);
192 }
193 else {
194 string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
195 /* test for NULL */
196 if (string == NULL) {
197 status = U_MEMORY_ALLOCATION_ERROR;
198 return;
199 }
200 *string = 0;
b75a7d8f 201 }
46f4442e
A
202 /* Free offsetBuffer before initializing it. */
203 freeOffsetBuffer(&(m_data_->iteratordata_));
73c04bcf
A
204 uprv_init_collIterate(m_data_->iteratordata_.coll, string, length,
205 &m_data_->iteratordata_);
b75a7d8f 206
73c04bcf 207 m_data_->reset_ = TRUE;
b75a7d8f
A
208}
209
210// Sets the source to the new character iterator.
211void CollationElementIterator::setText(CharacterIterator& source,
212 UErrorCode& status)
213{
73c04bcf 214 if (U_FAILURE(status))
b75a7d8f 215 return;
73c04bcf
A
216
217 int32_t length = source.getLength();
218 UChar *buffer = NULL;
219
220 if (length == 0) {
221 buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
222 /* test for NULL */
223 if (buffer == NULL) {
224 status = U_MEMORY_ALLOCATION_ERROR;
225 return;
226 }
227 *buffer = 0;
b75a7d8f 228 }
73c04bcf
A
229 else {
230 buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
231 /* test for NULL */
232 if (buffer == NULL) {
233 status = U_MEMORY_ALLOCATION_ERROR;
234 return;
235 }
236 /*
237 Using this constructor will prevent buffer from being removed when
238 string gets removed
239 */
240 UnicodeString string;
241 source.getText(string);
242 u_memcpy(buffer, string.getBuffer(), length);
243 }
244
245 if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) {
246 uprv_free(m_data_->iteratordata_.string);
247 }
248 m_data_->isWritable = TRUE;
46f4442e
A
249 /* Free offsetBuffer before initializing it. */
250 freeOffsetBuffer(&(m_data_->iteratordata_));
73c04bcf
A
251 uprv_init_collIterate(m_data_->iteratordata_.coll, buffer, length,
252 &m_data_->iteratordata_);
253 m_data_->reset_ = TRUE;
b75a7d8f
A
254}
255
256int32_t CollationElementIterator::strengthOrder(int32_t order) const
257{
73c04bcf
A
258 UCollationStrength s = ucol_getStrength(m_data_->iteratordata_.coll);
259 // Mask off the unwanted differences.
260 if (s == UCOL_PRIMARY) {
261 order &= RuleBasedCollator::PRIMARYDIFFERENCEONLY;
262 }
263 else if (s == UCOL_SECONDARY) {
264 order &= RuleBasedCollator::SECONDARYDIFFERENCEONLY;
265 }
266
267 return order;
b75a7d8f
A
268}
269
270/* CollationElementIterator private constructors/destructors --------------- */
271
272/**
273* This is the "real" constructor for this class; it constructs an iterator
274* over the source text using the specified collator
275*/
276CollationElementIterator::CollationElementIterator(
277 const UnicodeString& sourceText,
278 const RuleBasedCollator* order,
279 UErrorCode& status)
280 : isDataOwned_(TRUE)
281{
73c04bcf 282 if (U_FAILURE(status)) {
b75a7d8f 283 return;
73c04bcf
A
284 }
285
286 int32_t length = sourceText.length();
287 UChar *string = NULL;
288
289 if (length > 0) {
290 string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
291 /* test for NULL */
292 if (string == NULL) {
293 status = U_MEMORY_ALLOCATION_ERROR;
294 return;
295 }
296 /*
297 Using this constructor will prevent buffer from being removed when
298 string gets removed
299 */
300 u_memcpy(string, sourceText.getBuffer(), length);
301 }
302 else {
303 string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
304 /* test for NULL */
305 if (string == NULL) {
306 status = U_MEMORY_ALLOCATION_ERROR;
307 return;
308 }
309 *string = 0;
310 }
311 m_data_ = ucol_openElements(order->ucollator, string, length, &status);
312
313 /* Test for buffer overflows */
314 if (U_FAILURE(status)) {
315 return;
316 }
317 m_data_->isWritable = TRUE;
b75a7d8f
A
318}
319
320/**
321* This is the "real" constructor for this class; it constructs an iterator over
322* the source text using the specified collator
323*/
324CollationElementIterator::CollationElementIterator(
325 const CharacterIterator& sourceText,
326 const RuleBasedCollator* order,
327 UErrorCode& status)
328 : isDataOwned_(TRUE)
329{
73c04bcf 330 if (U_FAILURE(status))
b75a7d8f 331 return;
73c04bcf
A
332
333 // **** should I just drop this test? ****
334 /*
335 if ( sourceText.endIndex() != 0 )
336 {
337 // A CollationElementIterator is really a two-layered beast.
338 // Internally it uses a Normalizer to munge the source text into a form
339 // where all "composed" Unicode characters (such as \u00FC) are split into a
340 // normal character and a combining accent character.
341 // Afterward, CollationElementIterator does its own processing to handle
342 // expanding and contracting collation sequences, ignorables, and so on.
343
344 Normalizer::EMode decomp = order->getStrength() == Collator::IDENTICAL
345 ? Normalizer::NO_OP : order->getDecomposition();
346
347 text = new Normalizer(sourceText, decomp);
348 if (text == NULL)
349 status = U_MEMORY_ALLOCATION_ERROR;
350 }
351 */
352 int32_t length = sourceText.getLength();
353 UChar *buffer;
354 if (length > 0) {
355 buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
356 /* test for NULL */
357 if (buffer == NULL) {
358 status = U_MEMORY_ALLOCATION_ERROR;
359 return;
360 }
361 /*
362 Using this constructor will prevent buffer from being removed when
363 string gets removed
364 */
365 UnicodeString string(buffer, length, length);
366 ((CharacterIterator &)sourceText).getText(string);
367 const UChar *temp = string.getBuffer();
368 u_memcpy(buffer, temp, length);
369 }
370 else {
371 buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
372 /* test for NULL */
373 if (buffer == NULL) {
374 status = U_MEMORY_ALLOCATION_ERROR;
375 return;
376 }
377 *buffer = 0;
378 }
379 m_data_ = ucol_openElements(order->ucollator, buffer, length, &status);
380
381 /* Test for buffer overflows */
382 if (U_FAILURE(status)) {
b75a7d8f 383 return;
73c04bcf
A
384 }
385 m_data_->isWritable = TRUE;
b75a7d8f
A
386}
387
388/* CollationElementIterator protected methods ----------------------------- */
389
390const CollationElementIterator& CollationElementIterator::operator=(
391 const CollationElementIterator& other)
392{
73c04bcf
A
393 if (this != &other)
394 {
395 UCollationElements *ucolelem = this->m_data_;
396 UCollationElements *otherucolelem = other.m_data_;
397 collIterate *coliter = &(ucolelem->iteratordata_);
398 collIterate *othercoliter = &(otherucolelem->iteratordata_);
399 int length = 0;
400
401 // checking only UCOL_ITER_HASLEN is not enough here as we may be in
402 // the normalization buffer
403 length = othercoliter->endp - othercoliter->string;
404
405 ucolelem->reset_ = otherucolelem->reset_;
406 ucolelem->isWritable = TRUE;
407
408 /* create a duplicate of string */
409 if (length > 0) {
410 coliter->string = (UChar *)uprv_malloc(length * U_SIZEOF_UCHAR);
411 if(coliter->string != NULL) {
412 uprv_memcpy(coliter->string, othercoliter->string,
413 length * U_SIZEOF_UCHAR);
414 } else { // Error: couldn't allocate memory. No copying should be done
415 length = 0;
416 }
417 }
418 else {
419 coliter->string = NULL;
420 }
421
422 /* start and end of string */
423 coliter->endp = coliter->string + length;
424
425 /* handle writable buffer here */
426
427 if (othercoliter->flags & UCOL_ITER_INNORMBUF) {
428 uint32_t wlength = u_strlen(othercoliter->writableBuffer) + 1;
429 if (wlength < coliter->writableBufSize) {
430 uprv_memcpy(coliter->stackWritableBuffer,
431 othercoliter->stackWritableBuffer,
432 wlength * U_SIZEOF_UCHAR);
433 }
434 else {
435 if (coliter->writableBuffer != coliter->stackWritableBuffer) {
436 uprv_free(coliter->writableBuffer);
437 }
438 coliter->writableBuffer = (UChar *)uprv_malloc(
439 wlength * U_SIZEOF_UCHAR);
440 if(coliter->writableBuffer != NULL) {
441 uprv_memcpy(coliter->writableBuffer,
442 othercoliter->writableBuffer,
443 wlength * U_SIZEOF_UCHAR);
444 coliter->writableBufSize = wlength;
445 } else { // Error: couldn't allocate memory for writableBuffer
446 coliter->writableBufSize = 0;
447 }
448 }
449 }
450
451 /* current position */
452 if (othercoliter->pos >= othercoliter->string &&
453 othercoliter->pos <= othercoliter->endp)
454 {
455 coliter->pos = coliter->string +
456 (othercoliter->pos - othercoliter->string);
457 }
46f4442e 458 else if (coliter->writableBuffer != NULL) {
73c04bcf
A
459 coliter->pos = coliter->writableBuffer +
460 (othercoliter->pos - othercoliter->writableBuffer);
461 }
46f4442e
A
462 else {
463 // Error: couldn't allocate memory for writableBuffer
464 coliter->pos = NULL;
465 }
73c04bcf
A
466
467 /* CE buffer */
46f4442e
A
468 int32_t CEsize;
469 if (coliter->extendCEs) {
470 uprv_memcpy(coliter->CEs, othercoliter->CEs, sizeof(uint32_t) * UCOL_EXPAND_CE_BUFFER_SIZE);
471 CEsize = sizeof(othercoliter->extendCEs);
472 if (CEsize > 0) {
473 othercoliter->extendCEs = (uint32_t *)uprv_malloc(CEsize);
474 uprv_memcpy(coliter->extendCEs, othercoliter->extendCEs, CEsize);
475 }
476 coliter->toReturn = coliter->extendCEs +
477 (othercoliter->toReturn - othercoliter->extendCEs);
478 coliter->CEpos = coliter->extendCEs + CEsize;
479 } else {
480 CEsize = (int32_t)(othercoliter->CEpos - othercoliter->CEs);
481 if (CEsize > 0) {
482 uprv_memcpy(coliter->CEs, othercoliter->CEs, CEsize);
483 }
484 coliter->toReturn = coliter->CEs +
485 (othercoliter->toReturn - othercoliter->CEs);
486 coliter->CEpos = coliter->CEs + CEsize;
73c04bcf 487 }
73c04bcf
A
488
489 if (othercoliter->fcdPosition != NULL) {
490 coliter->fcdPosition = coliter->string +
491 (othercoliter->fcdPosition
492 - othercoliter->string);
493 }
494 else {
495 coliter->fcdPosition = NULL;
496 }
497 coliter->flags = othercoliter->flags/*| UCOL_ITER_HASLEN*/;
498 coliter->origFlags = othercoliter->origFlags;
499 coliter->coll = othercoliter->coll;
500 this->isDataOwned_ = TRUE;
501 }
502
503 return *this;
b75a7d8f
A
504}
505
506U_NAMESPACE_END
507
508#endif /* #if !UCONFIG_NO_COLLATION */
509
510/* eof */