]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/coleitr.cpp
ICU-491.11.3.tar.gz
[apple/icu.git] / icuSources / i18n / coleitr.cpp
CommitLineData
b75a7d8f
A
1/*
2*******************************************************************************
4388f060 3* Copyright (C) 1996-2011, International Business Machines Corporation and *
b75a7d8f
A
4* others. All Rights Reserved. *
5*******************************************************************************
6*/
7
8/*
9* File coleitr.cpp
10*
11*
12*
13* Created by: Helena Shih
14*
15* Modification History:
16*
17* Date Name Description
18*
19* 6/23/97 helena Adding comments to make code more readable.
20* 08/03/98 erm Synched with 1.2 version of CollationElementIterator.java
21* 12/10/99 aliu Ported Thai collation support from Java.
22* 01/25/01 swquek Modified to a C++ wrapper calling C APIs (ucoliter.h)
23* 02/19/01 swquek Removed CollationElementsIterator() since it is
24* private constructor and no calls are made to it
25*/
26
27#include "unicode/utypes.h"
28
29#if !UCONFIG_NO_COLLATION
30
31#include "unicode/coleitr.h"
32#include "unicode/ustring.h"
33#include "ucol_imp.h"
4388f060 34#include "uassert.h"
b75a7d8f
A
35#include "cmemory.h"
36
37
38/* Constants --------------------------------------------------------------- */
39
40U_NAMESPACE_BEGIN
41
374ca955 42UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationElementIterator)
b75a7d8f 43
b75a7d8f
A
44/* CollationElementIterator public constructor/destructor ------------------ */
45
46CollationElementIterator::CollationElementIterator(
47 const CollationElementIterator& other)
48 : UObject(other), isDataOwned_(TRUE)
49{
73c04bcf
A
50 UErrorCode status = U_ZERO_ERROR;
51 m_data_ = ucol_openElements(other.m_data_->iteratordata_.coll, NULL, 0,
52 &status);
b75a7d8f 53
73c04bcf 54 *this = other;
b75a7d8f
A
55}
56
57CollationElementIterator::~CollationElementIterator()
58{
73c04bcf
A
59 if (isDataOwned_) {
60 ucol_closeElements(m_data_);
61 }
b75a7d8f
A
62}
63
64/* CollationElementIterator public methods --------------------------------- */
65
66int32_t CollationElementIterator::getOffset() const
67{
73c04bcf 68 return ucol_getOffset(m_data_);
b75a7d8f
A
69}
70
71/**
72* Get the ordering priority of the next character in the string.
73* @return the next character's ordering. Returns NULLORDER if an error has
74* occured or if the end of string has been reached
75*/
76int32_t CollationElementIterator::next(UErrorCode& status)
77{
73c04bcf 78 return ucol_next(m_data_, &status);
b75a7d8f
A
79}
80
81UBool CollationElementIterator::operator!=(
82 const CollationElementIterator& other) const
83{
73c04bcf 84 return !(*this == other);
b75a7d8f
A
85}
86
87UBool CollationElementIterator::operator==(
88 const CollationElementIterator& that) const
89{
46f4442e 90 if (this == &that || m_data_ == that.m_data_) {
b75a7d8f
A
91 return TRUE;
92 }
93
94 // option comparison
374ca955 95 if (m_data_->iteratordata_.coll != that.m_data_->iteratordata_.coll)
b75a7d8f
A
96 {
97 return FALSE;
98 }
99
100 // the constructor and setText always sets a length
101 // and we only compare the string not the contents of the normalization
102 // buffer
729e4ab9
A
103 int thislength = (int)(m_data_->iteratordata_.endp - m_data_->iteratordata_.string);
104 int thatlength = (int)(that.m_data_->iteratordata_.endp - that.m_data_->iteratordata_.string);
b75a7d8f
A
105
106 if (thislength != thatlength) {
107 return FALSE;
108 }
109
110 if (uprv_memcmp(m_data_->iteratordata_.string,
111 that.m_data_->iteratordata_.string,
112 thislength * U_SIZEOF_UCHAR) != 0) {
113 return FALSE;
114 }
115 if (getOffset() != that.getOffset()) {
116 return FALSE;
117 }
118
119 // checking normalization buffer
120 if ((m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) {
46f4442e 121 if ((that.m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) != 0) {
b75a7d8f
A
122 return FALSE;
123 }
124 // both are in the normalization buffer
125 if (m_data_->iteratordata_.pos
729e4ab9 126 - m_data_->iteratordata_.writableBuffer.getBuffer()
b75a7d8f 127 != that.m_data_->iteratordata_.pos
729e4ab9 128 - that.m_data_->iteratordata_.writableBuffer.getBuffer()) {
b75a7d8f
A
129 // not in the same position in the normalization buffer
130 return FALSE;
131 }
132 }
46f4442e 133 else if ((that.m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) {
b75a7d8f
A
134 return FALSE;
135 }
136 // checking ce position
137 return (m_data_->iteratordata_.CEpos - m_data_->iteratordata_.CEs)
138 == (that.m_data_->iteratordata_.CEpos
139 - that.m_data_->iteratordata_.CEs);
140}
141
142/**
143* Get the ordering priority of the previous collation element in the string.
144* @param status the error code status.
145* @return the previous element's ordering. Returns NULLORDER if an error has
146* occured or if the start of string has been reached.
147*/
148int32_t CollationElementIterator::previous(UErrorCode& status)
149{
73c04bcf 150 return ucol_previous(m_data_, &status);
b75a7d8f
A
151}
152
153/**
154* Resets the cursor to the beginning of the string.
155*/
156void CollationElementIterator::reset()
157{
73c04bcf 158 ucol_reset(m_data_);
b75a7d8f
A
159}
160
161void CollationElementIterator::setOffset(int32_t newOffset,
162 UErrorCode& status)
163{
73c04bcf 164 ucol_setOffset(m_data_, newOffset, &status);
b75a7d8f
A
165}
166
167/**
168* Sets the source to the new source string.
169*/
170void CollationElementIterator::setText(const UnicodeString& source,
171 UErrorCode& status)
172{
73c04bcf 173 if (U_FAILURE(status)) {
b75a7d8f
A
174 return;
175 }
73c04bcf
A
176
177 int32_t length = source.length();
178 UChar *string = NULL;
179 if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) {
729e4ab9 180 uprv_free((UChar *)m_data_->iteratordata_.string);
73c04bcf
A
181 }
182 m_data_->isWritable = TRUE;
183 if (length > 0) {
184 string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
185 /* test for NULL */
186 if (string == NULL) {
187 status = U_MEMORY_ALLOCATION_ERROR;
188 return;
189 }
190 u_memcpy(string, source.getBuffer(), length);
191 }
192 else {
193 string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
194 /* test for NULL */
195 if (string == NULL) {
196 status = U_MEMORY_ALLOCATION_ERROR;
197 return;
198 }
199 *string = 0;
b75a7d8f 200 }
46f4442e 201 /* Free offsetBuffer before initializing it. */
729e4ab9 202 ucol_freeOffsetBuffer(&(m_data_->iteratordata_));
73c04bcf 203 uprv_init_collIterate(m_data_->iteratordata_.coll, string, length,
729e4ab9 204 &m_data_->iteratordata_, &status);
b75a7d8f 205
73c04bcf 206 m_data_->reset_ = TRUE;
b75a7d8f
A
207}
208
209// Sets the source to the new character iterator.
210void CollationElementIterator::setText(CharacterIterator& source,
211 UErrorCode& status)
212{
73c04bcf 213 if (U_FAILURE(status))
b75a7d8f 214 return;
73c04bcf
A
215
216 int32_t length = source.getLength();
217 UChar *buffer = NULL;
218
219 if (length == 0) {
220 buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
221 /* test for NULL */
222 if (buffer == NULL) {
223 status = U_MEMORY_ALLOCATION_ERROR;
224 return;
225 }
226 *buffer = 0;
b75a7d8f 227 }
73c04bcf
A
228 else {
229 buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
230 /* test for NULL */
231 if (buffer == NULL) {
232 status = U_MEMORY_ALLOCATION_ERROR;
233 return;
234 }
235 /*
236 Using this constructor will prevent buffer from being removed when
237 string gets removed
238 */
239 UnicodeString string;
240 source.getText(string);
241 u_memcpy(buffer, string.getBuffer(), length);
242 }
243
244 if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) {
729e4ab9 245 uprv_free((UChar *)m_data_->iteratordata_.string);
73c04bcf
A
246 }
247 m_data_->isWritable = TRUE;
46f4442e 248 /* Free offsetBuffer before initializing it. */
729e4ab9 249 ucol_freeOffsetBuffer(&(m_data_->iteratordata_));
73c04bcf 250 uprv_init_collIterate(m_data_->iteratordata_.coll, buffer, length,
729e4ab9 251 &m_data_->iteratordata_, &status);
73c04bcf 252 m_data_->reset_ = TRUE;
b75a7d8f
A
253}
254
255int32_t CollationElementIterator::strengthOrder(int32_t order) const
256{
73c04bcf
A
257 UCollationStrength s = ucol_getStrength(m_data_->iteratordata_.coll);
258 // Mask off the unwanted differences.
259 if (s == UCOL_PRIMARY) {
260 order &= RuleBasedCollator::PRIMARYDIFFERENCEONLY;
261 }
262 else if (s == UCOL_SECONDARY) {
263 order &= RuleBasedCollator::SECONDARYDIFFERENCEONLY;
264 }
265
266 return order;
b75a7d8f
A
267}
268
269/* CollationElementIterator private constructors/destructors --------------- */
270
271/**
272* This is the "real" constructor for this class; it constructs an iterator
273* over the source text using the specified collator
274*/
275CollationElementIterator::CollationElementIterator(
276 const UnicodeString& sourceText,
277 const RuleBasedCollator* order,
278 UErrorCode& status)
279 : isDataOwned_(TRUE)
280{
73c04bcf 281 if (U_FAILURE(status)) {
b75a7d8f 282 return;
73c04bcf
A
283 }
284
285 int32_t length = sourceText.length();
286 UChar *string = NULL;
287
288 if (length > 0) {
289 string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
290 /* test for NULL */
291 if (string == NULL) {
292 status = U_MEMORY_ALLOCATION_ERROR;
293 return;
294 }
295 /*
296 Using this constructor will prevent buffer from being removed when
297 string gets removed
298 */
299 u_memcpy(string, sourceText.getBuffer(), length);
300 }
301 else {
302 string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
303 /* test for NULL */
304 if (string == NULL) {
305 status = U_MEMORY_ALLOCATION_ERROR;
306 return;
307 }
308 *string = 0;
309 }
310 m_data_ = ucol_openElements(order->ucollator, string, length, &status);
311
312 /* Test for buffer overflows */
313 if (U_FAILURE(status)) {
314 return;
315 }
316 m_data_->isWritable = TRUE;
b75a7d8f
A
317}
318
319/**
320* This is the "real" constructor for this class; it constructs an iterator over
321* the source text using the specified collator
322*/
323CollationElementIterator::CollationElementIterator(
324 const CharacterIterator& sourceText,
325 const RuleBasedCollator* order,
326 UErrorCode& status)
327 : isDataOwned_(TRUE)
328{
73c04bcf 329 if (U_FAILURE(status))
b75a7d8f 330 return;
73c04bcf
A
331
332 // **** should I just drop this test? ****
333 /*
334 if ( sourceText.endIndex() != 0 )
335 {
336 // A CollationElementIterator is really a two-layered beast.
337 // Internally it uses a Normalizer to munge the source text into a form
338 // where all "composed" Unicode characters (such as \u00FC) are split into a
339 // normal character and a combining accent character.
340 // Afterward, CollationElementIterator does its own processing to handle
341 // expanding and contracting collation sequences, ignorables, and so on.
342
343 Normalizer::EMode decomp = order->getStrength() == Collator::IDENTICAL
344 ? Normalizer::NO_OP : order->getDecomposition();
345
346 text = new Normalizer(sourceText, decomp);
347 if (text == NULL)
348 status = U_MEMORY_ALLOCATION_ERROR;
349 }
350 */
351 int32_t length = sourceText.getLength();
352 UChar *buffer;
353 if (length > 0) {
354 buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
355 /* test for NULL */
356 if (buffer == NULL) {
357 status = U_MEMORY_ALLOCATION_ERROR;
358 return;
359 }
360 /*
361 Using this constructor will prevent buffer from being removed when
362 string gets removed
363 */
364 UnicodeString string(buffer, length, length);
365 ((CharacterIterator &)sourceText).getText(string);
366 const UChar *temp = string.getBuffer();
367 u_memcpy(buffer, temp, length);
368 }
369 else {
370 buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
371 /* test for NULL */
372 if (buffer == NULL) {
373 status = U_MEMORY_ALLOCATION_ERROR;
374 return;
375 }
376 *buffer = 0;
377 }
378 m_data_ = ucol_openElements(order->ucollator, buffer, length, &status);
379
380 /* Test for buffer overflows */
381 if (U_FAILURE(status)) {
b75a7d8f 382 return;
73c04bcf
A
383 }
384 m_data_->isWritable = TRUE;
b75a7d8f
A
385}
386
387/* CollationElementIterator protected methods ----------------------------- */
388
389const CollationElementIterator& CollationElementIterator::operator=(
390 const CollationElementIterator& other)
391{
73c04bcf
A
392 if (this != &other)
393 {
394 UCollationElements *ucolelem = this->m_data_;
395 UCollationElements *otherucolelem = other.m_data_;
396 collIterate *coliter = &(ucolelem->iteratordata_);
397 collIterate *othercoliter = &(otherucolelem->iteratordata_);
398 int length = 0;
399
400 // checking only UCOL_ITER_HASLEN is not enough here as we may be in
401 // the normalization buffer
729e4ab9 402 length = (int)(othercoliter->endp - othercoliter->string);
73c04bcf
A
403
404 ucolelem->reset_ = otherucolelem->reset_;
405 ucolelem->isWritable = TRUE;
406
407 /* create a duplicate of string */
408 if (length > 0) {
409 coliter->string = (UChar *)uprv_malloc(length * U_SIZEOF_UCHAR);
410 if(coliter->string != NULL) {
729e4ab9 411 uprv_memcpy((UChar *)coliter->string, othercoliter->string,
73c04bcf
A
412 length * U_SIZEOF_UCHAR);
413 } else { // Error: couldn't allocate memory. No copying should be done
414 length = 0;
415 }
416 }
417 else {
418 coliter->string = NULL;
419 }
420
421 /* start and end of string */
4388f060 422 coliter->endp = coliter->string == NULL ? NULL : coliter->string + length;
73c04bcf
A
423
424 /* handle writable buffer here */
425
426 if (othercoliter->flags & UCOL_ITER_INNORMBUF) {
729e4ab9
A
427 coliter->writableBuffer = othercoliter->writableBuffer;
428 coliter->writableBuffer.getTerminatedBuffer();
73c04bcf
A
429 }
430
431 /* current position */
432 if (othercoliter->pos >= othercoliter->string &&
433 othercoliter->pos <= othercoliter->endp)
434 {
4388f060 435 U_ASSERT(coliter->string != NULL);
73c04bcf
A
436 coliter->pos = coliter->string +
437 (othercoliter->pos - othercoliter->string);
438 }
46f4442e 439 else {
729e4ab9
A
440 coliter->pos = coliter->writableBuffer.getTerminatedBuffer() +
441 (othercoliter->pos - othercoliter->writableBuffer.getBuffer());
46f4442e 442 }
73c04bcf
A
443
444 /* CE buffer */
46f4442e
A
445 int32_t CEsize;
446 if (coliter->extendCEs) {
447 uprv_memcpy(coliter->CEs, othercoliter->CEs, sizeof(uint32_t) * UCOL_EXPAND_CE_BUFFER_SIZE);
448 CEsize = sizeof(othercoliter->extendCEs);
449 if (CEsize > 0) {
450 othercoliter->extendCEs = (uint32_t *)uprv_malloc(CEsize);
451 uprv_memcpy(coliter->extendCEs, othercoliter->extendCEs, CEsize);
452 }
453 coliter->toReturn = coliter->extendCEs +
454 (othercoliter->toReturn - othercoliter->extendCEs);
455 coliter->CEpos = coliter->extendCEs + CEsize;
456 } else {
457 CEsize = (int32_t)(othercoliter->CEpos - othercoliter->CEs);
458 if (CEsize > 0) {
459 uprv_memcpy(coliter->CEs, othercoliter->CEs, CEsize);
460 }
461 coliter->toReturn = coliter->CEs +
462 (othercoliter->toReturn - othercoliter->CEs);
463 coliter->CEpos = coliter->CEs + CEsize;
73c04bcf 464 }
73c04bcf
A
465
466 if (othercoliter->fcdPosition != NULL) {
4388f060 467 U_ASSERT(coliter->string != NULL);
73c04bcf
A
468 coliter->fcdPosition = coliter->string +
469 (othercoliter->fcdPosition
470 - othercoliter->string);
471 }
472 else {
473 coliter->fcdPosition = NULL;
474 }
475 coliter->flags = othercoliter->flags/*| UCOL_ITER_HASLEN*/;
476 coliter->origFlags = othercoliter->origFlags;
477 coliter->coll = othercoliter->coll;
478 this->isDataOwned_ = TRUE;
479 }
480
481 return *this;
b75a7d8f
A
482}
483
484U_NAMESPACE_END
485
486#endif /* #if !UCONFIG_NO_COLLATION */
487
488/* eof */