]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/coleitr.cpp
ICU-461.18.tar.gz
[apple/icu.git] / icuSources / i18n / coleitr.cpp
CommitLineData
b75a7d8f
A
1/*
2*******************************************************************************
729e4ab9 3* Copyright (C) 1996-2010, International Business Machines Corporation and *
b75a7d8f
A
4* others. All Rights Reserved. *
5*******************************************************************************
6*/
7
8/*
9* File coleitr.cpp
10*
11*
12*
13* Created by: Helena Shih
14*
15* Modification History:
16*
17* Date Name Description
18*
19* 6/23/97 helena Adding comments to make code more readable.
20* 08/03/98 erm Synched with 1.2 version of CollationElementIterator.java
21* 12/10/99 aliu Ported Thai collation support from Java.
22* 01/25/01 swquek Modified to a C++ wrapper calling C APIs (ucoliter.h)
23* 02/19/01 swquek Removed CollationElementsIterator() since it is
24* private constructor and no calls are made to it
25*/
26
27#include "unicode/utypes.h"
28
29#if !UCONFIG_NO_COLLATION
30
31#include "unicode/coleitr.h"
32#include "unicode/ustring.h"
33#include "ucol_imp.h"
34#include "cmemory.h"
35
36
37/* Constants --------------------------------------------------------------- */
38
39U_NAMESPACE_BEGIN
40
374ca955 41UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationElementIterator)
b75a7d8f 42
b75a7d8f
A
43/* CollationElementIterator public constructor/destructor ------------------ */
44
45CollationElementIterator::CollationElementIterator(
46 const CollationElementIterator& other)
47 : UObject(other), isDataOwned_(TRUE)
48{
73c04bcf
A
49 UErrorCode status = U_ZERO_ERROR;
50 m_data_ = ucol_openElements(other.m_data_->iteratordata_.coll, NULL, 0,
51 &status);
b75a7d8f 52
73c04bcf 53 *this = other;
b75a7d8f
A
54}
55
56CollationElementIterator::~CollationElementIterator()
57{
73c04bcf
A
58 if (isDataOwned_) {
59 ucol_closeElements(m_data_);
60 }
b75a7d8f
A
61}
62
63/* CollationElementIterator public methods --------------------------------- */
64
65int32_t CollationElementIterator::getOffset() const
66{
73c04bcf 67 return ucol_getOffset(m_data_);
b75a7d8f
A
68}
69
70/**
71* Get the ordering priority of the next character in the string.
72* @return the next character's ordering. Returns NULLORDER if an error has
73* occured or if the end of string has been reached
74*/
75int32_t CollationElementIterator::next(UErrorCode& status)
76{
73c04bcf 77 return ucol_next(m_data_, &status);
b75a7d8f
A
78}
79
80UBool CollationElementIterator::operator!=(
81 const CollationElementIterator& other) const
82{
73c04bcf 83 return !(*this == other);
b75a7d8f
A
84}
85
86UBool CollationElementIterator::operator==(
87 const CollationElementIterator& that) const
88{
46f4442e 89 if (this == &that || m_data_ == that.m_data_) {
b75a7d8f
A
90 return TRUE;
91 }
92
93 // option comparison
374ca955 94 if (m_data_->iteratordata_.coll != that.m_data_->iteratordata_.coll)
b75a7d8f
A
95 {
96 return FALSE;
97 }
98
99 // the constructor and setText always sets a length
100 // and we only compare the string not the contents of the normalization
101 // buffer
729e4ab9
A
102 int thislength = (int)(m_data_->iteratordata_.endp - m_data_->iteratordata_.string);
103 int thatlength = (int)(that.m_data_->iteratordata_.endp - that.m_data_->iteratordata_.string);
b75a7d8f
A
104
105 if (thislength != thatlength) {
106 return FALSE;
107 }
108
109 if (uprv_memcmp(m_data_->iteratordata_.string,
110 that.m_data_->iteratordata_.string,
111 thislength * U_SIZEOF_UCHAR) != 0) {
112 return FALSE;
113 }
114 if (getOffset() != that.getOffset()) {
115 return FALSE;
116 }
117
118 // checking normalization buffer
119 if ((m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) {
46f4442e 120 if ((that.m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) != 0) {
b75a7d8f
A
121 return FALSE;
122 }
123 // both are in the normalization buffer
124 if (m_data_->iteratordata_.pos
729e4ab9 125 - m_data_->iteratordata_.writableBuffer.getBuffer()
b75a7d8f 126 != that.m_data_->iteratordata_.pos
729e4ab9 127 - that.m_data_->iteratordata_.writableBuffer.getBuffer()) {
b75a7d8f
A
128 // not in the same position in the normalization buffer
129 return FALSE;
130 }
131 }
46f4442e 132 else if ((that.m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) {
b75a7d8f
A
133 return FALSE;
134 }
135 // checking ce position
136 return (m_data_->iteratordata_.CEpos - m_data_->iteratordata_.CEs)
137 == (that.m_data_->iteratordata_.CEpos
138 - that.m_data_->iteratordata_.CEs);
139}
140
141/**
142* Get the ordering priority of the previous collation element in the string.
143* @param status the error code status.
144* @return the previous element's ordering. Returns NULLORDER if an error has
145* occured or if the start of string has been reached.
146*/
147int32_t CollationElementIterator::previous(UErrorCode& status)
148{
73c04bcf 149 return ucol_previous(m_data_, &status);
b75a7d8f
A
150}
151
152/**
153* Resets the cursor to the beginning of the string.
154*/
155void CollationElementIterator::reset()
156{
73c04bcf 157 ucol_reset(m_data_);
b75a7d8f
A
158}
159
160void CollationElementIterator::setOffset(int32_t newOffset,
161 UErrorCode& status)
162{
73c04bcf 163 ucol_setOffset(m_data_, newOffset, &status);
b75a7d8f
A
164}
165
166/**
167* Sets the source to the new source string.
168*/
169void CollationElementIterator::setText(const UnicodeString& source,
170 UErrorCode& status)
171{
73c04bcf 172 if (U_FAILURE(status)) {
b75a7d8f
A
173 return;
174 }
73c04bcf
A
175
176 int32_t length = source.length();
177 UChar *string = NULL;
178 if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) {
729e4ab9 179 uprv_free((UChar *)m_data_->iteratordata_.string);
73c04bcf
A
180 }
181 m_data_->isWritable = TRUE;
182 if (length > 0) {
183 string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
184 /* test for NULL */
185 if (string == NULL) {
186 status = U_MEMORY_ALLOCATION_ERROR;
187 return;
188 }
189 u_memcpy(string, source.getBuffer(), length);
190 }
191 else {
192 string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
193 /* test for NULL */
194 if (string == NULL) {
195 status = U_MEMORY_ALLOCATION_ERROR;
196 return;
197 }
198 *string = 0;
b75a7d8f 199 }
46f4442e 200 /* Free offsetBuffer before initializing it. */
729e4ab9 201 ucol_freeOffsetBuffer(&(m_data_->iteratordata_));
73c04bcf 202 uprv_init_collIterate(m_data_->iteratordata_.coll, string, length,
729e4ab9 203 &m_data_->iteratordata_, &status);
b75a7d8f 204
73c04bcf 205 m_data_->reset_ = TRUE;
b75a7d8f
A
206}
207
208// Sets the source to the new character iterator.
209void CollationElementIterator::setText(CharacterIterator& source,
210 UErrorCode& status)
211{
73c04bcf 212 if (U_FAILURE(status))
b75a7d8f 213 return;
73c04bcf
A
214
215 int32_t length = source.getLength();
216 UChar *buffer = NULL;
217
218 if (length == 0) {
219 buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
220 /* test for NULL */
221 if (buffer == NULL) {
222 status = U_MEMORY_ALLOCATION_ERROR;
223 return;
224 }
225 *buffer = 0;
b75a7d8f 226 }
73c04bcf
A
227 else {
228 buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
229 /* test for NULL */
230 if (buffer == NULL) {
231 status = U_MEMORY_ALLOCATION_ERROR;
232 return;
233 }
234 /*
235 Using this constructor will prevent buffer from being removed when
236 string gets removed
237 */
238 UnicodeString string;
239 source.getText(string);
240 u_memcpy(buffer, string.getBuffer(), length);
241 }
242
243 if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) {
729e4ab9 244 uprv_free((UChar *)m_data_->iteratordata_.string);
73c04bcf
A
245 }
246 m_data_->isWritable = TRUE;
46f4442e 247 /* Free offsetBuffer before initializing it. */
729e4ab9 248 ucol_freeOffsetBuffer(&(m_data_->iteratordata_));
73c04bcf 249 uprv_init_collIterate(m_data_->iteratordata_.coll, buffer, length,
729e4ab9 250 &m_data_->iteratordata_, &status);
73c04bcf 251 m_data_->reset_ = TRUE;
b75a7d8f
A
252}
253
254int32_t CollationElementIterator::strengthOrder(int32_t order) const
255{
73c04bcf
A
256 UCollationStrength s = ucol_getStrength(m_data_->iteratordata_.coll);
257 // Mask off the unwanted differences.
258 if (s == UCOL_PRIMARY) {
259 order &= RuleBasedCollator::PRIMARYDIFFERENCEONLY;
260 }
261 else if (s == UCOL_SECONDARY) {
262 order &= RuleBasedCollator::SECONDARYDIFFERENCEONLY;
263 }
264
265 return order;
b75a7d8f
A
266}
267
268/* CollationElementIterator private constructors/destructors --------------- */
269
270/**
271* This is the "real" constructor for this class; it constructs an iterator
272* over the source text using the specified collator
273*/
274CollationElementIterator::CollationElementIterator(
275 const UnicodeString& sourceText,
276 const RuleBasedCollator* order,
277 UErrorCode& status)
278 : isDataOwned_(TRUE)
279{
73c04bcf 280 if (U_FAILURE(status)) {
b75a7d8f 281 return;
73c04bcf
A
282 }
283
284 int32_t length = sourceText.length();
285 UChar *string = NULL;
286
287 if (length > 0) {
288 string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
289 /* test for NULL */
290 if (string == NULL) {
291 status = U_MEMORY_ALLOCATION_ERROR;
292 return;
293 }
294 /*
295 Using this constructor will prevent buffer from being removed when
296 string gets removed
297 */
298 u_memcpy(string, sourceText.getBuffer(), length);
299 }
300 else {
301 string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
302 /* test for NULL */
303 if (string == NULL) {
304 status = U_MEMORY_ALLOCATION_ERROR;
305 return;
306 }
307 *string = 0;
308 }
309 m_data_ = ucol_openElements(order->ucollator, string, length, &status);
310
311 /* Test for buffer overflows */
312 if (U_FAILURE(status)) {
313 return;
314 }
315 m_data_->isWritable = TRUE;
b75a7d8f
A
316}
317
318/**
319* This is the "real" constructor for this class; it constructs an iterator over
320* the source text using the specified collator
321*/
322CollationElementIterator::CollationElementIterator(
323 const CharacterIterator& sourceText,
324 const RuleBasedCollator* order,
325 UErrorCode& status)
326 : isDataOwned_(TRUE)
327{
73c04bcf 328 if (U_FAILURE(status))
b75a7d8f 329 return;
73c04bcf
A
330
331 // **** should I just drop this test? ****
332 /*
333 if ( sourceText.endIndex() != 0 )
334 {
335 // A CollationElementIterator is really a two-layered beast.
336 // Internally it uses a Normalizer to munge the source text into a form
337 // where all "composed" Unicode characters (such as \u00FC) are split into a
338 // normal character and a combining accent character.
339 // Afterward, CollationElementIterator does its own processing to handle
340 // expanding and contracting collation sequences, ignorables, and so on.
341
342 Normalizer::EMode decomp = order->getStrength() == Collator::IDENTICAL
343 ? Normalizer::NO_OP : order->getDecomposition();
344
345 text = new Normalizer(sourceText, decomp);
346 if (text == NULL)
347 status = U_MEMORY_ALLOCATION_ERROR;
348 }
349 */
350 int32_t length = sourceText.getLength();
351 UChar *buffer;
352 if (length > 0) {
353 buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
354 /* test for NULL */
355 if (buffer == NULL) {
356 status = U_MEMORY_ALLOCATION_ERROR;
357 return;
358 }
359 /*
360 Using this constructor will prevent buffer from being removed when
361 string gets removed
362 */
363 UnicodeString string(buffer, length, length);
364 ((CharacterIterator &)sourceText).getText(string);
365 const UChar *temp = string.getBuffer();
366 u_memcpy(buffer, temp, length);
367 }
368 else {
369 buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
370 /* test for NULL */
371 if (buffer == NULL) {
372 status = U_MEMORY_ALLOCATION_ERROR;
373 return;
374 }
375 *buffer = 0;
376 }
377 m_data_ = ucol_openElements(order->ucollator, buffer, length, &status);
378
379 /* Test for buffer overflows */
380 if (U_FAILURE(status)) {
b75a7d8f 381 return;
73c04bcf
A
382 }
383 m_data_->isWritable = TRUE;
b75a7d8f
A
384}
385
386/* CollationElementIterator protected methods ----------------------------- */
387
388const CollationElementIterator& CollationElementIterator::operator=(
389 const CollationElementIterator& other)
390{
73c04bcf
A
391 if (this != &other)
392 {
393 UCollationElements *ucolelem = this->m_data_;
394 UCollationElements *otherucolelem = other.m_data_;
395 collIterate *coliter = &(ucolelem->iteratordata_);
396 collIterate *othercoliter = &(otherucolelem->iteratordata_);
397 int length = 0;
398
399 // checking only UCOL_ITER_HASLEN is not enough here as we may be in
400 // the normalization buffer
729e4ab9 401 length = (int)(othercoliter->endp - othercoliter->string);
73c04bcf
A
402
403 ucolelem->reset_ = otherucolelem->reset_;
404 ucolelem->isWritable = TRUE;
405
406 /* create a duplicate of string */
407 if (length > 0) {
408 coliter->string = (UChar *)uprv_malloc(length * U_SIZEOF_UCHAR);
409 if(coliter->string != NULL) {
729e4ab9 410 uprv_memcpy((UChar *)coliter->string, othercoliter->string,
73c04bcf
A
411 length * U_SIZEOF_UCHAR);
412 } else { // Error: couldn't allocate memory. No copying should be done
413 length = 0;
414 }
415 }
416 else {
417 coliter->string = NULL;
418 }
419
420 /* start and end of string */
421 coliter->endp = coliter->string + length;
422
423 /* handle writable buffer here */
424
425 if (othercoliter->flags & UCOL_ITER_INNORMBUF) {
729e4ab9
A
426 coliter->writableBuffer = othercoliter->writableBuffer;
427 coliter->writableBuffer.getTerminatedBuffer();
73c04bcf
A
428 }
429
430 /* current position */
431 if (othercoliter->pos >= othercoliter->string &&
432 othercoliter->pos <= othercoliter->endp)
433 {
434 coliter->pos = coliter->string +
435 (othercoliter->pos - othercoliter->string);
436 }
46f4442e 437 else {
729e4ab9
A
438 coliter->pos = coliter->writableBuffer.getTerminatedBuffer() +
439 (othercoliter->pos - othercoliter->writableBuffer.getBuffer());
46f4442e 440 }
73c04bcf
A
441
442 /* CE buffer */
46f4442e
A
443 int32_t CEsize;
444 if (coliter->extendCEs) {
445 uprv_memcpy(coliter->CEs, othercoliter->CEs, sizeof(uint32_t) * UCOL_EXPAND_CE_BUFFER_SIZE);
446 CEsize = sizeof(othercoliter->extendCEs);
447 if (CEsize > 0) {
448 othercoliter->extendCEs = (uint32_t *)uprv_malloc(CEsize);
449 uprv_memcpy(coliter->extendCEs, othercoliter->extendCEs, CEsize);
450 }
451 coliter->toReturn = coliter->extendCEs +
452 (othercoliter->toReturn - othercoliter->extendCEs);
453 coliter->CEpos = coliter->extendCEs + CEsize;
454 } else {
455 CEsize = (int32_t)(othercoliter->CEpos - othercoliter->CEs);
456 if (CEsize > 0) {
457 uprv_memcpy(coliter->CEs, othercoliter->CEs, CEsize);
458 }
459 coliter->toReturn = coliter->CEs +
460 (othercoliter->toReturn - othercoliter->CEs);
461 coliter->CEpos = coliter->CEs + CEsize;
73c04bcf 462 }
73c04bcf
A
463
464 if (othercoliter->fcdPosition != NULL) {
465 coliter->fcdPosition = coliter->string +
466 (othercoliter->fcdPosition
467 - othercoliter->string);
468 }
469 else {
470 coliter->fcdPosition = NULL;
471 }
472 coliter->flags = othercoliter->flags/*| UCOL_ITER_HASLEN*/;
473 coliter->origFlags = othercoliter->origFlags;
474 coliter->coll = othercoliter->coll;
475 this->isDataOwned_ = TRUE;
476 }
477
478 return *this;
b75a7d8f
A
479}
480
481U_NAMESPACE_END
482
483#endif /* #if !UCONFIG_NO_COLLATION */
484
485/* eof */