]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/coleitr.cpp
ICU-6.2.4.tar.gz
[apple/icu.git] / icuSources / i18n / coleitr.cpp
CommitLineData
b75a7d8f
A
1/*
2*******************************************************************************
374ca955 3* Copyright (C) 1996-2003, International Business Machines Corporation and *
b75a7d8f
A
4* others. All Rights Reserved. *
5*******************************************************************************
6*/
7
8/*
9* File coleitr.cpp
10*
11*
12*
13* Created by: Helena Shih
14*
15* Modification History:
16*
17* Date Name Description
18*
19* 6/23/97 helena Adding comments to make code more readable.
20* 08/03/98 erm Synched with 1.2 version of CollationElementIterator.java
21* 12/10/99 aliu Ported Thai collation support from Java.
22* 01/25/01 swquek Modified to a C++ wrapper calling C APIs (ucoliter.h)
23* 02/19/01 swquek Removed CollationElementsIterator() since it is
24* private constructor and no calls are made to it
25*/
26
27#include "unicode/utypes.h"
28
29#if !UCONFIG_NO_COLLATION
30
31#include "unicode/coleitr.h"
32#include "unicode/ustring.h"
33#include "ucol_imp.h"
34#include "cmemory.h"
35
36
37/* Constants --------------------------------------------------------------- */
38
39U_NAMESPACE_BEGIN
40
374ca955 41UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationElementIterator)
b75a7d8f
A
42
43/* synwee : public can't remove */
44int32_t const CollationElementIterator::NULLORDER = 0xffffffff;
45
46/* CollationElementIterator public constructor/destructor ------------------ */
47
48CollationElementIterator::CollationElementIterator(
49 const CollationElementIterator& other)
50 : UObject(other), isDataOwned_(TRUE)
51{
52 UErrorCode status = U_ZERO_ERROR;
53 m_data_ = ucol_openElements(other.m_data_->iteratordata_.coll, NULL, 0,
54 &status);
55
56 *this = other;
57}
58
59CollationElementIterator::~CollationElementIterator()
60{
61 if (isDataOwned_) {
62 ucol_closeElements(m_data_);
63 }
64}
65
66/* CollationElementIterator public methods --------------------------------- */
67
68int32_t CollationElementIterator::getOffset() const
69{
70 return ucol_getOffset(m_data_);
71}
72
73/**
74* Get the ordering priority of the next character in the string.
75* @return the next character's ordering. Returns NULLORDER if an error has
76* occured or if the end of string has been reached
77*/
78int32_t CollationElementIterator::next(UErrorCode& status)
79{
80 return ucol_next(m_data_, &status);
81}
82
83UBool CollationElementIterator::operator!=(
84 const CollationElementIterator& other) const
85{
86 return !(*this == other);
87}
88
89UBool CollationElementIterator::operator==(
90 const CollationElementIterator& that) const
91{
92 if (this == &that) {
93 return TRUE;
94 }
95
96 if (m_data_ == that.m_data_) {
97 return TRUE;
98 }
99
100 // option comparison
374ca955 101 if (m_data_->iteratordata_.coll != that.m_data_->iteratordata_.coll)
b75a7d8f
A
102 {
103 return FALSE;
104 }
105
106 // the constructor and setText always sets a length
107 // and we only compare the string not the contents of the normalization
108 // buffer
109 int thislength = m_data_->iteratordata_.endp -
110 m_data_->iteratordata_.string;
111 int thatlength = that.m_data_->iteratordata_.endp -
112 that.m_data_->iteratordata_.string;
113
114 if (thislength != thatlength) {
115 return FALSE;
116 }
117
118 if (uprv_memcmp(m_data_->iteratordata_.string,
119 that.m_data_->iteratordata_.string,
120 thislength * U_SIZEOF_UCHAR) != 0) {
121 return FALSE;
122 }
123 if (getOffset() != that.getOffset()) {
124 return FALSE;
125 }
126
127 // checking normalization buffer
128 if ((m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) {
129 if ((m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) != 0) {
130 return FALSE;
131 }
132 // both are in the normalization buffer
133 if (m_data_->iteratordata_.pos
134 - m_data_->iteratordata_.writableBuffer
135 != that.m_data_->iteratordata_.pos
136 - that.m_data_->iteratordata_.writableBuffer) {
137 // not in the same position in the normalization buffer
138 return FALSE;
139 }
140 }
141 else if ((m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) {
142 return FALSE;
143 }
144 // checking ce position
145 return (m_data_->iteratordata_.CEpos - m_data_->iteratordata_.CEs)
146 == (that.m_data_->iteratordata_.CEpos
147 - that.m_data_->iteratordata_.CEs);
148}
149
150/**
151* Get the ordering priority of the previous collation element in the string.
152* @param status the error code status.
153* @return the previous element's ordering. Returns NULLORDER if an error has
154* occured or if the start of string has been reached.
155*/
156int32_t CollationElementIterator::previous(UErrorCode& status)
157{
158 return ucol_previous(m_data_, &status);
159}
160
161/**
162* Resets the cursor to the beginning of the string.
163*/
164void CollationElementIterator::reset()
165{
166 ucol_reset(m_data_);
167}
168
169void CollationElementIterator::setOffset(int32_t newOffset,
170 UErrorCode& status)
171{
172 ucol_setOffset(m_data_, newOffset, &status);
173}
174
175/**
176* Sets the source to the new source string.
177*/
178void CollationElementIterator::setText(const UnicodeString& source,
179 UErrorCode& status)
180{
181 if (U_FAILURE(status)) {
182 return;
183 }
184
185 int32_t length = source.length();
186 UChar *string = NULL;
187 if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) {
188 uprv_free(m_data_->iteratordata_.string);
189 }
190 m_data_->isWritable = TRUE;
191 if (length > 0) {
192 string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
193 /* test for NULL */
194 if (string == NULL) {
195 status = U_MEMORY_ALLOCATION_ERROR;
196 return;
197 }
198 u_memcpy(string, source.getBuffer(), length);
199 }
200 else {
201 string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
202 /* test for NULL */
203 if (string == NULL) {
204 status = U_MEMORY_ALLOCATION_ERROR;
205 return;
206 }
207 *string = 0;
208 }
209 uprv_init_collIterate(m_data_->iteratordata_.coll, string, length,
210 &m_data_->iteratordata_);
211
212 m_data_->reset_ = TRUE;
213}
214
215// Sets the source to the new character iterator.
216void CollationElementIterator::setText(CharacterIterator& source,
217 UErrorCode& status)
218{
219 if (U_FAILURE(status))
220 return;
221
222 int32_t length = source.getLength();
223 UChar *buffer = NULL;
224
225 if (length == 0) {
226 buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
227 /* test for NULL */
228 if (buffer == NULL) {
229 status = U_MEMORY_ALLOCATION_ERROR;
230 return;
231 }
232 *buffer = 0;
233 }
234 else {
235 buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
236 /* test for NULL */
237 if (buffer == NULL) {
238 status = U_MEMORY_ALLOCATION_ERROR;
239 return;
240 }
241 /*
242 Using this constructor will prevent buffer from being removed when
243 string gets removed
244 */
245 UnicodeString string;
246 source.getText(string);
247 u_memcpy(buffer, string.getBuffer(), length);
248 }
249
250 if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) {
251 uprv_free(m_data_->iteratordata_.string);
252 }
253 m_data_->isWritable = TRUE;
254 uprv_init_collIterate(m_data_->iteratordata_.coll, buffer, length,
255 &m_data_->iteratordata_);
256 m_data_->reset_ = TRUE;
257}
258
259int32_t CollationElementIterator::strengthOrder(int32_t order) const
260{
261 UCollationStrength s = ucol_getStrength(m_data_->iteratordata_.coll);
262 // Mask off the unwanted differences.
263 if (s == UCOL_PRIMARY) {
264 order &= RuleBasedCollator::PRIMARYDIFFERENCEONLY;
265 }
266 else if (s == UCOL_SECONDARY) {
267 order &= RuleBasedCollator::SECONDARYDIFFERENCEONLY;
268 }
269
270 return order;
271}
272
273/* CollationElementIterator private constructors/destructors --------------- */
274
275/**
276* This is the "real" constructor for this class; it constructs an iterator
277* over the source text using the specified collator
278*/
279CollationElementIterator::CollationElementIterator(
280 const UnicodeString& sourceText,
281 const RuleBasedCollator* order,
282 UErrorCode& status)
283 : isDataOwned_(TRUE)
284{
285 if (U_FAILURE(status)) {
286 return;
287 }
288
289 int32_t length = sourceText.length();
290 UChar *string = NULL;
291
292 if (length > 0) {
293 string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
294 /* test for NULL */
295 if (string == NULL) {
296 status = U_MEMORY_ALLOCATION_ERROR;
297 return;
298 }
299 /*
300 Using this constructor will prevent buffer from being removed when
301 string gets removed
302 */
303 u_memcpy(string, sourceText.getBuffer(), length);
304 }
305 else {
306 string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
307 /* test for NULL */
308 if (string == NULL) {
309 status = U_MEMORY_ALLOCATION_ERROR;
310 return;
311 }
312 *string = 0;
313 }
314 m_data_ = ucol_openElements(order->ucollator, string, length, &status);
315
316 /* Test for buffer overflows */
317 if (U_FAILURE(status)) {
318 return;
319 }
320 m_data_->isWritable = TRUE;
321}
322
323/**
324* This is the "real" constructor for this class; it constructs an iterator over
325* the source text using the specified collator
326*/
327CollationElementIterator::CollationElementIterator(
328 const CharacterIterator& sourceText,
329 const RuleBasedCollator* order,
330 UErrorCode& status)
331 : isDataOwned_(TRUE)
332{
333 if (U_FAILURE(status))
334 return;
335
336 // **** should I just drop this test? ****
337 /*
338 if ( sourceText.endIndex() != 0 )
339 {
340 // A CollationElementIterator is really a two-layered beast.
341 // Internally it uses a Normalizer to munge the source text into a form
374ca955 342 // where all "composed" Unicode characters (such as \u00FC) are split into a
b75a7d8f
A
343 // normal character and a combining accent character.
344 // Afterward, CollationElementIterator does its own processing to handle
345 // expanding and contracting collation sequences, ignorables, and so on.
346
347 Normalizer::EMode decomp = order->getStrength() == Collator::IDENTICAL
348 ? Normalizer::NO_OP : order->getDecomposition();
349
350 text = new Normalizer(sourceText, decomp);
351 if (text == NULL)
352 status = U_MEMORY_ALLOCATION_ERROR;
353 }
354 */
355 int32_t length = sourceText.getLength();
356 UChar *buffer;
357 if (length > 0) {
358 buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
359 /* test for NULL */
360 if (buffer == NULL) {
361 status = U_MEMORY_ALLOCATION_ERROR;
362 return;
363 }
364 /*
365 Using this constructor will prevent buffer from being removed when
366 string gets removed
367 */
368 UnicodeString string(buffer, length, length);
369 ((CharacterIterator &)sourceText).getText(string);
370 const UChar *temp = string.getBuffer();
371 u_memcpy(buffer, temp, length);
372 }
373 else {
374 buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
375 /* test for NULL */
376 if (buffer == NULL) {
377 status = U_MEMORY_ALLOCATION_ERROR;
378 return;
379 }
380 *buffer = 0;
381 }
382 m_data_ = ucol_openElements(order->ucollator, buffer, length, &status);
383
384 /* Test for buffer overflows */
385 if (U_FAILURE(status)) {
386 return;
387 }
388 m_data_->isWritable = TRUE;
389}
390
391/* CollationElementIterator protected methods ----------------------------- */
392
393const CollationElementIterator& CollationElementIterator::operator=(
394 const CollationElementIterator& other)
395{
396 if (this != &other)
397 {
398 UCollationElements *ucolelem = this->m_data_;
399 UCollationElements *otherucolelem = other.m_data_;
400 collIterate *coliter = &(ucolelem->iteratordata_);
401 collIterate *othercoliter = &(otherucolelem->iteratordata_);
402 int length = 0;
403
404 // checking only UCOL_ITER_HASLEN is not enough here as we may be in
405 // the normalization buffer
406 length = othercoliter->endp - othercoliter->string;
407
408 ucolelem->reset_ = otherucolelem->reset_;
409 ucolelem->isWritable = TRUE;
410
411 /* create a duplicate of string */
412 if (length > 0) {
413 coliter->string = (UChar *)uprv_malloc(length * U_SIZEOF_UCHAR);
414 if(coliter->string != NULL) {
415 uprv_memcpy(coliter->string, othercoliter->string,
416 length * U_SIZEOF_UCHAR);
417 } else { // Error: couldn't allocate memory. No copying should be done
418 length = 0;
419 }
420 }
421 else {
422 coliter->string = NULL;
423 }
424
425 /* start and end of string */
426 coliter->endp = coliter->string + length;
427
428 /* handle writable buffer here */
429
430 if (othercoliter->flags & UCOL_ITER_INNORMBUF) {
431 uint32_t wlength = u_strlen(othercoliter->writableBuffer) + 1;
432 if (wlength < coliter->writableBufSize) {
433 uprv_memcpy(coliter->stackWritableBuffer,
434 othercoliter->stackWritableBuffer,
435 othercoliter->writableBufSize * U_SIZEOF_UCHAR);
436 }
437 else {
438 if (coliter->writableBuffer != coliter->stackWritableBuffer) {
439 uprv_free(coliter->writableBuffer);
440 }
441 coliter->writableBuffer = (UChar *)uprv_malloc(
442 wlength * U_SIZEOF_UCHAR);
443 if(coliter->writableBuffer != NULL) {
444 uprv_memcpy(coliter->writableBuffer,
445 othercoliter->writableBuffer,
446 wlength * U_SIZEOF_UCHAR);
447 coliter->writableBufSize = wlength;
448 } else { // Error: couldn't allocate memory for writableBuffer
449 coliter->writableBufSize = 0;
450 }
451 }
452 }
453
454 /* current position */
455 if (othercoliter->pos >= othercoliter->string &&
456 othercoliter->pos <= othercoliter->endp) {
457 coliter->pos = coliter->string +
458 (othercoliter->pos - othercoliter->string);
459 }
460 else {
461 coliter->pos = coliter->writableBuffer +
462 (othercoliter->pos - othercoliter->writableBuffer);
463 }
464
465 /* CE buffer */
466 uprv_memcpy(coliter->CEs, othercoliter->CEs,
467 UCOL_EXPAND_CE_BUFFER_SIZE * sizeof(uint32_t));
468 coliter->toReturn = coliter->CEs +
469 (othercoliter->toReturn - othercoliter->CEs);
470 coliter->CEpos = coliter->CEs +
471 (othercoliter->CEpos - othercoliter->CEs);
472
473 if (othercoliter->fcdPosition != NULL) {
474 coliter->fcdPosition = coliter->string +
475 (othercoliter->fcdPosition
476 - othercoliter->string);
477 }
478 else {
479 coliter->fcdPosition = NULL;
480 }
481 coliter->flags = othercoliter->flags/*| UCOL_ITER_HASLEN*/;
482 coliter->origFlags = othercoliter->origFlags;
483 coliter->coll = othercoliter->coll;
484 this->isDataOwned_ = TRUE;
485 }
486
487 return *this;
488}
489
490U_NAMESPACE_END
491
492#endif /* #if !UCONFIG_NO_COLLATION */
493
494/* eof */