2 *******************************************************************************
3 * Copyright (C) 1996-2008, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 *******************************************************************************
13 * Created by: Helena Shih
15 * Modification History:
17 * Date Name Description
19 * 6/23/97 helena Adding comments to make code more readable.
20 * 08/03/98 erm Synched with 1.2 version of CollationElementIterator.java
21 * 12/10/99 aliu Ported Thai collation support from Java.
22 * 01/25/01 swquek Modified to a C++ wrapper calling C APIs (ucoliter.h)
23 * 02/19/01 swquek Removed CollationElementsIterator() since it is
24 * private constructor and no calls are made to it
27 #include "unicode/utypes.h"
29 #if !UCONFIG_NO_COLLATION
31 #include "unicode/coleitr.h"
32 #include "unicode/ustring.h"
37 /* Constants --------------------------------------------------------------- */
// Invokes the RTTI-implementation macro for CollationElementIterator.
// NOTE(review): presumably this supplies the class-ID boilerplate declared in
// the class header; the macro's definition is not in this file -- confirm.
41 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationElementIterator
)
43 /* CollationElementIterator public constructor/destructor ------------------ */
/*
 * Copy constructor.
 * Initializes the UObject base from `other`, sets isDataOwned_ to TRUE, and
 * opens a new element iterator on other's collator by calling
 * ucol_openElements with a NULL text pointer and a length of 0.
 *
 * NOTE(review): `status` is a local that starts at U_ZERO_ERROR, so a failure
 * reported by ucol_openElements cannot reach the caller. The tail of that call
 * and the rest of the body are not visible in this extract -- confirm that
 * m_data_ is validated before anything dereferences it.
 */
45 CollationElementIterator::CollationElementIterator(
46 const CollationElementIterator
& other
)
47 : UObject(other
), isDataOwned_(TRUE
)
49 UErrorCode status
= U_ZERO_ERROR
;
50 m_data_
= ucol_openElements(other
.m_data_
->iteratordata_
.coll
, NULL
, 0,
// Destructor: hands m_data_ to ucol_closeElements.
// NOTE(review): the lines between the signature and this call are missing from
// the extract; confirm whether the close is conditional (e.g. on isDataOwned_).
56 CollationElementIterator::~CollationElementIterator()
59 ucol_closeElements(m_data_
);
63 /* CollationElementIterator public methods --------------------------------- */
65 int32_t CollationElementIterator::getOffset() const
67 return ucol_getOffset(m_data_
);
71 * Get the ordering priority of the next character in the string.
72 * @return the next character's ordering. Returns NULLORDER if an error has
73 * occurred or if the end of string has been reached
75 int32_t CollationElementIterator::next(UErrorCode
& status
)
77 return ucol_next(m_data_
, &status
);
80 UBool
CollationElementIterator::operator!=(
81 const CollationElementIterator
& other
) const
83 return !(*this == other
);
86 UBool
CollationElementIterator::operator==(
87 const CollationElementIterator
& that
) const
89 if (this == &that
|| m_data_
== that
.m_data_
) {
94 if (m_data_
->iteratordata_
.coll
!= that
.m_data_
->iteratordata_
.coll
)
99 // the constructor and setText always sets a length
100 // and we only compare the string not the contents of the normalization
102 int thislength
= m_data_
->iteratordata_
.endp
-
103 m_data_
->iteratordata_
.string
;
104 int thatlength
= that
.m_data_
->iteratordata_
.endp
-
105 that
.m_data_
->iteratordata_
.string
;
107 if (thislength
!= thatlength
) {
111 if (uprv_memcmp(m_data_
->iteratordata_
.string
,
112 that
.m_data_
->iteratordata_
.string
,
113 thislength
* U_SIZEOF_UCHAR
) != 0) {
116 if (getOffset() != that
.getOffset()) {
120 // checking normalization buffer
121 if ((m_data_
->iteratordata_
.flags
& UCOL_ITER_HASLEN
) == 0) {
122 if ((that
.m_data_
->iteratordata_
.flags
& UCOL_ITER_HASLEN
) != 0) {
125 // both are in the normalization buffer
126 if (m_data_
->iteratordata_
.pos
127 - m_data_
->iteratordata_
.writableBuffer
128 != that
.m_data_
->iteratordata_
.pos
129 - that
.m_data_
->iteratordata_
.writableBuffer
) {
130 // not in the same position in the normalization buffer
134 else if ((that
.m_data_
->iteratordata_
.flags
& UCOL_ITER_HASLEN
) == 0) {
137 // checking ce position
138 return (m_data_
->iteratordata_
.CEpos
- m_data_
->iteratordata_
.CEs
)
139 == (that
.m_data_
->iteratordata_
.CEpos
140 - that
.m_data_
->iteratordata_
.CEs
);
144 * Get the ordering priority of the previous collation element in the string.
145 * @param status the error code status.
146 * @return the previous element's ordering. Returns NULLORDER if an error has
147 * occurred or if the start of string has been reached.
149 int32_t CollationElementIterator::previous(UErrorCode
& status
)
151 return ucol_previous(m_data_
, &status
);
155 * Resets the cursor to the beginning of the string.
// NOTE(review): only the signature of reset() survives in this extract; the
// body is not visible here, so its behavior must be checked against the
// complete source.
157 void CollationElementIterator::reset()
// Repositions the underlying iterator by forwarding `newOffset` and the
// address of `status` to ucol_setOffset.
// NOTE(review): the remainder of the parameter list (where `status` is
// declared) is missing from this extract.
162 void CollationElementIterator::setOffset(int32_t newOffset
,
165 ucol_setOffset(m_data_
, newOffset
, &status
);
169 * Sets the source to the new source string.
/*
 * Replaces the text being iterated with a private copy of `source`.
 * Visible steps: check `status`; release the current text with uprv_free when
 * the iterator is writable and holds a non-NULL string; mark the iterator
 * writable; uprv_malloc a new buffer (length UChars, or a single UChar on the
 * second allocation path) and copy `source` into it with u_memcpy; free the
 * offset buffer; re-initialize the collIterate via uprv_init_collIterate; set
 * reset_ to TRUE.  Allocation failure sets `status` to
 * U_MEMORY_ALLOCATION_ERROR.
 * NOTE(review): the line declaring `status` and the branch/return lines are
 * missing from this extract.
 */
171 void CollationElementIterator::setText(const UnicodeString
& source
,
174 if (U_FAILURE(status
)) {
178 int32_t length
= source
.length();
179 UChar
*string
= NULL
;
// NOTE(review): the old text is released here, BEFORE the replacement buffer
// is allocated below. If the allocation-failure path returns early (not
// visible in this extract), iteratordata_.string would be left pointing at
// freed memory. Allocating first and freeing afterwards would avoid that --
// confirm.
180 if (m_data_
->isWritable
&& m_data_
->iteratordata_
.string
!= NULL
) {
181 uprv_free(m_data_
->iteratordata_
.string
);
183 m_data_
->isWritable
= TRUE
;
185 string
= (UChar
*)uprv_malloc(U_SIZEOF_UCHAR
* length
);
187 if (string
== NULL
) {
188 status
= U_MEMORY_ALLOCATION_ERROR
;
191 u_memcpy(string
, source
.getBuffer(), length
);
194 string
= (UChar
*)uprv_malloc(U_SIZEOF_UCHAR
);
196 if (string
== NULL
) {
197 status
= U_MEMORY_ALLOCATION_ERROR
;
202 /* Free offsetBuffer before initializing it. */
203 freeOffsetBuffer(&(m_data_
->iteratordata_
));
204 uprv_init_collIterate(m_data_
->iteratordata_
.coll
, string
, length
,
205 &m_data_
->iteratordata_
);
207 m_data_
->reset_
= TRUE
;
210 // Sets the source to the new character iterator.
/*
 * Replaces the text being iterated with a private copy of the characters
 * exposed by `source`.
 * Visible steps: check `status`; read the length from source.getLength();
 * uprv_malloc a buffer (a single UChar on one path, length UChars on the
 * other); for the length-sized path, pull the text into a temporary
 * UnicodeString with getText and u_memcpy it into the buffer; then release
 * the previous text when the iterator is writable and holds a non-NULL string,
 * mark the iterator writable, free the offset buffer, re-initialize the
 * collIterate via uprv_init_collIterate and set reset_ to TRUE.
 * In this overload the new buffer is allocated before the old one is freed.
 * NOTE(review): the line declaring `status` and the branch/return lines are
 * missing from this extract.
 */
211 void CollationElementIterator::setText(CharacterIterator
& source
,
214 if (U_FAILURE(status
))
217 int32_t length
= source
.getLength();
218 UChar
*buffer
= NULL
;
221 buffer
= (UChar
*)uprv_malloc(U_SIZEOF_UCHAR
);
223 if (buffer
== NULL
) {
224 status
= U_MEMORY_ALLOCATION_ERROR
;
230 buffer
= (UChar
*)uprv_malloc(U_SIZEOF_UCHAR
* length
);
232 if (buffer
== NULL
) {
233 status
= U_MEMORY_ALLOCATION_ERROR
;
237 Using this constructor will prevent buffer from being removed when
240 UnicodeString string
;
241 source
.getText(string
);
242 u_memcpy(buffer
, string
.getBuffer(), length
);
245 if (m_data_
->isWritable
&& m_data_
->iteratordata_
.string
!= NULL
) {
246 uprv_free(m_data_
->iteratordata_
.string
);
248 m_data_
->isWritable
= TRUE
;
249 /* Free offsetBuffer before initializing it. */
250 freeOffsetBuffer(&(m_data_
->iteratordata_
));
251 uprv_init_collIterate(m_data_
->iteratordata_
.coll
, buffer
, length
,
252 &m_data_
->iteratordata_
);
253 m_data_
->reset_
= TRUE
;
256 int32_t CollationElementIterator::strengthOrder(int32_t order
) const
258 UCollationStrength s
= ucol_getStrength(m_data_
->iteratordata_
.coll
);
259 // Mask off the unwanted differences.
260 if (s
== UCOL_PRIMARY
) {
261 order
&= RuleBasedCollator::PRIMARYDIFFERENCEONLY
;
263 else if (s
== UCOL_SECONDARY
) {
264 order
&= RuleBasedCollator::SECONDARYDIFFERENCEONLY
;
270 /* CollationElementIterator private constructors/destructors --------------- */
273 * This is the "real" constructor for this class; it constructs an iterator
274 * over the source text using the specified collator
/*
 * Constructs an iterator over a private copy of `sourceText` using the
 * collator `order`.
 * Visible steps: check `status`; uprv_malloc a buffer (length UChars on one
 * path, a single UChar on the other) and copy the source into it with
 * u_memcpy; open the underlying iterator with ucol_openElements on
 * order->ucollator; on success mark the iterator data writable.
 * Allocation failure sets `status` to U_MEMORY_ALLOCATION_ERROR.
 * NOTE(review): the end of the parameter list, any member initializers and
 * the branch/return lines are missing from this extract.
 */
276 CollationElementIterator::CollationElementIterator(
277 const UnicodeString
& sourceText
,
278 const RuleBasedCollator
* order
,
282 if (U_FAILURE(status
)) {
286 int32_t length
= sourceText
.length();
287 UChar
*string
= NULL
;
290 string
= (UChar
*)uprv_malloc(U_SIZEOF_UCHAR
* length
);
292 if (string
== NULL
) {
293 status
= U_MEMORY_ALLOCATION_ERROR
;
297 Using this constructor will prevent buffer from being removed when
300 u_memcpy(string
, sourceText
.getBuffer(), length
);
303 string
= (UChar
*)uprv_malloc(U_SIZEOF_UCHAR
);
305 if (string
== NULL
) {
306 status
= U_MEMORY_ALLOCATION_ERROR
;
// NOTE(review): `string` was obtained from uprv_malloc above. If the failure
// branch after ucol_openElements leaves without uprv_free(string) (its body is
// not visible in this extract), the copy is leaked -- confirm.
311 m_data_
= ucol_openElements(order
->ucollator
, string
, length
, &status
);
313 /* Test for buffer overflows */
314 if (U_FAILURE(status
)) {
317 m_data_
->isWritable
= TRUE
;
321 * This is the "real" constructor for this class; it constructs an iterator over
322 * the source text using the specified collator
/*
 * Constructs an iterator over a private copy of the characters exposed by
 * `sourceText`, using the collator `order`.
 * Visible steps: check `status`; read the length from
 * sourceText.getLength(); uprv_malloc a buffer (length UChars on one path, a
 * single UChar on the other); on the length-sized path wrap the buffer in a
 * UnicodeString, fill it through getText (casting away the const on
 * sourceText) and u_memcpy the result back into the buffer; open the
 * underlying iterator with ucol_openElements on order->ucollator; on success
 * mark the iterator data writable.
 * Allocation failure sets `status` to U_MEMORY_ALLOCATION_ERROR.
 *
 * NOTE(review): the lines mentioning endIndex()/Normalizer below are not
 * token-split like the live code around them; they look like remnants of a
 * commented-out region whose comment delimiters were lost -- confirm.
 * NOTE(review): the end of the parameter list, any member initializers, the
 * declaration of `buffer` and the branch/return lines are missing from this
 * extract.
 */
324 CollationElementIterator::CollationElementIterator(
325 const CharacterIterator
& sourceText
,
326 const RuleBasedCollator
* order
,
330 if (U_FAILURE(status
))
333 // **** should I just drop this test? ****
335 if ( sourceText.endIndex() != 0 )
337 // A CollationElementIterator is really a two-layered beast.
338 // Internally it uses a Normalizer to munge the source text into a form
339 // where all "composed" Unicode characters (such as \u00FC) are split into a
340 // normal character and a combining accent character.
341 // Afterward, CollationElementIterator does its own processing to handle
342 // expanding and contracting collation sequences, ignorables, and so on.
344 Normalizer::EMode decomp = order->getStrength() == Collator::IDENTICAL
345 ? Normalizer::NO_OP : order->getDecomposition();
347 text = new Normalizer(sourceText, decomp);
349 status = U_MEMORY_ALLOCATION_ERROR;
352 int32_t length
= sourceText
.getLength();
355 buffer
= (UChar
*)uprv_malloc(U_SIZEOF_UCHAR
* length
);
357 if (buffer
== NULL
) {
358 status
= U_MEMORY_ALLOCATION_ERROR
;
362 Using this constructor will prevent buffer from being removed when
365 UnicodeString
string(buffer
, length
, length
);
366 ((CharacterIterator
&)sourceText
).getText(string
);
367 const UChar
*temp
= string
.getBuffer();
368 u_memcpy(buffer
, temp
, length
);
371 buffer
= (UChar
*)uprv_malloc(U_SIZEOF_UCHAR
);
373 if (buffer
== NULL
) {
374 status
= U_MEMORY_ALLOCATION_ERROR
;
// NOTE(review): `buffer` was obtained from uprv_malloc above. If the failure
// branch after ucol_openElements leaves without uprv_free(buffer) (its body is
// not visible in this extract), the copy is leaked -- confirm.
379 m_data_
= ucol_openElements(order
->ucollator
, buffer
, length
, &status
);
381 /* Test for buffer overflows */
382 if (U_FAILURE(status
)) {
385 m_data_
->isWritable
= TRUE
;
388 /* CollationElementIterator protected methods ----------------------------- */
/*
 * Assignment operator: copies the iteration state of `other` into this
 * object's existing UCollationElements.
 * Visible steps, in order:
 *   - take the text length from other's endp - string;
 *   - copy reset_ and set isWritable to TRUE;
 *   - uprv_malloc a duplicate of other's string (the pointer is set to NULL
 *     on the allocation-failure path) and recompute endp;
 *   - when other has UCOL_ITER_INNORMBUF set, copy its writable buffer,
 *     either into the stack buffer or into a newly malloc'ed one;
 *   - translate other's current position into this object's string or
 *     writable buffer;
 *   - copy the CE buffer state (CEs / extendCEs, toReturn, CEpos);
 *   - translate fcdPosition, then copy flags, origFlags and coll;
 *   - set isDataOwned_ to TRUE.
 * NOTE(review): several lines (local declarations, branch openers, the
 * return statement) are missing from this extract.
 */
390 const CollationElementIterator
& CollationElementIterator::operator=(
391 const CollationElementIterator
& other
)
395 UCollationElements
*ucolelem
= this->m_data_
;
396 UCollationElements
*otherucolelem
= other
.m_data_
;
397 collIterate
*coliter
= &(ucolelem
->iteratordata_
);
398 collIterate
*othercoliter
= &(otherucolelem
->iteratordata_
);
401 // checking only UCOL_ITER_HASLEN is not enough here as we may be in
402 // the normalization buffer
403 length
= othercoliter
->endp
- othercoliter
->string
;
405 ucolelem
->reset_
= otherucolelem
->reset_
;
406 ucolelem
->isWritable
= TRUE
;
408 /* create a duplicate of string */
// NOTE(review): nothing visible in this extract releases the string that
// coliter previously held before it is overwritten here -- confirm whether a
// dropped line frees it, otherwise the old buffer leaks.
410 coliter
->string
= (UChar
*)uprv_malloc(length
* U_SIZEOF_UCHAR
);
411 if(coliter
->string
!= NULL
) {
412 uprv_memcpy(coliter
->string
, othercoliter
->string
,
413 length
* U_SIZEOF_UCHAR
);
414 } else { // Error: couldn't allocate memory. No copying should be done
419 coliter
->string
= NULL
;
422 /* start and end of string */
423 coliter
->endp
= coliter
->string
+ length
;
425 /* handle writable buffer here */
427 if (othercoliter
->flags
& UCOL_ITER_INNORMBUF
) {
428 uint32_t wlength
= u_strlen(othercoliter
->writableBuffer
) + 1;
// NOTE(review): this branch copies from other's stackWritableBuffer, while
// wlength above was measured on other's writableBuffer; the two are only the
// same when other is still using its stack buffer -- confirm.
429 if (wlength
< coliter
->writableBufSize
) {
430 uprv_memcpy(coliter
->stackWritableBuffer
,
431 othercoliter
->stackWritableBuffer
,
432 wlength
* U_SIZEOF_UCHAR
);
435 if (coliter
->writableBuffer
!= coliter
->stackWritableBuffer
) {
436 uprv_free(coliter
->writableBuffer
);
438 coliter
->writableBuffer
= (UChar
*)uprv_malloc(
439 wlength
* U_SIZEOF_UCHAR
);
440 if(coliter
->writableBuffer
!= NULL
) {
441 uprv_memcpy(coliter
->writableBuffer
,
442 othercoliter
->writableBuffer
,
443 wlength
* U_SIZEOF_UCHAR
);
444 coliter
->writableBufSize
= wlength
;
445 } else { // Error: couldn't allocate memory for writableBuffer
446 coliter
->writableBufSize
= 0;
451 /* current position */
452 if (othercoliter
->pos
>= othercoliter
->string
&&
453 othercoliter
->pos
<= othercoliter
->endp
)
455 coliter
->pos
= coliter
->string
+
456 (othercoliter
->pos
- othercoliter
->string
);
458 else if (coliter
->writableBuffer
!= NULL
) {
459 coliter
->pos
= coliter
->writableBuffer
+
460 (othercoliter
->pos
- othercoliter
->writableBuffer
);
463 // Error: couldn't allocate memory for writableBuffer
// NOTE(review): this test inspects the destination's extendCEs, not the
// source's -- confirm that is intended.
469 if (coliter
->extendCEs
) {
470 uprv_memcpy(coliter
->CEs
, othercoliter
->CEs
, sizeof(uint32_t) * UCOL_EXPAND_CE_BUFFER_SIZE
);
// NOTE(review): likely defect. sizeof(othercoliter->extendCEs) is the size of
// the pointer member itself (it is assigned a uint32_t* just below), not the
// size of the buffer it refers to. The uprv_malloc result is then stored into
// othercoliter (the SOURCE object) rather than coliter, overwriting the
// source's pointer, and the following uprv_memcpy reads from that fresh,
// uninitialized allocation. Confirm against collIterate's definition.
471 CEsize
= sizeof(othercoliter
->extendCEs
);
473 othercoliter
->extendCEs
= (uint32_t *)uprv_malloc(CEsize
);
474 uprv_memcpy(coliter
->extendCEs
, othercoliter
->extendCEs
, CEsize
);
476 coliter
->toReturn
= coliter
->extendCEs
+
477 (othercoliter
->toReturn
- othercoliter
->extendCEs
);
478 coliter
->CEpos
= coliter
->extendCEs
+ CEsize
;
// NOTE(review): CEsize below is a pointer difference (an element count), yet
// it is passed to uprv_memcpy as the size, whereas the earlier copy of CEs
// scales by sizeof(uint32_t). Presumably uprv_memcpy takes a byte count like
// memcpy, in which case only part of the used CEs is copied -- confirm.
480 CEsize
= (int32_t)(othercoliter
->CEpos
- othercoliter
->CEs
);
482 uprv_memcpy(coliter
->CEs
, othercoliter
->CEs
, CEsize
);
484 coliter
->toReturn
= coliter
->CEs
+
485 (othercoliter
->toReturn
- othercoliter
->CEs
);
486 coliter
->CEpos
= coliter
->CEs
+ CEsize
;
489 if (othercoliter
->fcdPosition
!= NULL
) {
490 coliter
->fcdPosition
= coliter
->string
+
491 (othercoliter
->fcdPosition
492 - othercoliter
->string
);
495 coliter
->fcdPosition
= NULL
;
497 coliter
->flags
= othercoliter
->flags
/*| UCOL_ITER_HASLEN*/;
498 coliter
->origFlags
= othercoliter
->origFlags
;
499 coliter
->coll
= othercoliter
->coll
;
500 this->isDataOwned_
= TRUE
;
508 #endif /* #if !UCONFIG_NO_COLLATION */