/*
*******************************************************************************
* Copyright (C) 1996-2006, International Business Machines Corporation and    *
* others. All Rights Reserved.                                                *
*******************************************************************************
*/

/*
* Created by: Helena Shih
*
* Modification History:
*
*  Date      Name        Description
*
*  6/23/97   helena      Adding comments to make code more readable.
* 08/03/98   erm         Synched with 1.2 version of CollationElementIterator.java
* 12/10/99   aliu        Ported Thai collation support from Java.
* 01/25/01   swquek      Modified to a C++ wrapper calling C APIs (ucoliter.h)
* 02/19/01   swquek      Removed CollationElementsIterator() since it is
*                        private constructor and no calls are made to it
*/
27 #include "unicode/utypes.h"
29 #if !UCONFIG_NO_COLLATION
31 #include "unicode/coleitr.h"
32 #include "unicode/ustring.h"
37 /* Constants --------------------------------------------------------------- */
41 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationElementIterator
)
43 /* CollationElementIterator public constructor/destructor ------------------ */
45 CollationElementIterator::CollationElementIterator(
46 const CollationElementIterator
& other
)
47 : UObject(other
), isDataOwned_(TRUE
)
49 UErrorCode status
= U_ZERO_ERROR
;
50 m_data_
= ucol_openElements(other
.m_data_
->iteratordata_
.coll
, NULL
, 0,
56 CollationElementIterator::~CollationElementIterator()
59 ucol_closeElements(m_data_
);
63 /* CollationElementIterator public methods --------------------------------- */
65 int32_t CollationElementIterator::getOffset() const
67 return ucol_getOffset(m_data_
);
71 * Get the ordering priority of the next character in the string.
72 * @return the next character's ordering. Returns NULLORDER if an error has
73 * occured or if the end of string has been reached
75 int32_t CollationElementIterator::next(UErrorCode
& status
)
77 return ucol_next(m_data_
, &status
);
80 UBool
CollationElementIterator::operator!=(
81 const CollationElementIterator
& other
) const
83 return !(*this == other
);
86 UBool
CollationElementIterator::operator==(
87 const CollationElementIterator
& that
) const
93 if (m_data_
== that
.m_data_
) {
98 if (m_data_
->iteratordata_
.coll
!= that
.m_data_
->iteratordata_
.coll
)
103 // the constructor and setText always sets a length
104 // and we only compare the string not the contents of the normalization
106 int thislength
= m_data_
->iteratordata_
.endp
-
107 m_data_
->iteratordata_
.string
;
108 int thatlength
= that
.m_data_
->iteratordata_
.endp
-
109 that
.m_data_
->iteratordata_
.string
;
111 if (thislength
!= thatlength
) {
115 if (uprv_memcmp(m_data_
->iteratordata_
.string
,
116 that
.m_data_
->iteratordata_
.string
,
117 thislength
* U_SIZEOF_UCHAR
) != 0) {
120 if (getOffset() != that
.getOffset()) {
124 // checking normalization buffer
125 if ((m_data_
->iteratordata_
.flags
& UCOL_ITER_HASLEN
) == 0) {
126 if ((m_data_
->iteratordata_
.flags
& UCOL_ITER_HASLEN
) != 0) {
129 // both are in the normalization buffer
130 if (m_data_
->iteratordata_
.pos
131 - m_data_
->iteratordata_
.writableBuffer
132 != that
.m_data_
->iteratordata_
.pos
133 - that
.m_data_
->iteratordata_
.writableBuffer
) {
134 // not in the same position in the normalization buffer
138 else if ((m_data_
->iteratordata_
.flags
& UCOL_ITER_HASLEN
) == 0) {
141 // checking ce position
142 return (m_data_
->iteratordata_
.CEpos
- m_data_
->iteratordata_
.CEs
)
143 == (that
.m_data_
->iteratordata_
.CEpos
144 - that
.m_data_
->iteratordata_
.CEs
);
148 * Get the ordering priority of the previous collation element in the string.
149 * @param status the error code status.
150 * @return the previous element's ordering. Returns NULLORDER if an error has
151 * occured or if the start of string has been reached.
153 int32_t CollationElementIterator::previous(UErrorCode
& status
)
155 return ucol_previous(m_data_
, &status
);
159 * Resets the cursor to the beginning of the string.
161 void CollationElementIterator::reset()
166 void CollationElementIterator::setOffset(int32_t newOffset
,
169 ucol_setOffset(m_data_
, newOffset
, &status
);
173 * Sets the source to the new source string.
175 void CollationElementIterator::setText(const UnicodeString
& source
,
178 if (U_FAILURE(status
)) {
182 int32_t length
= source
.length();
183 UChar
*string
= NULL
;
184 if (m_data_
->isWritable
&& m_data_
->iteratordata_
.string
!= NULL
) {
185 uprv_free(m_data_
->iteratordata_
.string
);
187 m_data_
->isWritable
= TRUE
;
189 string
= (UChar
*)uprv_malloc(U_SIZEOF_UCHAR
* length
);
191 if (string
== NULL
) {
192 status
= U_MEMORY_ALLOCATION_ERROR
;
195 u_memcpy(string
, source
.getBuffer(), length
);
198 string
= (UChar
*)uprv_malloc(U_SIZEOF_UCHAR
);
200 if (string
== NULL
) {
201 status
= U_MEMORY_ALLOCATION_ERROR
;
206 uprv_init_collIterate(m_data_
->iteratordata_
.coll
, string
, length
,
207 &m_data_
->iteratordata_
);
209 m_data_
->reset_
= TRUE
;
212 // Sets the source to the new character iterator.
213 void CollationElementIterator::setText(CharacterIterator
& source
,
216 if (U_FAILURE(status
))
219 int32_t length
= source
.getLength();
220 UChar
*buffer
= NULL
;
223 buffer
= (UChar
*)uprv_malloc(U_SIZEOF_UCHAR
);
225 if (buffer
== NULL
) {
226 status
= U_MEMORY_ALLOCATION_ERROR
;
232 buffer
= (UChar
*)uprv_malloc(U_SIZEOF_UCHAR
* length
);
234 if (buffer
== NULL
) {
235 status
= U_MEMORY_ALLOCATION_ERROR
;
239 Using this constructor will prevent buffer from being removed when
242 UnicodeString string
;
243 source
.getText(string
);
244 u_memcpy(buffer
, string
.getBuffer(), length
);
247 if (m_data_
->isWritable
&& m_data_
->iteratordata_
.string
!= NULL
) {
248 uprv_free(m_data_
->iteratordata_
.string
);
250 m_data_
->isWritable
= TRUE
;
251 uprv_init_collIterate(m_data_
->iteratordata_
.coll
, buffer
, length
,
252 &m_data_
->iteratordata_
);
253 m_data_
->reset_
= TRUE
;
256 int32_t CollationElementIterator::strengthOrder(int32_t order
) const
258 UCollationStrength s
= ucol_getStrength(m_data_
->iteratordata_
.coll
);
259 // Mask off the unwanted differences.
260 if (s
== UCOL_PRIMARY
) {
261 order
&= RuleBasedCollator::PRIMARYDIFFERENCEONLY
;
263 else if (s
== UCOL_SECONDARY
) {
264 order
&= RuleBasedCollator::SECONDARYDIFFERENCEONLY
;
270 /* CollationElementIterator private constructors/destructors --------------- */
273 * This is the "real" constructor for this class; it constructs an iterator
274 * over the source text using the specified collator
276 CollationElementIterator::CollationElementIterator(
277 const UnicodeString
& sourceText
,
278 const RuleBasedCollator
* order
,
282 if (U_FAILURE(status
)) {
286 int32_t length
= sourceText
.length();
287 UChar
*string
= NULL
;
290 string
= (UChar
*)uprv_malloc(U_SIZEOF_UCHAR
* length
);
292 if (string
== NULL
) {
293 status
= U_MEMORY_ALLOCATION_ERROR
;
297 Using this constructor will prevent buffer from being removed when
300 u_memcpy(string
, sourceText
.getBuffer(), length
);
303 string
= (UChar
*)uprv_malloc(U_SIZEOF_UCHAR
);
305 if (string
== NULL
) {
306 status
= U_MEMORY_ALLOCATION_ERROR
;
311 m_data_
= ucol_openElements(order
->ucollator
, string
, length
, &status
);
313 /* Test for buffer overflows */
314 if (U_FAILURE(status
)) {
317 m_data_
->isWritable
= TRUE
;
321 * This is the "real" constructor for this class; it constructs an iterator over
322 * the source text using the specified collator
324 CollationElementIterator::CollationElementIterator(
325 const CharacterIterator
& sourceText
,
326 const RuleBasedCollator
* order
,
330 if (U_FAILURE(status
))
333 // **** should I just drop this test? ****
335 if ( sourceText.endIndex() != 0 )
337 // A CollationElementIterator is really a two-layered beast.
338 // Internally it uses a Normalizer to munge the source text into a form
339 // where all "composed" Unicode characters (such as \u00FC) are split into a
340 // normal character and a combining accent character.
341 // Afterward, CollationElementIterator does its own processing to handle
342 // expanding and contracting collation sequences, ignorables, and so on.
344 Normalizer::EMode decomp = order->getStrength() == Collator::IDENTICAL
345 ? Normalizer::NO_OP : order->getDecomposition();
347 text = new Normalizer(sourceText, decomp);
349 status = U_MEMORY_ALLOCATION_ERROR;
352 int32_t length
= sourceText
.getLength();
355 buffer
= (UChar
*)uprv_malloc(U_SIZEOF_UCHAR
* length
);
357 if (buffer
== NULL
) {
358 status
= U_MEMORY_ALLOCATION_ERROR
;
362 Using this constructor will prevent buffer from being removed when
365 UnicodeString
string(buffer
, length
, length
);
366 ((CharacterIterator
&)sourceText
).getText(string
);
367 const UChar
*temp
= string
.getBuffer();
368 u_memcpy(buffer
, temp
, length
);
371 buffer
= (UChar
*)uprv_malloc(U_SIZEOF_UCHAR
);
373 if (buffer
== NULL
) {
374 status
= U_MEMORY_ALLOCATION_ERROR
;
379 m_data_
= ucol_openElements(order
->ucollator
, buffer
, length
, &status
);
381 /* Test for buffer overflows */
382 if (U_FAILURE(status
)) {
385 m_data_
->isWritable
= TRUE
;
388 /* CollationElementIterator protected methods ----------------------------- */
390 const CollationElementIterator
& CollationElementIterator::operator=(
391 const CollationElementIterator
& other
)
395 UCollationElements
*ucolelem
= this->m_data_
;
396 UCollationElements
*otherucolelem
= other
.m_data_
;
397 collIterate
*coliter
= &(ucolelem
->iteratordata_
);
398 collIterate
*othercoliter
= &(otherucolelem
->iteratordata_
);
401 // checking only UCOL_ITER_HASLEN is not enough here as we may be in
402 // the normalization buffer
403 length
= othercoliter
->endp
- othercoliter
->string
;
405 ucolelem
->reset_
= otherucolelem
->reset_
;
406 ucolelem
->isWritable
= TRUE
;
408 /* create a duplicate of string */
410 coliter
->string
= (UChar
*)uprv_malloc(length
* U_SIZEOF_UCHAR
);
411 if(coliter
->string
!= NULL
) {
412 uprv_memcpy(coliter
->string
, othercoliter
->string
,
413 length
* U_SIZEOF_UCHAR
);
414 } else { // Error: couldn't allocate memory. No copying should be done
419 coliter
->string
= NULL
;
422 /* start and end of string */
423 coliter
->endp
= coliter
->string
+ length
;
425 /* handle writable buffer here */
427 if (othercoliter
->flags
& UCOL_ITER_INNORMBUF
) {
428 uint32_t wlength
= u_strlen(othercoliter
->writableBuffer
) + 1;
429 if (wlength
< coliter
->writableBufSize
) {
430 uprv_memcpy(coliter
->stackWritableBuffer
,
431 othercoliter
->stackWritableBuffer
,
432 wlength
* U_SIZEOF_UCHAR
);
435 if (coliter
->writableBuffer
!= coliter
->stackWritableBuffer
) {
436 uprv_free(coliter
->writableBuffer
);
438 coliter
->writableBuffer
= (UChar
*)uprv_malloc(
439 wlength
* U_SIZEOF_UCHAR
);
440 if(coliter
->writableBuffer
!= NULL
) {
441 uprv_memcpy(coliter
->writableBuffer
,
442 othercoliter
->writableBuffer
,
443 wlength
* U_SIZEOF_UCHAR
);
444 coliter
->writableBufSize
= wlength
;
445 } else { // Error: couldn't allocate memory for writableBuffer
446 coliter
->writableBufSize
= 0;
451 /* current position */
452 if (othercoliter
->pos
>= othercoliter
->string
&&
453 othercoliter
->pos
<= othercoliter
->endp
)
455 coliter
->pos
= coliter
->string
+
456 (othercoliter
->pos
- othercoliter
->string
);
459 coliter
->pos
= coliter
->writableBuffer
+
460 (othercoliter
->pos
- othercoliter
->writableBuffer
);
464 int32_t CEsize
= (int32_t)(othercoliter
->CEpos
- othercoliter
->CEs
);
466 uprv_memcpy(coliter
->CEs
, othercoliter
->CEs
, CEsize
);
468 coliter
->toReturn
= coliter
->CEs
+
469 (othercoliter
->toReturn
- othercoliter
->CEs
);
470 coliter
->CEpos
= coliter
->CEs
+ CEsize
;
472 if (othercoliter
->fcdPosition
!= NULL
) {
473 coliter
->fcdPosition
= coliter
->string
+
474 (othercoliter
->fcdPosition
475 - othercoliter
->string
);
478 coliter
->fcdPosition
= NULL
;
480 coliter
->flags
= othercoliter
->flags
/*| UCOL_ITER_HASLEN*/;
481 coliter
->origFlags
= othercoliter
->origFlags
;
482 coliter
->coll
= othercoliter
->coll
;
483 this->isDataOwned_
= TRUE
;
491 #endif /* #if !UCONFIG_NO_COLLATION */