2 *******************************************************************************
3 * Copyright (C) 1996-2003, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 *******************************************************************************
13 * Created by: Helena Shih
15 * Modification History:
17 * Date Name Description
19 * 6/23/97 helena Adding comments to make code more readable.
20 * 08/03/98 erm Synched with 1.2 version of CollationElementIterator.java
21 * 12/10/99 aliu Ported Thai collation support from Java.
22 * 01/25/01 swquek Modified to a C++ wrapper calling C APIs (ucoliter.h)
23 * 02/19/01 swquek Removed CollationElementsIterator() since it is
24 * private constructor and no calls are made to it
27 #include "unicode/utypes.h"
29 #if !UCONFIG_NO_COLLATION
31 #include "unicode/coleitr.h"
32 #include "unicode/ustring.h"
37 /* Constants --------------------------------------------------------------- */
// Expands the UObject RTTI boilerplate for this class. NOTE(review): the
// macro is defined outside this file; confirm exactly what it generates.
41 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationElementIterator
)
43 /* synwee : public can't remove */
// Out-of-class definition of the NULLORDER constant. The doc comments on
// next() and previous() in this file name it as their error / end-of-text
// result.
// NOTE(review): 0xffffffff does not fit in a signed int32_t, so the stored
// value relies on the unsigned-to-signed conversion (-1 on two's-complement
// targets).
44 int32_t const CollationElementIterator::NULLORDER
= 0xffffffff;
46 /* CollationElementIterator public constructor/destructor ------------------ */
// Copy constructor. Initializes the UObject base from `other`, sets
// isDataOwned_ to TRUE, and opens a new UCollationElements on other's
// collator with a NULL source text and length 0.
// NOTE(review): the UErrorCode is a local and this signature has no status
// parameter, so a failure inside ucol_openElements cannot be reported to the
// caller from here — confirm m_data_ is never used when that call fails.
48 CollationElementIterator::CollationElementIterator(
49 const CollationElementIterator
& other
)
50 : UObject(other
), isDataOwned_(TRUE
)
52 UErrorCode status
= U_ZERO_ERROR
;
53 m_data_
= ucol_openElements(other
.m_data_
->iteratordata_
.coll
, NULL
, 0,
// Destructor: hands the wrapped C iterator (m_data_) to ucol_closeElements.
// NOTE(review): confirm the close is skipped when isDataOwned_ is false.
59 CollationElementIterator::~CollationElementIterator()
62 ucol_closeElements(m_data_
);
66 /* CollationElementIterator public methods --------------------------------- */
// Returns whatever ucol_getOffset reports for the wrapped C iterator.
68 int32_t CollationElementIterator::getOffset() const
70 return ucol_getOffset(m_data_
);
74 * Get the ordering priority of the next character in the string.
75 * @return the next character's ordering. Returns NULLORDER if an error has
76 * occurred or if the end of string has been reached
// Thin C++ wrapper: forwards to ucol_next on m_data_, passing `status` by
// address, and returns that call's result unchanged.
78 int32_t CollationElementIterator::next(UErrorCode
& status
)
80 return ucol_next(m_data_
, &status
);
// Inequality is defined purely as the negation of operator==, so the two
// operators can never disagree.
83 UBool
CollationElementIterator::operator!=(
84 const CollationElementIterator
& other
) const
86 return !(*this == other
);
/**
 * Equality test for two collation element iterators.
 *
 * Two iterators compare equal when they wrap the same C iterator, or when
 * all of the following hold: they use the same collator, iterate over
 * identical source text, report the same offset, agree on whether the
 * UCOL_ITER_HASLEN flag is set (and, when it is clear on both, sit at the
 * same position relative to their writable buffers), and sit at the same
 * position in their CE buffers.
 *
 * @param that the iterator to compare against.
 * @return TRUE if the iterators are equal, FALSE otherwise.
 */
UBool CollationElementIterator::operator==(
                                    const CollationElementIterator& that) const
{
    // Same underlying C iterator (this also covers self-comparison).
    if (m_data_ == that.m_data_) {
        return TRUE;
    }

    const collIterate &lhs = m_data_->iteratordata_;
    const collIterate &rhs = that.m_data_->iteratordata_;

    // Iterators over different collators are never equal.
    if (lhs.coll != rhs.coll) {
        return FALSE;
    }

    // The constructor and setText always set a length, and only the source
    // string is compared here, not the contents of the normalization buffer.
    int thislength = lhs.endp - lhs.string;
    int thatlength = rhs.endp - rhs.string;

    if (thislength != thatlength) {
        return FALSE;
    }

    if (uprv_memcmp(lhs.string, rhs.string,
                    thislength * U_SIZEOF_UCHAR) != 0) {
        return FALSE;
    }

    if (getOffset() != that.getOffset()) {
        return FALSE;
    }

    // Checking the normalization buffer.
    // The earlier code tested this object's flags in both the nested check
    // and the else-branch, which made those two checks unreachable; the
    // second operand of each comparison has to be `that`.
    const UBool thisHasLen = (lhs.flags & UCOL_ITER_HASLEN) != 0;
    const UBool thatHasLen = (rhs.flags & UCOL_ITER_HASLEN) != 0;

    if (thisHasLen != thatHasLen) {
        // only one of the two has the length flag set
        return FALSE;
    }

    if (!thisHasLen) {
        // both have the flag clear: compare positions relative to the
        // writable (normalization) buffers
        if (lhs.pos - lhs.writableBuffer != rhs.pos - rhs.writableBuffer) {
            // not in the same position in the normalization buffer
            return FALSE;
        }
    }

    // checking ce position
    return (lhs.CEpos - lhs.CEs) == (rhs.CEpos - rhs.CEs);
}
151 * Get the ordering priority of the previous collation element in the string.
152 * @param status the error code status.
153 * @return the previous element's ordering. Returns NULLORDER if an error has
154 * occurred or if the start of string has been reached.
// Thin C++ wrapper: forwards to ucol_previous on m_data_, passing `status`
// by address, and returns that call's result unchanged.
156 int32_t CollationElementIterator::previous(UErrorCode
& status
)
158 return ucol_previous(m_data_
, &status
);
162 * Resets the cursor to the beginning of the string.
164 void CollationElementIterator::reset()
// Repositions the iterator by forwarding newOffset and &status to
// ucol_setOffset on the wrapped C iterator.
169 void CollationElementIterator::setOffset(int32_t newOffset
,
172 ucol_setOffset(m_data_
, newOffset
, &status
);
176 * Sets the source to the new source string.
// Replaces the iterated text with a private copy of `source`.
// Steps in order: an incoming-failure check on `status`, release of the
// previously owned text buffer, allocation and copy of the new text, then
// uprv_init_collIterate over the new buffer (same collator) and reset_ = TRUE.
178 void CollationElementIterator::setText(const UnicodeString
& source
,
181 if (U_FAILURE(status
)) {
185 int32_t length
= source
.length();
186 UChar
*string
= NULL
;
// Free the old text only when this iterator owns it (isWritable) and the
// pointer is non-NULL.
187 if (m_data_
->isWritable
&& m_data_
->iteratordata_
.string
!= NULL
) {
188 uprv_free(m_data_
->iteratordata_
.string
);
// From here on the iterator is marked as owning its text buffer.
190 m_data_
->isWritable
= TRUE
;
// NOTE(review): the old buffer has already been freed above. If this
// allocation fails, iteratordata_.string still holds the freed pointer while
// isWritable is TRUE — confirm nothing frees or reads it again. Allocating
// before freeing (as the CharacterIterator overload of setText does) would
// avoid the problem.
192 string
= (UChar
*)uprv_malloc(U_SIZEOF_UCHAR
* length
);
194 if (string
== NULL
) {
195 status
= U_MEMORY_ALLOCATION_ERROR
;
// Copy `length` UChars out of the UnicodeString's buffer.
198 u_memcpy(string
, source
.getBuffer(), length
);
// Alternate allocation of a single UChar (U_SIZEOF_UCHAR bytes) —
// presumably the empty-text path; confirm the guarding condition.
201 string
= (UChar
*)uprv_malloc(U_SIZEOF_UCHAR
);
203 if (string
== NULL
) {
204 status
= U_MEMORY_ALLOCATION_ERROR
;
// Rebuild the collIterate state in place over the new buffer, reusing the
// collator the iterator already had.
209 uprv_init_collIterate(m_data_
->iteratordata_
.coll
, string
, length
,
210 &m_data_
->iteratordata_
);
// NOTE(review): reset_ presumably tells the C iterator it is at its initial
// position — confirm against the UCollationElements definition.
212 m_data_
->reset_
= TRUE
;
215 // Sets the source to the new character iterator.
// Replaces the iterated text with a private copy of the characters exposed
// by `source`. Unlike the UnicodeString overload, the new buffer is allocated
// and filled before the previously owned buffer is released.
216 void CollationElementIterator::setText(CharacterIterator
& source
,
219 if (U_FAILURE(status
))
// The buffer size is taken from the iterator's getLength().
222 int32_t length
= source
.getLength();
223 UChar
*buffer
= NULL
;
// Allocation of a single UChar (U_SIZEOF_UCHAR bytes) — presumably the
// empty-text path; confirm the guarding condition.
226 buffer
= (UChar
*)uprv_malloc(U_SIZEOF_UCHAR
);
228 if (buffer
== NULL
) {
229 status
= U_MEMORY_ALLOCATION_ERROR
;
// Allocation of `length` UChars for the text copy.
235 buffer
= (UChar
*)uprv_malloc(U_SIZEOF_UCHAR
* length
);
237 if (buffer
== NULL
) {
238 status
= U_MEMORY_ALLOCATION_ERROR
;
242 Using this constructor will prevent buffer from being removed when
// The text is first pulled into a temporary UnicodeString, then `length`
// UChars are copied from it into the malloc'ed buffer.
245 UnicodeString string
;
246 source
.getText(string
);
247 u_memcpy(buffer
, string
.getBuffer(), length
);
// Release the old text only when this iterator owns it and it is non-NULL.
250 if (m_data_
->isWritable
&& m_data_
->iteratordata_
.string
!= NULL
) {
251 uprv_free(m_data_
->iteratordata_
.string
);
253 m_data_
->isWritable
= TRUE
;
// Rebuild the collIterate state in place over the new buffer, reusing the
// collator the iterator already had.
254 uprv_init_collIterate(m_data_
->iteratordata_
.coll
, buffer
, length
,
255 &m_data_
->iteratordata_
);
256 m_data_
->reset_
= TRUE
;
// Masks `order` according to the strength of the iterator's collator:
// UCOL_PRIMARY keeps only the RuleBasedCollator::PRIMARYDIFFERENCEONLY bits,
// UCOL_SECONDARY keeps only the SECONDARYDIFFERENCEONLY bits. The branches
// here apply no mask for any other strength. `order` is a by-value
// parameter, so the caller's variable is never modified.
259 int32_t CollationElementIterator::strengthOrder(int32_t order
) const
// The strength is read from the collator attached to the wrapped iterator.
261 UCollationStrength s
= ucol_getStrength(m_data_
->iteratordata_
.coll
);
262 // Mask off the unwanted differences.
263 if (s
== UCOL_PRIMARY
) {
264 order
&= RuleBasedCollator::PRIMARYDIFFERENCEONLY
;
266 else if (s
== UCOL_SECONDARY
) {
267 order
&= RuleBasedCollator::SECONDARYDIFFERENCEONLY
;
273 /* CollationElementIterator private constructors/destructors --------------- */
276 * This is the "real" constructor for this class; it constructs an iterator
277 * over the source text using the specified collator
// Builds an iterator over a private copy of `sourceText`, using the C
// collator wrapped by `order` (order->ucollator) via ucol_openElements.
// NOTE(review): the destructor passes m_data_ to ucol_closeElements; confirm
// m_data_ holds a safe value on every early-exit path taken before the
// ucol_openElements call below.
279 CollationElementIterator::CollationElementIterator(
280 const UnicodeString
& sourceText
,
281 const RuleBasedCollator
* order
,
285 if (U_FAILURE(status
)) {
289 int32_t length
= sourceText
.length();
290 UChar
*string
= NULL
;
// Allocation of `length` UChars for the text copy.
293 string
= (UChar
*)uprv_malloc(U_SIZEOF_UCHAR
* length
);
295 if (string
== NULL
) {
296 status
= U_MEMORY_ALLOCATION_ERROR
;
300 Using this constructor will prevent buffer from being removed when
// Copy `length` UChars out of the UnicodeString's buffer.
303 u_memcpy(string
, sourceText
.getBuffer(), length
);
// Alternate allocation of a single UChar (U_SIZEOF_UCHAR bytes) —
// presumably the empty-text path; confirm the guarding condition.
306 string
= (UChar
*)uprv_malloc(U_SIZEOF_UCHAR
);
308 if (string
== NULL
) {
309 status
= U_MEMORY_ALLOCATION_ERROR
;
// Open the C iterator over the copied text.
314 m_data_
= ucol_openElements(order
->ucollator
, string
, length
, &status
);
// NOTE(review): isWritable is only set after this failure check; confirm
// `string` is released when ucol_openElements reports an error.
316 /* Test for buffer overflows */
317 if (U_FAILURE(status
)) {
// Mark the text buffer as owned by the iterator.
320 m_data_
->isWritable
= TRUE
;
324 * This is the "real" constructor for this class; it constructs an iterator over
325 * the source text using the specified collator
// Builds an iterator over a private copy of the characters exposed by
// `sourceText`, using the C collator wrapped by `order` (order->ucollator)
// via ucol_openElements.
// NOTE(review): the destructor passes m_data_ to ucol_closeElements; confirm
// m_data_ holds a safe value on every early-exit path taken before the
// ucol_openElements call below.
327 CollationElementIterator::CollationElementIterator(
328 const CharacterIterator
& sourceText
,
329 const RuleBasedCollator
* order
,
333 if (U_FAILURE(status
))
// NOTE(review): the Normalizer-based lines that follow use the names `text`
// and `decomp`, which nothing else in this constructor refers to — they look
// like legacy code; confirm whether they are still compiled.
336 // **** should I just drop this test? ****
338 if ( sourceText.endIndex() != 0 )
340 // A CollationElementIterator is really a two-layered beast.
341 // Internally it uses a Normalizer to munge the source text into a form
342 // where all "composed" Unicode characters (such as \u00FC) are split into a
343 // normal character and a combining accent character.
344 // Afterward, CollationElementIterator does its own processing to handle
345 // expanding and contracting collation sequences, ignorables, and so on.
347 Normalizer::EMode decomp = order->getStrength() == Collator::IDENTICAL
348 ? Normalizer::NO_OP : order->getDecomposition();
350 text = new Normalizer(sourceText, decomp);
352 status = U_MEMORY_ALLOCATION_ERROR;
// The buffer size is taken from the iterator's getLength().
355 int32_t length
= sourceText
.getLength();
358 buffer
= (UChar
*)uprv_malloc(U_SIZEOF_UCHAR
* length
);
360 if (buffer
== NULL
) {
361 status
= U_MEMORY_ALLOCATION_ERROR
;
365 Using this constructor will prevent buffer from being removed when
// A UnicodeString is constructed directly over `buffer`, filled through
// getText, and then `length` UChars are copied from the string's buffer back
// into `buffer`.
// NOTE(review): the C-style cast strips const from sourceText so getText can
// be called — presumably getText is a non-const member; confirm it does not
// alter the iterator's position.
368 UnicodeString
string(buffer
, length
, length
);
369 ((CharacterIterator
&)sourceText
).getText(string
);
370 const UChar
*temp
= string
.getBuffer();
371 u_memcpy(buffer
, temp
, length
);
// Alternate allocation of a single UChar (U_SIZEOF_UCHAR bytes) —
// presumably the empty-text path; confirm the guarding condition.
374 buffer
= (UChar
*)uprv_malloc(U_SIZEOF_UCHAR
);
376 if (buffer
== NULL
) {
377 status
= U_MEMORY_ALLOCATION_ERROR
;
// Open the C iterator over the copied text.
382 m_data_
= ucol_openElements(order
->ucollator
, buffer
, length
, &status
);
// NOTE(review): isWritable is only set after this failure check; confirm
// `buffer` is released when ucol_openElements reports an error.
384 /* Test for buffer overflows */
385 if (U_FAILURE(status
)) {
// Mark the text buffer as owned by the iterator.
388 m_data_
->isWritable
= TRUE
;
391 /* CollationElementIterator protected methods ----------------------------- */
// Assignment: copies `other`'s iteration state into this iterator.
// The source text is duplicated into a freshly malloc'ed buffer, and every
// interior pointer (endp, pos, toReturn, CEpos, fcdPosition) is rebuilt as
// "own base pointer + other's offset" so it does not point into other's
// storage. The collator pointer itself is copied, not cloned.
393 const CollationElementIterator
& CollationElementIterator::operator=(
394 const CollationElementIterator
& other
)
// Short aliases for the two C iterators and their embedded collIterate state.
398 UCollationElements
*ucolelem
= this->m_data_
;
399 UCollationElements
*otherucolelem
= other
.m_data_
;
400 collIterate
*coliter
= &(ucolelem
->iteratordata_
);
401 collIterate
*othercoliter
= &(otherucolelem
->iteratordata_
);
404 // checking only UCOL_ITER_HASLEN is not enough here as we may be in
405 // the normalization buffer
// Text length is derived from the pointer difference endp - string.
406 length
= othercoliter
->endp
- othercoliter
->string
;
408 ucolelem
->reset_
= otherucolelem
->reset_
;
// The copy always owns its text buffer.
409 ucolelem
->isWritable
= TRUE
;
// NOTE(review): no release of the previous coliter->string appears before it
// is overwritten below — confirm this does not leak when this iterator
// already owned a buffer.
411 /* create a duplicate of string */
413 coliter
->string
= (UChar
*)uprv_malloc(length
* U_SIZEOF_UCHAR
);
414 if(coliter
->string
!= NULL
) {
415 uprv_memcpy(coliter
->string
, othercoliter
->string
,
416 length
* U_SIZEOF_UCHAR
);
417 } else { // Error: couldn't allocate memory. No copying should be done
// Separate path that leaves the text pointer NULL — presumably for empty
// text; confirm the guarding condition.
422 coliter
->string
= NULL
;
425 /* start and end of string */
426 coliter
->endp
= coliter
->string
+ length
;
428 /* handle writable buffer here */
// Only done when other is flagged as being in the normalization buffer.
430 if (othercoliter
->flags
& UCOL_ITER_INNORMBUF
) {
// wlength counts the terminating NUL as well (u_strlen + 1).
431 uint32_t wlength
= u_strlen(othercoliter
->writableBuffer
) + 1;
// NOTE(review): the size test compares wlength with this iterator's
// writableBufSize, but the copy below transfers other's writableBufSize
// UChars between the two stack buffers — confirm both stack buffers are at
// least that large and that other's data actually lives in its stack buffer.
432 if (wlength
< coliter
->writableBufSize
) {
433 uprv_memcpy(coliter
->stackWritableBuffer
,
434 othercoliter
->stackWritableBuffer
,
435 othercoliter
->writableBufSize
* U_SIZEOF_UCHAR
);
// Otherwise a heap buffer of wlength UChars is used; a previous heap buffer
// (one that is not the stack buffer) is freed first.
438 if (coliter
->writableBuffer
!= coliter
->stackWritableBuffer
) {
439 uprv_free(coliter
->writableBuffer
);
441 coliter
->writableBuffer
= (UChar
*)uprv_malloc(
442 wlength
* U_SIZEOF_UCHAR
);
443 if(coliter
->writableBuffer
!= NULL
) {
444 uprv_memcpy(coliter
->writableBuffer
,
445 othercoliter
->writableBuffer
,
446 wlength
* U_SIZEOF_UCHAR
);
447 coliter
->writableBufSize
= wlength
;
448 } else { // Error: couldn't allocate memory for writableBuffer
449 coliter
->writableBufSize
= 0;
// pos is rebased against `string` when other's pos lies within
// [string, endp]; the second assignment rebases it against writableBuffer.
454 /* current position */
455 if (othercoliter
->pos
>= othercoliter
->string
&&
456 othercoliter
->pos
<= othercoliter
->endp
) {
457 coliter
->pos
= coliter
->string
+
458 (othercoliter
->pos
- othercoliter
->string
);
461 coliter
->pos
= coliter
->writableBuffer
+
462 (othercoliter
->pos
- othercoliter
->writableBuffer
);
// Copy the whole CE buffer, then rebase the two cursors into this copy.
466 uprv_memcpy(coliter
->CEs
, othercoliter
->CEs
,
467 UCOL_EXPAND_CE_BUFFER_SIZE
* sizeof(uint32_t));
468 coliter
->toReturn
= coliter
->CEs
+
469 (othercoliter
->toReturn
- othercoliter
->CEs
);
470 coliter
->CEpos
= coliter
->CEs
+
471 (othercoliter
->CEpos
- othercoliter
->CEs
);
// fcdPosition is rebased against `string` when set, otherwise left NULL.
473 if (othercoliter
->fcdPosition
!= NULL
) {
474 coliter
->fcdPosition
= coliter
->string
+
475 (othercoliter
->fcdPosition
476 - othercoliter
->string
);
479 coliter
->fcdPosition
= NULL
;
// Flags are copied verbatim; the commented-out "| UCOL_ITER_HASLEN" is kept
// as found.
481 coliter
->flags
= othercoliter
->flags
/*| UCOL_ITER_HASLEN*/;
482 coliter
->origFlags
= othercoliter
->origFlags
;
// The collator pointer is shared with `other`, not cloned.
483 coliter
->coll
= othercoliter
->coll
;
484 this->isDataOwned_
= TRUE
;
492 #endif /* #if !UCONFIG_NO_COLLATION */