2 *******************************************************************************
3 * Copyright (C) 1996-2011, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 *******************************************************************************
13 * Created by: Helena Shih
15 * Modification History:
17 * Date Name Description
19 * 6/23/97 helena Adding comments to make code more readable.
20 * 08/03/98 erm Synched with 1.2 version of CollationElementIterator.java
21 * 12/10/99 aliu Ported Thai collation support from Java.
22 * 01/25/01 swquek Modified to a C++ wrapper calling C APIs (ucoliter.h)
23 * 02/19/01 swquek Removed CollationElementsIterator() since it is
24 * private constructor and no calls are made to it
27 #include "unicode/utypes.h"
29 #if !UCONFIG_NO_COLLATION
31 #include "unicode/coleitr.h"
32 #include "unicode/ustring.h"
38 /* Constants --------------------------------------------------------------- */
42 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationElementIterator
)
44 /* CollationElementIterator public constructor/destructor ------------------ */
/**
 * Copy constructor.
 * Visible steps: copy-constructs the UObject base from `other`, marks the
 * data as owned (isDataOwned_ = TRUE), and opens a new element handle on
 * other's collator with a NULL text pointer and a length of 0.
 * NOTE(review): the tail of the ucol_openElements() call and the remainder of
 * the body are not visible in this view. `status` is a local variable, so an
 * error stored in it is presumably never reported to the caller -- confirm.
 */
46 CollationElementIterator::CollationElementIterator(
47 const CollationElementIterator
& other
)
48 : UObject(other
), isDataOwned_(TRUE
)
50 UErrorCode status
= U_ZERO_ERROR
;
51 m_data_
= ucol_openElements(other
.m_data_
->iteratordata_
.coll
, NULL
, 0,
/**
 * Destructor: releases the wrapped element handle with ucol_closeElements().
 * NOTE(review): the lines between the signature and the call are not visible
 * in this view, so any guard around the call cannot be confirmed from here.
 */
57 CollationElementIterator::~CollationElementIterator()
60 ucol_closeElements(m_data_
);
64 /* CollationElementIterator public methods --------------------------------- */
66 int32_t CollationElementIterator::getOffset() const
68 return ucol_getOffset(m_data_
);
72 * Get the ordering priority of the next character in the string.
73 * @return the next character's ordering. Returns NULLORDER if an error has
74 * occured or if the end of string has been reached
76 int32_t CollationElementIterator::next(UErrorCode
& status
)
78 return ucol_next(m_data_
, &status
);
81 UBool
CollationElementIterator::operator!=(
82 const CollationElementIterator
& other
) const
84 return !(*this == other
);
/**
 * Equality test between two iterators.
 * The visible comparisons, in order:
 *   - identity of the two objects or of their m_data_ handles,
 *   - the collator pointer (iteratordata_.coll),
 *   - the source length (endp - string) and then the raw UChar contents,
 *   - getOffset(),
 *   - the UCOL_ITER_HASLEN flag and, when it is clear on this side, the
 *     position relative to writableBuffer,
 *   - finally the CE position (CEpos - CEs), whose comparison is returned.
 * NOTE(review): the statements inside each `if` are not visible in this view;
 * presumably the identity check yields TRUE and every mismatch yields FALSE
 * -- confirm.
 */
87 UBool
CollationElementIterator::operator==(
88 const CollationElementIterator
& that
) const
// Same object, or two wrappers sharing one underlying handle.
90 if (this == &that
|| m_data_
== that
.m_data_
) {
// Collators are compared by pointer, not by content.
95 if (m_data_
->iteratordata_
.coll
!= that
.m_data_
->iteratordata_
.coll
)
100 // the constructor and setText always sets a length
101 // and we only compare the string not the contents of the normalization
103 int thislength
= (int)(m_data_
->iteratordata_
.endp
- m_data_
->iteratordata_
.string
);
104 int thatlength
= (int)(that
.m_data_
->iteratordata_
.endp
- that
.m_data_
->iteratordata_
.string
);
106 if (thislength
!= thatlength
) {
// Lengths match at this point, so a byte-wise compare over
// thislength * U_SIZEOF_UCHAR bytes covers both source strings.
110 if (uprv_memcmp(m_data_
->iteratordata_
.string
,
111 that
.m_data_
->iteratordata_
.string
,
112 thislength
* U_SIZEOF_UCHAR
) != 0) {
115 if (getOffset() != that
.getOffset()) {
119 // checking normalization buffer
120 if ((m_data_
->iteratordata_
.flags
& UCOL_ITER_HASLEN
) == 0) {
121 if ((that
.m_data_
->iteratordata_
.flags
& UCOL_ITER_HASLEN
) != 0) {
124 // both are in the normalization buffer
125 if (m_data_
->iteratordata_
.pos
126 - m_data_
->iteratordata_
.writableBuffer
.getBuffer()
127 != that
.m_data_
->iteratordata_
.pos
128 - that
.m_data_
->iteratordata_
.writableBuffer
.getBuffer()) {
129 // not in the same position in the normalization buffer
133 else if ((that
.m_data_
->iteratordata_
.flags
& UCOL_ITER_HASLEN
) == 0) {
136 // checking ce position
137 return (m_data_
->iteratordata_
.CEpos
- m_data_
->iteratordata_
.CEs
)
138 == (that
.m_data_
->iteratordata_
.CEpos
139 - that
.m_data_
->iteratordata_
.CEs
);
143 * Get the ordering priority of the previous collation element in the string.
144 * @param status the error code status.
145 * @return the previous element's ordering. Returns NULLORDER if an error has
146 * occured or if the start of string has been reached.
148 int32_t CollationElementIterator::previous(UErrorCode
& status
)
150 return ucol_previous(m_data_
, &status
);
154 * Resets the cursor to the beginning of the string.
// reset(): described by the preceding text as resetting the cursor to the
// beginning of the string.
// NOTE(review): the body is not visible in this view -- confirm what it calls.
156 void CollationElementIterator::reset()
/**
 * Moves the iterator to a new offset by forwarding to ucol_setOffset().
 * @param newOffset the offset handed to ucol_setOffset() unchanged.
 * NOTE(review): the declaration of the second parameter is not visible in
 * this view; the call passes `&status`, so it is presumably a UErrorCode&
 * -- confirm.
 */
161 void CollationElementIterator::setOffset(int32_t newOffset
,
164 ucol_setOffset(m_data_
, newOffset
, &status
);
168 * Sets the source to the new source string.
/**
 * Replaces the text being iterated with a private copy of `source`.
 * Visible steps: check `status`, free the previously held string when the
 * handle is marked writable, allocate a new UChar buffer, copy the source
 * characters into it, free the offset buffer, re-initialise the collIterate
 * state with the new buffer and set reset_ to TRUE.
 * @param source the new source string; its contents are copied with u_memcpy.
 * NOTE(review): the second parameter's declaration and the branch structure
 * (including any early returns) are not visible in this view.
 */
170 void CollationElementIterator::setText(const UnicodeString
& source
,
173 if (U_FAILURE(status
)) {
177 int32_t length
= source
.length();
178 UChar
*string
= NULL
;
// Release the old text only if this handle is marked as writable.
// NOTE(review): this free happens before the new allocation is attempted;
// if that allocation fails, iteratordata_.string is not visibly reset and
// may be left dangling -- confirm against the full source.
179 if (m_data_
->isWritable
&& m_data_
->iteratordata_
.string
!= NULL
) {
180 uprv_free((UChar
*)m_data_
->iteratordata_
.string
);
182 m_data_
->isWritable
= TRUE
;
// Allocation sized for `length` UChars.
184 string
= (UChar
*)uprv_malloc(U_SIZEOF_UCHAR
* length
);
186 if (string
== NULL
) {
187 status
= U_MEMORY_ALLOCATION_ERROR
;
190 u_memcpy(string
, source
.getBuffer(), length
);
// Allocation of a single UChar; presumably the empty-source case -- the
// condition selecting this path is not visible here.
193 string
= (UChar
*)uprv_malloc(U_SIZEOF_UCHAR
);
195 if (string
== NULL
) {
196 status
= U_MEMORY_ALLOCATION_ERROR
;
201 /* Free offsetBuffer before initializing it. */
202 ucol_freeOffsetBuffer(&(m_data_
->iteratordata_
));
203 uprv_init_collIterate(m_data_
->iteratordata_
.coll
, string
, length
,
204 &m_data_
->iteratordata_
, &status
);
206 m_data_
->reset_
= TRUE
;
209 // Sets the source to the new character iterator.
/**
 * Replaces the text being iterated with a private copy of the characters
 * exposed by a CharacterIterator.
 * Visible steps: check `status`, allocate a UChar buffer, pull the text out
 * of `source` into a temporary UnicodeString and copy it into the buffer,
 * free the previously held string when the handle is marked writable, free
 * the offset buffer, re-initialise the collIterate state and set reset_ to
 * TRUE.
 * @param source the character iterator supplying the new text.
 * NOTE(review): the second parameter's declaration and the branch structure
 * (including any early returns) are not visible in this view.
 */
210 void CollationElementIterator::setText(CharacterIterator
& source
,
213 if (U_FAILURE(status
))
216 int32_t length
= source
.getLength();
217 UChar
*buffer
= NULL
;
// Allocation of a single UChar; presumably the empty-text case -- the
// selecting condition is not visible here.
220 buffer
= (UChar
*)uprv_malloc(U_SIZEOF_UCHAR
);
222 if (buffer
== NULL
) {
223 status
= U_MEMORY_ALLOCATION_ERROR
;
// Allocation sized for `length` UChars.
229 buffer
= (UChar
*)uprv_malloc(U_SIZEOF_UCHAR
* length
);
231 if (buffer
== NULL
) {
232 status
= U_MEMORY_ALLOCATION_ERROR
;
236 Using this constructor will prevent buffer from being removed when
// The text is first materialised in a temporary UnicodeString and then
// copied into the freshly allocated buffer.
239 UnicodeString string
;
240 source
.getText(string
);
241 u_memcpy(buffer
, string
.getBuffer(), length
);
// Unlike the UnicodeString overload, the old text is released here only
// after the new buffer has been allocated and filled.
244 if (m_data_
->isWritable
&& m_data_
->iteratordata_
.string
!= NULL
) {
245 uprv_free((UChar
*)m_data_
->iteratordata_
.string
);
247 m_data_
->isWritable
= TRUE
;
248 /* Free offsetBuffer before initializing it. */
249 ucol_freeOffsetBuffer(&(m_data_
->iteratordata_
));
250 uprv_init_collIterate(m_data_
->iteratordata_
.coll
, buffer
, length
,
251 &m_data_
->iteratordata_
, &status
);
252 m_data_
->reset_
= TRUE
;
/**
 * Masks a collation order value according to the collator's strength.
 * Visible behaviour: the strength is read with ucol_getStrength(); for
 * UCOL_PRIMARY the order is ANDed with
 * RuleBasedCollator::PRIMARYDIFFERENCEONLY, for UCOL_SECONDARY with
 * RuleBasedCollator::SECONDARYDIFFERENCEONLY.
 * @param order the collation order value to mask.
 * NOTE(review): the tail of the function (including the return statement) is
 * not visible in this view; presumably the masked `order` is returned --
 * confirm.
 */
255 int32_t CollationElementIterator::strengthOrder(int32_t order
) const
257 UCollationStrength s
= ucol_getStrength(m_data_
->iteratordata_
.coll
);
258 // Mask off the unwanted differences.
259 if (s
== UCOL_PRIMARY
) {
260 order
&= RuleBasedCollator::PRIMARYDIFFERENCEONLY
;
262 else if (s
== UCOL_SECONDARY
) {
263 order
&= RuleBasedCollator::SECONDARYDIFFERENCEONLY
;
269 /* CollationElementIterator private constructors/destructors --------------- */
272 * This is the "real" constructor for this class; it constructs an iterator
273 * over the source text using the specified collator
/**
 * Constructs an iterator over a private copy of `sourceText`, using the
 * collator wrapped by `order`.
 * Visible steps: check `status`, allocate a UChar buffer, copy the source
 * characters into it, open the element handle with ucol_openElements() on
 * order->ucollator, then mark the handle as writable.
 * @param sourceText the text to iterate over; copied with u_memcpy.
 * @param order      the collator whose `ucollator` is passed on.
 * NOTE(review): the remaining parameter(s), the initialiser list and the
 * branch structure are not visible in this view.
 */
275 CollationElementIterator::CollationElementIterator(
276 const UnicodeString
& sourceText
,
277 const RuleBasedCollator
* order
,
281 if (U_FAILURE(status
)) {
285 int32_t length
= sourceText
.length();
286 UChar
*string
= NULL
;
// Allocation sized for `length` UChars.
289 string
= (UChar
*)uprv_malloc(U_SIZEOF_UCHAR
* length
);
291 if (string
== NULL
) {
292 status
= U_MEMORY_ALLOCATION_ERROR
;
296 Using this constructor will prevent buffer from being removed when
299 u_memcpy(string
, sourceText
.getBuffer(), length
);
// Allocation of a single UChar; presumably the empty-text case -- the
// selecting condition is not visible here.
302 string
= (UChar
*)uprv_malloc(U_SIZEOF_UCHAR
);
304 if (string
== NULL
) {
305 status
= U_MEMORY_ALLOCATION_ERROR
;
310 m_data_
= ucol_openElements(order
->ucollator
, string
, length
, &status
);
312 /* Test for buffer overflows */
313 if (U_FAILURE(status
)) {
// NOTE(review): m_data_ is dereferenced here; the lines that would stop
// execution after a failed ucol_openElements() are not visible -- confirm.
316 m_data_
->isWritable
= TRUE
;
320 * This is the "real" constructor for this class; it constructs an iterator over
321 * the source text using the specified collator
/**
 * Constructs an iterator over a private copy of the text exposed by a
 * CharacterIterator, using the collator wrapped by `order`.
 * Visible steps: check `status`, allocate a UChar buffer, extract the text
 * through a UnicodeString and copy it into the buffer, open the element
 * handle with ucol_openElements() on order->ucollator, then mark the handle
 * as writable.
 * @param sourceText the character iterator supplying the text.
 * @param order      the collator whose `ucollator` is passed on.
 * NOTE(review): the remaining parameter(s), the initialiser list, the
 * declaration of `buffer` and the branch structure are not visible in this
 * view.
 */
323 CollationElementIterator::CollationElementIterator(
324 const CharacterIterator
& sourceText
,
325 const RuleBasedCollator
* order
,
329 if (U_FAILURE(status
))
// NOTE(review): the Normalizer fragment below assigns to `text`, which is
// not declared anywhere in the visible lines; it looks like disabled legacy
// code whose comment delimiters are not visible here -- confirm.
332 // **** should I just drop this test? ****
334 if ( sourceText.endIndex() != 0 )
336 // A CollationElementIterator is really a two-layered beast.
337 // Internally it uses a Normalizer to munge the source text into a form
338 // where all "composed" Unicode characters (such as \u00FC) are split into a
339 // normal character and a combining accent character.
340 // Afterward, CollationElementIterator does its own processing to handle
341 // expanding and contracting collation sequences, ignorables, and so on.
343 Normalizer::EMode decomp = order->getStrength() == Collator::IDENTICAL
344 ? Normalizer::NO_OP : order->getDecomposition();
346 text = new Normalizer(sourceText, decomp);
348 status = U_MEMORY_ALLOCATION_ERROR;
351 int32_t length
= sourceText
.getLength();
// Allocation sized for `length` UChars.
354 buffer
= (UChar
*)uprv_malloc(U_SIZEOF_UCHAR
* length
);
356 if (buffer
== NULL
) {
357 status
= U_MEMORY_ALLOCATION_ERROR
;
361 Using this constructor will prevent buffer from being removed when
// The UnicodeString is constructed directly over `buffer`; getText() is
// reached by casting away the constness of `sourceText`, and the result is
// then copied back into `buffer` from string.getBuffer().
364 UnicodeString
string(buffer
, length
, length
);
365 ((CharacterIterator
&)sourceText
).getText(string
);
366 const UChar
*temp
= string
.getBuffer();
367 u_memcpy(buffer
, temp
, length
);
// Allocation of a single UChar; presumably the empty-text case -- the
// selecting condition is not visible here.
370 buffer
= (UChar
*)uprv_malloc(U_SIZEOF_UCHAR
);
372 if (buffer
== NULL
) {
373 status
= U_MEMORY_ALLOCATION_ERROR
;
378 m_data_
= ucol_openElements(order
->ucollator
, buffer
, length
, &status
);
380 /* Test for buffer overflows */
381 if (U_FAILURE(status
)) {
// NOTE(review): m_data_ is dereferenced here; the lines that would stop
// execution after a failed ucol_openElements() are not visible -- confirm.
384 m_data_
->isWritable
= TRUE
;
387 /* CollationElementIterator protected methods ----------------------------- */
/**
 * Assignment operator: copies the iteration state of `other` into this
 * iterator's existing element handle.
 * Visible steps: duplicate the source string into newly allocated memory,
 * recompute endp, copy the writable (normalization) buffer when the source
 * has UCOL_ITER_INNORMBUF set, rebase `pos`, copy the CE buffers and rebase
 * toReturn/CEpos, rebase fcdPosition, then copy flags, origFlags and the
 * collator pointer and set isDataOwned_ to TRUE.
 * @param other the iterator whose state is copied.
 * NOTE(review): several interior lines (declarations of `length` and
 * `CEsize`, guards, else-branches, the return statement) are not visible in
 * this view.
 */
389 const CollationElementIterator
& CollationElementIterator::operator=(
390 const CollationElementIterator
& other
)
// `coliter` is the destination state (this object), `othercoliter` the source.
394 UCollationElements
*ucolelem
= this->m_data_
;
395 UCollationElements
*otherucolelem
= other
.m_data_
;
396 collIterate
*coliter
= &(ucolelem
->iteratordata_
);
397 collIterate
*othercoliter
= &(otherucolelem
->iteratordata_
);
400 // checking only UCOL_ITER_HASLEN is not enough here as we may be in
401 // the normalization buffer
402 length
= (int)(othercoliter
->endp
- othercoliter
->string
);
404 ucolelem
->reset_
= otherucolelem
->reset_
;
405 ucolelem
->isWritable
= TRUE
;
407 /* create a duplicate of string */
// NOTE(review): no release of the previous coliter->string is visible before
// it is overwritten here -- confirm whether the old buffer can leak.
409 coliter
->string
= (UChar
*)uprv_malloc(length
* U_SIZEOF_UCHAR
);
410 if(coliter
->string
!= NULL
) {
411 uprv_memcpy((UChar
*)coliter
->string
, othercoliter
->string
,
412 length
* U_SIZEOF_UCHAR
);
413 } else { // Error: couldn't allocate memory. No copying should be done
418 coliter
->string
= NULL
;
421 /* start and end of string */
422 coliter
->endp
= coliter
->string
== NULL
? NULL
: coliter
->string
+ length
;
424 /* handle writable buffer here */
426 if (othercoliter
->flags
& UCOL_ITER_INNORMBUF
) {
427 coliter
->writableBuffer
= othercoliter
->writableBuffer
;
428 coliter
->writableBuffer
.getTerminatedBuffer();
431 /* current position */
// When the source position lies inside its string, rebase it onto the new
// copy by offset; the later assignment rebases it onto the writable buffer.
432 if (othercoliter
->pos
>= othercoliter
->string
&&
433 othercoliter
->pos
<= othercoliter
->endp
)
435 U_ASSERT(coliter
->string
!= NULL
);
436 coliter
->pos
= coliter
->string
+
437 (othercoliter
->pos
- othercoliter
->string
);
440 coliter
->pos
= coliter
->writableBuffer
.getTerminatedBuffer() +
441 (othercoliter
->pos
- othercoliter
->writableBuffer
.getBuffer());
// NOTE(review): this tests the destination's extendCEs, not the source's.
446 if (coliter
->extendCEs
) {
447 uprv_memcpy(coliter
->CEs
, othercoliter
->CEs
, sizeof(uint32_t) * UCOL_EXPAND_CE_BUFFER_SIZE
);
// NOTE(review): sizeof is applied to the extendCEs member itself (assigned a
// uint32_t* just below), so this is presumably the size of a pointer rather
// than of the buffer it refers to -- confirm.
448 CEsize
= sizeof(othercoliter
->extendCEs
);
// NOTE(review): the new allocation is stored into the SOURCE
// (othercoliter->extendCEs), while the copy that follows writes to
// coliter->extendCEs and reads from the just-replaced source pointer. This
// looks inverted and would discard the source's buffer -- confirm.
450 othercoliter
->extendCEs
= (uint32_t *)uprv_malloc(CEsize
);
451 uprv_memcpy(coliter
->extendCEs
, othercoliter
->extendCEs
, CEsize
);
453 coliter
->toReturn
= coliter
->extendCEs
+
454 (othercoliter
->toReturn
- othercoliter
->extendCEs
);
// NOTE(review): CEsize holds a sizeof() byte count here but is used as an
// element offset -- confirm.
455 coliter
->CEpos
= coliter
->extendCEs
+ CEsize
;
// NOTE(review): CEsize is a pointer difference (an element count) here, yet
// it is passed to uprv_memcpy as the size; if that argument is in bytes, only
// part of the CE data is copied -- confirm.
457 CEsize
= (int32_t)(othercoliter
->CEpos
- othercoliter
->CEs
);
459 uprv_memcpy(coliter
->CEs
, othercoliter
->CEs
, CEsize
);
461 coliter
->toReturn
= coliter
->CEs
+
462 (othercoliter
->toReturn
- othercoliter
->CEs
);
463 coliter
->CEpos
= coliter
->CEs
+ CEsize
;
// fcdPosition is rebased onto the new string copy by offset.
466 if (othercoliter
->fcdPosition
!= NULL
) {
467 U_ASSERT(coliter
->string
!= NULL
);
468 coliter
->fcdPosition
= coliter
->string
+
469 (othercoliter
->fcdPosition
470 - othercoliter
->string
);
473 coliter
->fcdPosition
= NULL
;
475 coliter
->flags
= othercoliter
->flags
/*| UCOL_ITER_HASLEN*/;
476 coliter
->origFlags
= othercoliter
->origFlags
;
477 coliter
->coll
= othercoliter
->coll
;
478 this->isDataOwned_
= TRUE
;
486 #endif /* #if !UCONFIG_NO_COLLATION */