2 *******************************************************************************
3 * Copyright (C) 1996-2010, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 *******************************************************************************
13 * Created by: Helena Shih
15 * Modification History:
17 * Date Name Description
19 * 6/23/97 helena Adding comments to make code more readable.
20 * 08/03/98 erm Synched with 1.2 version of CollationElementIterator.java
21 * 12/10/99 aliu Ported Thai collation support from Java.
22 * 01/25/01 swquek Modified to a C++ wrapper calling C APIs (ucoliter.h)
23 * 02/19/01 swquek Removed CollationElementsIterator() since it is
24 * private constructor and no calls are made to it
27 #include "unicode/utypes.h"
29 #if !UCONFIG_NO_COLLATION
31 #include "unicode/coleitr.h"
32 #include "unicode/ustring.h"
37 /* Constants --------------------------------------------------------------- */
41 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationElementIterator
)
43 /* CollationElementIterator public constructor/destructor ------------------ */
45 CollationElementIterator::CollationElementIterator(
46 const CollationElementIterator
& other
)
47 : UObject(other
), isDataOwned_(TRUE
)
49 UErrorCode status
= U_ZERO_ERROR
;
50 m_data_
= ucol_openElements(other
.m_data_
->iteratordata_
.coll
, NULL
, 0,
56 CollationElementIterator::~CollationElementIterator()
59 ucol_closeElements(m_data_
);
63 /* CollationElementIterator public methods --------------------------------- */
65 int32_t CollationElementIterator::getOffset() const
67 return ucol_getOffset(m_data_
);
71 * Get the ordering priority of the next character in the string.
72 * @return the next character's ordering. Returns NULLORDER if an error has
73 * occured or if the end of string has been reached
75 int32_t CollationElementIterator::next(UErrorCode
& status
)
77 return ucol_next(m_data_
, &status
);
80 UBool
CollationElementIterator::operator!=(
81 const CollationElementIterator
& other
) const
83 return !(*this == other
);
86 UBool
CollationElementIterator::operator==(
87 const CollationElementIterator
& that
) const
89 if (this == &that
|| m_data_
== that
.m_data_
) {
94 if (m_data_
->iteratordata_
.coll
!= that
.m_data_
->iteratordata_
.coll
)
99 // the constructor and setText always sets a length
100 // and we only compare the string not the contents of the normalization
102 int thislength
= (int)(m_data_
->iteratordata_
.endp
- m_data_
->iteratordata_
.string
);
103 int thatlength
= (int)(that
.m_data_
->iteratordata_
.endp
- that
.m_data_
->iteratordata_
.string
);
105 if (thislength
!= thatlength
) {
109 if (uprv_memcmp(m_data_
->iteratordata_
.string
,
110 that
.m_data_
->iteratordata_
.string
,
111 thislength
* U_SIZEOF_UCHAR
) != 0) {
114 if (getOffset() != that
.getOffset()) {
118 // checking normalization buffer
119 if ((m_data_
->iteratordata_
.flags
& UCOL_ITER_HASLEN
) == 0) {
120 if ((that
.m_data_
->iteratordata_
.flags
& UCOL_ITER_HASLEN
) != 0) {
123 // both are in the normalization buffer
124 if (m_data_
->iteratordata_
.pos
125 - m_data_
->iteratordata_
.writableBuffer
.getBuffer()
126 != that
.m_data_
->iteratordata_
.pos
127 - that
.m_data_
->iteratordata_
.writableBuffer
.getBuffer()) {
128 // not in the same position in the normalization buffer
132 else if ((that
.m_data_
->iteratordata_
.flags
& UCOL_ITER_HASLEN
) == 0) {
135 // checking ce position
136 return (m_data_
->iteratordata_
.CEpos
- m_data_
->iteratordata_
.CEs
)
137 == (that
.m_data_
->iteratordata_
.CEpos
138 - that
.m_data_
->iteratordata_
.CEs
);
142 * Get the ordering priority of the previous collation element in the string.
143 * @param status the error code status.
144 * @return the previous element's ordering. Returns NULLORDER if an error has
145 * occured or if the start of string has been reached.
147 int32_t CollationElementIterator::previous(UErrorCode
& status
)
149 return ucol_previous(m_data_
, &status
);
153 * Resets the cursor to the beginning of the string.
155 void CollationElementIterator::reset()
160 void CollationElementIterator::setOffset(int32_t newOffset
,
163 ucol_setOffset(m_data_
, newOffset
, &status
);
167 * Sets the source to the new source string.
169 void CollationElementIterator::setText(const UnicodeString
& source
,
172 if (U_FAILURE(status
)) {
176 int32_t length
= source
.length();
177 UChar
*string
= NULL
;
178 if (m_data_
->isWritable
&& m_data_
->iteratordata_
.string
!= NULL
) {
179 uprv_free((UChar
*)m_data_
->iteratordata_
.string
);
181 m_data_
->isWritable
= TRUE
;
183 string
= (UChar
*)uprv_malloc(U_SIZEOF_UCHAR
* length
);
185 if (string
== NULL
) {
186 status
= U_MEMORY_ALLOCATION_ERROR
;
189 u_memcpy(string
, source
.getBuffer(), length
);
192 string
= (UChar
*)uprv_malloc(U_SIZEOF_UCHAR
);
194 if (string
== NULL
) {
195 status
= U_MEMORY_ALLOCATION_ERROR
;
200 /* Free offsetBuffer before initializing it. */
201 ucol_freeOffsetBuffer(&(m_data_
->iteratordata_
));
202 uprv_init_collIterate(m_data_
->iteratordata_
.coll
, string
, length
,
203 &m_data_
->iteratordata_
, &status
);
205 m_data_
->reset_
= TRUE
;
208 // Sets the source to the new character iterator.
209 void CollationElementIterator::setText(CharacterIterator
& source
,
212 if (U_FAILURE(status
))
215 int32_t length
= source
.getLength();
216 UChar
*buffer
= NULL
;
219 buffer
= (UChar
*)uprv_malloc(U_SIZEOF_UCHAR
);
221 if (buffer
== NULL
) {
222 status
= U_MEMORY_ALLOCATION_ERROR
;
228 buffer
= (UChar
*)uprv_malloc(U_SIZEOF_UCHAR
* length
);
230 if (buffer
== NULL
) {
231 status
= U_MEMORY_ALLOCATION_ERROR
;
235 Using this constructor will prevent buffer from being removed when
238 UnicodeString string
;
239 source
.getText(string
);
240 u_memcpy(buffer
, string
.getBuffer(), length
);
243 if (m_data_
->isWritable
&& m_data_
->iteratordata_
.string
!= NULL
) {
244 uprv_free((UChar
*)m_data_
->iteratordata_
.string
);
246 m_data_
->isWritable
= TRUE
;
247 /* Free offsetBuffer before initializing it. */
248 ucol_freeOffsetBuffer(&(m_data_
->iteratordata_
));
249 uprv_init_collIterate(m_data_
->iteratordata_
.coll
, buffer
, length
,
250 &m_data_
->iteratordata_
, &status
);
251 m_data_
->reset_
= TRUE
;
254 int32_t CollationElementIterator::strengthOrder(int32_t order
) const
256 UCollationStrength s
= ucol_getStrength(m_data_
->iteratordata_
.coll
);
257 // Mask off the unwanted differences.
258 if (s
== UCOL_PRIMARY
) {
259 order
&= RuleBasedCollator::PRIMARYDIFFERENCEONLY
;
261 else if (s
== UCOL_SECONDARY
) {
262 order
&= RuleBasedCollator::SECONDARYDIFFERENCEONLY
;
268 /* CollationElementIterator private constructors/destructors --------------- */
271 * This is the "real" constructor for this class; it constructs an iterator
272 * over the source text using the specified collator
274 CollationElementIterator::CollationElementIterator(
275 const UnicodeString
& sourceText
,
276 const RuleBasedCollator
* order
,
280 if (U_FAILURE(status
)) {
284 int32_t length
= sourceText
.length();
285 UChar
*string
= NULL
;
288 string
= (UChar
*)uprv_malloc(U_SIZEOF_UCHAR
* length
);
290 if (string
== NULL
) {
291 status
= U_MEMORY_ALLOCATION_ERROR
;
295 Using this constructor will prevent buffer from being removed when
298 u_memcpy(string
, sourceText
.getBuffer(), length
);
301 string
= (UChar
*)uprv_malloc(U_SIZEOF_UCHAR
);
303 if (string
== NULL
) {
304 status
= U_MEMORY_ALLOCATION_ERROR
;
309 m_data_
= ucol_openElements(order
->ucollator
, string
, length
, &status
);
311 /* Test for buffer overflows */
312 if (U_FAILURE(status
)) {
315 m_data_
->isWritable
= TRUE
;
319 * This is the "real" constructor for this class; it constructs an iterator over
320 * the source text using the specified collator
322 CollationElementIterator::CollationElementIterator(
323 const CharacterIterator
& sourceText
,
324 const RuleBasedCollator
* order
,
328 if (U_FAILURE(status
))
331 // **** should I just drop this test? ****
333 if ( sourceText.endIndex() != 0 )
335 // A CollationElementIterator is really a two-layered beast.
336 // Internally it uses a Normalizer to munge the source text into a form
337 // where all "composed" Unicode characters (such as \u00FC) are split into a
338 // normal character and a combining accent character.
339 // Afterward, CollationElementIterator does its own processing to handle
340 // expanding and contracting collation sequences, ignorables, and so on.
342 Normalizer::EMode decomp = order->getStrength() == Collator::IDENTICAL
343 ? Normalizer::NO_OP : order->getDecomposition();
345 text = new Normalizer(sourceText, decomp);
347 status = U_MEMORY_ALLOCATION_ERROR;
350 int32_t length
= sourceText
.getLength();
353 buffer
= (UChar
*)uprv_malloc(U_SIZEOF_UCHAR
* length
);
355 if (buffer
== NULL
) {
356 status
= U_MEMORY_ALLOCATION_ERROR
;
360 Using this constructor will prevent buffer from being removed when
363 UnicodeString
string(buffer
, length
, length
);
364 ((CharacterIterator
&)sourceText
).getText(string
);
365 const UChar
*temp
= string
.getBuffer();
366 u_memcpy(buffer
, temp
, length
);
369 buffer
= (UChar
*)uprv_malloc(U_SIZEOF_UCHAR
);
371 if (buffer
== NULL
) {
372 status
= U_MEMORY_ALLOCATION_ERROR
;
377 m_data_
= ucol_openElements(order
->ucollator
, buffer
, length
, &status
);
379 /* Test for buffer overflows */
380 if (U_FAILURE(status
)) {
383 m_data_
->isWritable
= TRUE
;
386 /* CollationElementIterator protected methods ----------------------------- */
388 const CollationElementIterator
& CollationElementIterator::operator=(
389 const CollationElementIterator
& other
)
393 UCollationElements
*ucolelem
= this->m_data_
;
394 UCollationElements
*otherucolelem
= other
.m_data_
;
395 collIterate
*coliter
= &(ucolelem
->iteratordata_
);
396 collIterate
*othercoliter
= &(otherucolelem
->iteratordata_
);
399 // checking only UCOL_ITER_HASLEN is not enough here as we may be in
400 // the normalization buffer
401 length
= (int)(othercoliter
->endp
- othercoliter
->string
);
403 ucolelem
->reset_
= otherucolelem
->reset_
;
404 ucolelem
->isWritable
= TRUE
;
406 /* create a duplicate of string */
408 coliter
->string
= (UChar
*)uprv_malloc(length
* U_SIZEOF_UCHAR
);
409 if(coliter
->string
!= NULL
) {
410 uprv_memcpy((UChar
*)coliter
->string
, othercoliter
->string
,
411 length
* U_SIZEOF_UCHAR
);
412 } else { // Error: couldn't allocate memory. No copying should be done
417 coliter
->string
= NULL
;
420 /* start and end of string */
421 coliter
->endp
= coliter
->string
+ length
;
423 /* handle writable buffer here */
425 if (othercoliter
->flags
& UCOL_ITER_INNORMBUF
) {
426 coliter
->writableBuffer
= othercoliter
->writableBuffer
;
427 coliter
->writableBuffer
.getTerminatedBuffer();
430 /* current position */
431 if (othercoliter
->pos
>= othercoliter
->string
&&
432 othercoliter
->pos
<= othercoliter
->endp
)
434 coliter
->pos
= coliter
->string
+
435 (othercoliter
->pos
- othercoliter
->string
);
438 coliter
->pos
= coliter
->writableBuffer
.getTerminatedBuffer() +
439 (othercoliter
->pos
- othercoliter
->writableBuffer
.getBuffer());
444 if (coliter
->extendCEs
) {
445 uprv_memcpy(coliter
->CEs
, othercoliter
->CEs
, sizeof(uint32_t) * UCOL_EXPAND_CE_BUFFER_SIZE
);
446 CEsize
= sizeof(othercoliter
->extendCEs
);
448 othercoliter
->extendCEs
= (uint32_t *)uprv_malloc(CEsize
);
449 uprv_memcpy(coliter
->extendCEs
, othercoliter
->extendCEs
, CEsize
);
451 coliter
->toReturn
= coliter
->extendCEs
+
452 (othercoliter
->toReturn
- othercoliter
->extendCEs
);
453 coliter
->CEpos
= coliter
->extendCEs
+ CEsize
;
455 CEsize
= (int32_t)(othercoliter
->CEpos
- othercoliter
->CEs
);
457 uprv_memcpy(coliter
->CEs
, othercoliter
->CEs
, CEsize
);
459 coliter
->toReturn
= coliter
->CEs
+
460 (othercoliter
->toReturn
- othercoliter
->CEs
);
461 coliter
->CEpos
= coliter
->CEs
+ CEsize
;
464 if (othercoliter
->fcdPosition
!= NULL
) {
465 coliter
->fcdPosition
= coliter
->string
+
466 (othercoliter
->fcdPosition
467 - othercoliter
->string
);
470 coliter
->fcdPosition
= NULL
;
472 coliter
->flags
= othercoliter
->flags
/*| UCOL_ITER_HASLEN*/;
473 coliter
->origFlags
= othercoliter
->origFlags
;
474 coliter
->coll
= othercoliter
->coll
;
475 this->isDataOwned_
= TRUE
;
483 #endif /* #if !UCONFIG_NO_COLLATION */