]> git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/coleitr.cpp
ICU-6.2.22.tar.gz
[apple/icu.git] / icuSources / i18n / coleitr.cpp
1 /*
2 *******************************************************************************
3 * Copyright (C) 1996-2003, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 *******************************************************************************
6 */
7
8 /*
9 * File coleitr.cpp
10 *
11 *
12 *
13 * Created by: Helena Shih
14 *
15 * Modification History:
16 *
17 * Date Name Description
18 *
19 * 6/23/97 helena Adding comments to make code more readable.
20 * 08/03/98 erm Synched with 1.2 version of CollationElementIterator.java
21 * 12/10/99 aliu Ported Thai collation support from Java.
22 * 01/25/01 swquek Modified to a C++ wrapper calling C APIs (ucoliter.h)
23 * 02/19/01 swquek Removed CollationElementsIterator() since it is
24 * private constructor and no calls are made to it
25 */
26
27 #include "unicode/utypes.h"
28
29 #if !UCONFIG_NO_COLLATION
30
31 #include "unicode/coleitr.h"
32 #include "unicode/ustring.h"
33 #include "ucol_imp.h"
34 #include "cmemory.h"
35
36
37 /* Constants --------------------------------------------------------------- */
38
39 U_NAMESPACE_BEGIN
40
41 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationElementIterator)
42
43 /* synwee : public can't remove */
44 int32_t const CollationElementIterator::NULLORDER = 0xffffffff;
45
46 /* CollationElementIterator public constructor/destructor ------------------ */
47
48 CollationElementIterator::CollationElementIterator(
49 const CollationElementIterator& other)
50 : UObject(other), isDataOwned_(TRUE)
51 {
52 UErrorCode status = U_ZERO_ERROR;
53 m_data_ = ucol_openElements(other.m_data_->iteratordata_.coll, NULL, 0,
54 &status);
55
56 *this = other;
57 }
58
59 CollationElementIterator::~CollationElementIterator()
60 {
61 if (isDataOwned_) {
62 ucol_closeElements(m_data_);
63 }
64 }
65
66 /* CollationElementIterator public methods --------------------------------- */
67
68 int32_t CollationElementIterator::getOffset() const
69 {
70 return ucol_getOffset(m_data_);
71 }
72
73 /**
74 * Get the ordering priority of the next character in the string.
75 * @return the next character's ordering. Returns NULLORDER if an error has
76 * occured or if the end of string has been reached
77 */
78 int32_t CollationElementIterator::next(UErrorCode& status)
79 {
80 return ucol_next(m_data_, &status);
81 }
82
83 UBool CollationElementIterator::operator!=(
84 const CollationElementIterator& other) const
85 {
86 return !(*this == other);
87 }
88
89 UBool CollationElementIterator::operator==(
90 const CollationElementIterator& that) const
91 {
92 if (this == &that) {
93 return TRUE;
94 }
95
96 if (m_data_ == that.m_data_) {
97 return TRUE;
98 }
99
100 // option comparison
101 if (m_data_->iteratordata_.coll != that.m_data_->iteratordata_.coll)
102 {
103 return FALSE;
104 }
105
106 // the constructor and setText always sets a length
107 // and we only compare the string not the contents of the normalization
108 // buffer
109 int thislength = m_data_->iteratordata_.endp -
110 m_data_->iteratordata_.string;
111 int thatlength = that.m_data_->iteratordata_.endp -
112 that.m_data_->iteratordata_.string;
113
114 if (thislength != thatlength) {
115 return FALSE;
116 }
117
118 if (uprv_memcmp(m_data_->iteratordata_.string,
119 that.m_data_->iteratordata_.string,
120 thislength * U_SIZEOF_UCHAR) != 0) {
121 return FALSE;
122 }
123 if (getOffset() != that.getOffset()) {
124 return FALSE;
125 }
126
127 // checking normalization buffer
128 if ((m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) {
129 if ((m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) != 0) {
130 return FALSE;
131 }
132 // both are in the normalization buffer
133 if (m_data_->iteratordata_.pos
134 - m_data_->iteratordata_.writableBuffer
135 != that.m_data_->iteratordata_.pos
136 - that.m_data_->iteratordata_.writableBuffer) {
137 // not in the same position in the normalization buffer
138 return FALSE;
139 }
140 }
141 else if ((m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) {
142 return FALSE;
143 }
144 // checking ce position
145 return (m_data_->iteratordata_.CEpos - m_data_->iteratordata_.CEs)
146 == (that.m_data_->iteratordata_.CEpos
147 - that.m_data_->iteratordata_.CEs);
148 }
149
150 /**
151 * Get the ordering priority of the previous collation element in the string.
152 * @param status the error code status.
153 * @return the previous element's ordering. Returns NULLORDER if an error has
154 * occured or if the start of string has been reached.
155 */
156 int32_t CollationElementIterator::previous(UErrorCode& status)
157 {
158 return ucol_previous(m_data_, &status);
159 }
160
161 /**
162 * Resets the cursor to the beginning of the string.
163 */
164 void CollationElementIterator::reset()
165 {
166 ucol_reset(m_data_);
167 }
168
169 void CollationElementIterator::setOffset(int32_t newOffset,
170 UErrorCode& status)
171 {
172 ucol_setOffset(m_data_, newOffset, &status);
173 }
174
175 /**
176 * Sets the source to the new source string.
177 */
178 void CollationElementIterator::setText(const UnicodeString& source,
179 UErrorCode& status)
180 {
181 if (U_FAILURE(status)) {
182 return;
183 }
184
185 int32_t length = source.length();
186 UChar *string = NULL;
187 if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) {
188 uprv_free(m_data_->iteratordata_.string);
189 }
190 m_data_->isWritable = TRUE;
191 if (length > 0) {
192 string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
193 /* test for NULL */
194 if (string == NULL) {
195 status = U_MEMORY_ALLOCATION_ERROR;
196 return;
197 }
198 u_memcpy(string, source.getBuffer(), length);
199 }
200 else {
201 string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
202 /* test for NULL */
203 if (string == NULL) {
204 status = U_MEMORY_ALLOCATION_ERROR;
205 return;
206 }
207 *string = 0;
208 }
209 uprv_init_collIterate(m_data_->iteratordata_.coll, string, length,
210 &m_data_->iteratordata_);
211
212 m_data_->reset_ = TRUE;
213 }
214
215 // Sets the source to the new character iterator.
216 void CollationElementIterator::setText(CharacterIterator& source,
217 UErrorCode& status)
218 {
219 if (U_FAILURE(status))
220 return;
221
222 int32_t length = source.getLength();
223 UChar *buffer = NULL;
224
225 if (length == 0) {
226 buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
227 /* test for NULL */
228 if (buffer == NULL) {
229 status = U_MEMORY_ALLOCATION_ERROR;
230 return;
231 }
232 *buffer = 0;
233 }
234 else {
235 buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
236 /* test for NULL */
237 if (buffer == NULL) {
238 status = U_MEMORY_ALLOCATION_ERROR;
239 return;
240 }
241 /*
242 Using this constructor will prevent buffer from being removed when
243 string gets removed
244 */
245 UnicodeString string;
246 source.getText(string);
247 u_memcpy(buffer, string.getBuffer(), length);
248 }
249
250 if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) {
251 uprv_free(m_data_->iteratordata_.string);
252 }
253 m_data_->isWritable = TRUE;
254 uprv_init_collIterate(m_data_->iteratordata_.coll, buffer, length,
255 &m_data_->iteratordata_);
256 m_data_->reset_ = TRUE;
257 }
258
259 int32_t CollationElementIterator::strengthOrder(int32_t order) const
260 {
261 UCollationStrength s = ucol_getStrength(m_data_->iteratordata_.coll);
262 // Mask off the unwanted differences.
263 if (s == UCOL_PRIMARY) {
264 order &= RuleBasedCollator::PRIMARYDIFFERENCEONLY;
265 }
266 else if (s == UCOL_SECONDARY) {
267 order &= RuleBasedCollator::SECONDARYDIFFERENCEONLY;
268 }
269
270 return order;
271 }
272
273 /* CollationElementIterator private constructors/destructors --------------- */
274
275 /**
276 * This is the "real" constructor for this class; it constructs an iterator
277 * over the source text using the specified collator
278 */
279 CollationElementIterator::CollationElementIterator(
280 const UnicodeString& sourceText,
281 const RuleBasedCollator* order,
282 UErrorCode& status)
283 : isDataOwned_(TRUE)
284 {
285 if (U_FAILURE(status)) {
286 return;
287 }
288
289 int32_t length = sourceText.length();
290 UChar *string = NULL;
291
292 if (length > 0) {
293 string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
294 /* test for NULL */
295 if (string == NULL) {
296 status = U_MEMORY_ALLOCATION_ERROR;
297 return;
298 }
299 /*
300 Using this constructor will prevent buffer from being removed when
301 string gets removed
302 */
303 u_memcpy(string, sourceText.getBuffer(), length);
304 }
305 else {
306 string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
307 /* test for NULL */
308 if (string == NULL) {
309 status = U_MEMORY_ALLOCATION_ERROR;
310 return;
311 }
312 *string = 0;
313 }
314 m_data_ = ucol_openElements(order->ucollator, string, length, &status);
315
316 /* Test for buffer overflows */
317 if (U_FAILURE(status)) {
318 return;
319 }
320 m_data_->isWritable = TRUE;
321 }
322
323 /**
324 * This is the "real" constructor for this class; it constructs an iterator over
325 * the source text using the specified collator
326 */
327 CollationElementIterator::CollationElementIterator(
328 const CharacterIterator& sourceText,
329 const RuleBasedCollator* order,
330 UErrorCode& status)
331 : isDataOwned_(TRUE)
332 {
333 if (U_FAILURE(status))
334 return;
335
336 // **** should I just drop this test? ****
337 /*
338 if ( sourceText.endIndex() != 0 )
339 {
340 // A CollationElementIterator is really a two-layered beast.
341 // Internally it uses a Normalizer to munge the source text into a form
342 // where all "composed" Unicode characters (such as \u00FC) are split into a
343 // normal character and a combining accent character.
344 // Afterward, CollationElementIterator does its own processing to handle
345 // expanding and contracting collation sequences, ignorables, and so on.
346
347 Normalizer::EMode decomp = order->getStrength() == Collator::IDENTICAL
348 ? Normalizer::NO_OP : order->getDecomposition();
349
350 text = new Normalizer(sourceText, decomp);
351 if (text == NULL)
352 status = U_MEMORY_ALLOCATION_ERROR;
353 }
354 */
355 int32_t length = sourceText.getLength();
356 UChar *buffer;
357 if (length > 0) {
358 buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
359 /* test for NULL */
360 if (buffer == NULL) {
361 status = U_MEMORY_ALLOCATION_ERROR;
362 return;
363 }
364 /*
365 Using this constructor will prevent buffer from being removed when
366 string gets removed
367 */
368 UnicodeString string(buffer, length, length);
369 ((CharacterIterator &)sourceText).getText(string);
370 const UChar *temp = string.getBuffer();
371 u_memcpy(buffer, temp, length);
372 }
373 else {
374 buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
375 /* test for NULL */
376 if (buffer == NULL) {
377 status = U_MEMORY_ALLOCATION_ERROR;
378 return;
379 }
380 *buffer = 0;
381 }
382 m_data_ = ucol_openElements(order->ucollator, buffer, length, &status);
383
384 /* Test for buffer overflows */
385 if (U_FAILURE(status)) {
386 return;
387 }
388 m_data_->isWritable = TRUE;
389 }
390
391 /* CollationElementIterator protected methods ----------------------------- */
392
393 const CollationElementIterator& CollationElementIterator::operator=(
394 const CollationElementIterator& other)
395 {
396 if (this != &other)
397 {
398 UCollationElements *ucolelem = this->m_data_;
399 UCollationElements *otherucolelem = other.m_data_;
400 collIterate *coliter = &(ucolelem->iteratordata_);
401 collIterate *othercoliter = &(otherucolelem->iteratordata_);
402 int length = 0;
403
404 // checking only UCOL_ITER_HASLEN is not enough here as we may be in
405 // the normalization buffer
406 length = othercoliter->endp - othercoliter->string;
407
408 ucolelem->reset_ = otherucolelem->reset_;
409 ucolelem->isWritable = TRUE;
410
411 /* create a duplicate of string */
412 if (length > 0) {
413 coliter->string = (UChar *)uprv_malloc(length * U_SIZEOF_UCHAR);
414 if(coliter->string != NULL) {
415 uprv_memcpy(coliter->string, othercoliter->string,
416 length * U_SIZEOF_UCHAR);
417 } else { // Error: couldn't allocate memory. No copying should be done
418 length = 0;
419 }
420 }
421 else {
422 coliter->string = NULL;
423 }
424
425 /* start and end of string */
426 coliter->endp = coliter->string + length;
427
428 /* handle writable buffer here */
429
430 if (othercoliter->flags & UCOL_ITER_INNORMBUF) {
431 uint32_t wlength = u_strlen(othercoliter->writableBuffer) + 1;
432 if (wlength < coliter->writableBufSize) {
433 uprv_memcpy(coliter->stackWritableBuffer,
434 othercoliter->stackWritableBuffer,
435 othercoliter->writableBufSize * U_SIZEOF_UCHAR);
436 }
437 else {
438 if (coliter->writableBuffer != coliter->stackWritableBuffer) {
439 uprv_free(coliter->writableBuffer);
440 }
441 coliter->writableBuffer = (UChar *)uprv_malloc(
442 wlength * U_SIZEOF_UCHAR);
443 if(coliter->writableBuffer != NULL) {
444 uprv_memcpy(coliter->writableBuffer,
445 othercoliter->writableBuffer,
446 wlength * U_SIZEOF_UCHAR);
447 coliter->writableBufSize = wlength;
448 } else { // Error: couldn't allocate memory for writableBuffer
449 coliter->writableBufSize = 0;
450 }
451 }
452 }
453
454 /* current position */
455 if (othercoliter->pos >= othercoliter->string &&
456 othercoliter->pos <= othercoliter->endp) {
457 coliter->pos = coliter->string +
458 (othercoliter->pos - othercoliter->string);
459 }
460 else {
461 coliter->pos = coliter->writableBuffer +
462 (othercoliter->pos - othercoliter->writableBuffer);
463 }
464
465 /* CE buffer */
466 uprv_memcpy(coliter->CEs, othercoliter->CEs,
467 UCOL_EXPAND_CE_BUFFER_SIZE * sizeof(uint32_t));
468 coliter->toReturn = coliter->CEs +
469 (othercoliter->toReturn - othercoliter->CEs);
470 coliter->CEpos = coliter->CEs +
471 (othercoliter->CEpos - othercoliter->CEs);
472
473 if (othercoliter->fcdPosition != NULL) {
474 coliter->fcdPosition = coliter->string +
475 (othercoliter->fcdPosition
476 - othercoliter->string);
477 }
478 else {
479 coliter->fcdPosition = NULL;
480 }
481 coliter->flags = othercoliter->flags/*| UCOL_ITER_HASLEN*/;
482 coliter->origFlags = othercoliter->origFlags;
483 coliter->coll = othercoliter->coll;
484 this->isDataOwned_ = TRUE;
485 }
486
487 return *this;
488 }
489
490 U_NAMESPACE_END
491
492 #endif /* #if !UCONFIG_NO_COLLATION */
493
494 /* eof */