]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/ucoleitr.cpp
ICU-57166.0.1.tar.gz
[apple/icu.git] / icuSources / i18n / ucoleitr.cpp
CommitLineData
b75a7d8f
A
1/*
2******************************************************************************
2ca993e8 3* Copyright (C) 2001-2016, International Business Machines
b75a7d8f
A
4* Corporation and others. All Rights Reserved.
5******************************************************************************
6*
7* File ucoleitr.cpp
8*
9* Modification History:
10*
11* Date Name Description
12* 02/15/2001 synwee Modified all methods to process its own function
13* instead of calling the equivalent c++ api (coleitr.h)
57a6839d 14* 2012-2014 markus Rewritten in C++ again.
b75a7d8f
A
15******************************************************************************/
16
17#include "unicode/utypes.h"
18
19#if !UCONFIG_NO_COLLATION
20
57a6839d
A
21#include "unicode/coleitr.h"
22#include "unicode/tblcoll.h"
b75a7d8f
A
23#include "unicode/ucoleitr.h"
24#include "unicode/ustring.h"
25#include "unicode/sortkey.h"
46f4442e 26#include "unicode/uobject.h"
b75a7d8f 27#include "cmemory.h"
57a6839d
A
28#include "usrchimp.h"
29
b75a7d8f
A
30U_NAMESPACE_USE
31
// Not referenced by any code visible in this file.
#define BUFFER_LENGTH 100

// Number of entries in RCEBuffer's built-in (non-heap) storage.
#define DEFAULT_BUFFER_SIZE 16
// Number of entries by which a full CE buffer is enlarged in put().
#define BUFFER_GROW 8

// Copies `count` elements from src to dst; the element size is taken from src[0].
#define ARRAY_COPY(dst, src, count) uprv_memcpy((void *) (dst), (void *) (src), (size_t)(count) * sizeof (src)[0])

// Allocates room for `count` objects of `type` with uprv_malloc().
// The expansion is wrapped in parentheses so that the cast cannot bind
// unexpectedly when the macro is used inside a larger expression.
#define NEW_ARRAY(type, count) ((type *) uprv_malloc((size_t)(count) * sizeof(type)))

// Releases storage obtained from NEW_ARRAY.
#define DELETE_ARRAY(array) uprv_free((void *) (array))
42
46f4442e
A
// One raw collation element together with the pair of iterator offsets
// recorded around it (see RCEBuffer::put()).
struct RCEI
{
    uint32_t ce;    // the raw collation element
    int32_t  low;   // lower offset stored with the element
    int32_t  high;  // upper offset stored with the element
};
49
50U_NAMESPACE_BEGIN
51
52struct RCEBuffer
53{
54 RCEI defaultBuffer[DEFAULT_BUFFER_SIZE];
55 RCEI *buffer;
56 int32_t bufferIndex;
57 int32_t bufferSize;
58
59 RCEBuffer();
60 ~RCEBuffer();
61
2ca993e8
A
62 UBool isEmpty() const;
63 void put(uint32_t ce, int32_t ixLow, int32_t ixHigh, UErrorCode &errorCode);
46f4442e
A
64 const RCEI *get();
65};
66
67RCEBuffer::RCEBuffer()
68{
69 buffer = defaultBuffer;
70 bufferIndex = 0;
b331163b 71 bufferSize = UPRV_LENGTHOF(defaultBuffer);
46f4442e
A
72}
73
74RCEBuffer::~RCEBuffer()
75{
76 if (buffer != defaultBuffer) {
77 DELETE_ARRAY(buffer);
78 }
79}
80
2ca993e8 81UBool RCEBuffer::isEmpty() const
46f4442e
A
82{
83 return bufferIndex <= 0;
84}
85
2ca993e8 86void RCEBuffer::put(uint32_t ce, int32_t ixLow, int32_t ixHigh, UErrorCode &errorCode)
46f4442e 87{
2ca993e8
A
88 if (U_FAILURE(errorCode)) {
89 return;
90 }
46f4442e
A
91 if (bufferIndex >= bufferSize) {
92 RCEI *newBuffer = NEW_ARRAY(RCEI, bufferSize + BUFFER_GROW);
2ca993e8
A
93 if (newBuffer == NULL) {
94 errorCode = U_MEMORY_ALLOCATION_ERROR;
95 return;
96 }
46f4442e
A
97
98 ARRAY_COPY(newBuffer, buffer, bufferSize);
99
100 if (buffer != defaultBuffer) {
101 DELETE_ARRAY(buffer);
102 }
103
104 buffer = newBuffer;
105 bufferSize += BUFFER_GROW;
106 }
107
108 buffer[bufferIndex].ce = ce;
109 buffer[bufferIndex].low = ixLow;
110 buffer[bufferIndex].high = ixHigh;
111
112 bufferIndex += 1;
113}
114
115const RCEI *RCEBuffer::get()
116{
117 if (bufferIndex > 0) {
118 return &buffer[--bufferIndex];
119 }
120
121 return NULL;
122}
123
46f4442e
A
124PCEBuffer::PCEBuffer()
125{
126 buffer = defaultBuffer;
127 bufferIndex = 0;
b331163b 128 bufferSize = UPRV_LENGTHOF(defaultBuffer);
46f4442e
A
129}
130
131PCEBuffer::~PCEBuffer()
132{
133 if (buffer != defaultBuffer) {
134 DELETE_ARRAY(buffer);
135 }
136}
137
138void PCEBuffer::reset()
139{
140 bufferIndex = 0;
141}
142
2ca993e8 143UBool PCEBuffer::isEmpty() const
46f4442e
A
144{
145 return bufferIndex <= 0;
146}
147
2ca993e8 148void PCEBuffer::put(uint64_t ce, int32_t ixLow, int32_t ixHigh, UErrorCode &errorCode)
46f4442e 149{
2ca993e8
A
150 if (U_FAILURE(errorCode)) {
151 return;
152 }
46f4442e
A
153 if (bufferIndex >= bufferSize) {
154 PCEI *newBuffer = NEW_ARRAY(PCEI, bufferSize + BUFFER_GROW);
2ca993e8
A
155 if (newBuffer == NULL) {
156 errorCode = U_MEMORY_ALLOCATION_ERROR;
157 return;
158 }
46f4442e
A
159
160 ARRAY_COPY(newBuffer, buffer, bufferSize);
161
162 if (buffer != defaultBuffer) {
163 DELETE_ARRAY(buffer);
164 }
165
166 buffer = newBuffer;
167 bufferSize += BUFFER_GROW;
168 }
169
170 buffer[bufferIndex].ce = ce;
171 buffer[bufferIndex].low = ixLow;
172 buffer[bufferIndex].high = ixHigh;
173
174 bufferIndex += 1;
175}
176
177const PCEI *PCEBuffer::get()
178{
179 if (bufferIndex > 0) {
180 return &buffer[--bufferIndex];
181 }
182
183 return NULL;
184}
185
57a6839d 186UCollationPCE::UCollationPCE(UCollationElements *elems) { init(elems); }
46f4442e 187
57a6839d 188UCollationPCE::UCollationPCE(CollationElementIterator *iter) { init(iter); }
46f4442e 189
57a6839d
A
190void UCollationPCE::init(UCollationElements *elems) {
191 init(CollationElementIterator::fromUCollationElements(elems));
192}
46f4442e 193
57a6839d 194void UCollationPCE::init(CollationElementIterator *iter)
46f4442e 195{
57a6839d
A
196 cei = iter;
197 init(*iter->rbc_);
46f4442e
A
198}
199
57a6839d 200void UCollationPCE::init(const Collator &coll)
46f4442e
A
201{
202 UErrorCode status = U_ZERO_ERROR;
203
57a6839d
A
204 strength = coll.getAttribute(UCOL_STRENGTH, status);
205 toShift = coll.getAttribute(UCOL_ALTERNATE_HANDLING, status) == UCOL_SHIFTED;
46f4442e 206 isShifted = FALSE;
57a6839d 207 variableTop = coll.getVariableTop(status);
46f4442e
A
208}
209
210UCollationPCE::~UCollationPCE()
211{
212 // nothing to do
213}
214
57a6839d 215uint64_t UCollationPCE::processCE(uint32_t ce)
46f4442e
A
216{
217 uint64_t primary = 0, secondary = 0, tertiary = 0, quaternary = 0;
218
219 // This is clean, but somewhat slow...
220 // We could apply the mask to ce and then
221 // just get all three orders...
57a6839d 222 switch(strength) {
46f4442e
A
223 default:
224 tertiary = ucol_tertiaryOrder(ce);
2ca993e8 225 U_FALLTHROUGH;
46f4442e
A
226
227 case UCOL_SECONDARY:
228 secondary = ucol_secondaryOrder(ce);
2ca993e8 229 U_FALLTHROUGH;
46f4442e
A
230
231 case UCOL_PRIMARY:
232 primary = ucol_primaryOrder(ce);
233 }
234
729e4ab9
A
235 // **** This should probably handle continuations too. ****
236 // **** That means that we need 24 bits for the primary ****
237 // **** instead of the 16 that we're currently using. ****
238 // **** So we can lay out the 64 bits as: 24.12.12.16. ****
239 // **** Another complication with continuations is that ****
240 // **** the *second* CE is marked as a continuation, so ****
241 // **** we always have to peek ahead to know how long ****
242 // **** the primary is... ****
57a6839d
A
243 if ((toShift && variableTop > ce && primary != 0)
244 || (isShifted && primary == 0)) {
46f4442e
A
245
246 if (primary == 0) {
247 return UCOL_IGNORABLE;
248 }
249
57a6839d 250 if (strength >= UCOL_QUATERNARY) {
46f4442e
A
251 quaternary = primary;
252 }
253
254 primary = secondary = tertiary = 0;
57a6839d 255 isShifted = TRUE;
46f4442e 256 } else {
57a6839d 257 if (strength >= UCOL_QUATERNARY) {
46f4442e
A
258 quaternary = 0xFFFF;
259 }
260
57a6839d 261 isShifted = FALSE;
46f4442e
A
262 }
263
46f4442e
A
264 return primary << 48 | secondary << 32 | tertiary << 16 | quaternary;
265}
266
57a6839d 267U_NAMESPACE_END
46f4442e 268
b75a7d8f
A
269/* public methods ---------------------------------------------------- */
270
b75a7d8f
A
271U_CAPI UCollationElements* U_EXPORT2
272ucol_openElements(const UCollator *coll,
273 const UChar *text,
274 int32_t textLength,
275 UErrorCode *status)
276{
46f4442e
A
277 if (U_FAILURE(*status)) {
278 return NULL;
279 }
57a6839d
A
280 if (coll == NULL || (text == NULL && textLength != 0)) {
281 *status = U_ILLEGAL_ARGUMENT_ERROR;
282 return NULL;
283 }
284 const RuleBasedCollator *rbc = RuleBasedCollator::rbcFromUCollator(coll);
285 if (rbc == NULL) {
286 *status = U_UNSUPPORTED_ERROR; // coll is a Collator but not a RuleBasedCollator
46f4442e
A
287 return NULL;
288 }
729e4ab9 289
57a6839d
A
290 UnicodeString s((UBool)(textLength < 0), text, textLength);
291 CollationElementIterator *cei = rbc->createCollationElementIterator(s);
292 if (cei == NULL) {
293 *status = U_MEMORY_ALLOCATION_ERROR;
294 return NULL;
46f4442e 295 }
b75a7d8f 296
57a6839d 297 return cei->toUCollationElements();
b75a7d8f
A
298}
299
729e4ab9 300
b75a7d8f
A
301U_CAPI void U_EXPORT2
302ucol_closeElements(UCollationElements *elems)
303{
57a6839d 304 delete CollationElementIterator::fromUCollationElements(elems);
b75a7d8f
A
305}
306
307U_CAPI void U_EXPORT2
308ucol_reset(UCollationElements *elems)
309{
57a6839d 310 CollationElementIterator::fromUCollationElements(elems)->reset();
729e4ab9
A
311}
312
b75a7d8f
A
313U_CAPI int32_t U_EXPORT2
314ucol_next(UCollationElements *elems,
315 UErrorCode *status)
316{
46f4442e
A
317 if (U_FAILURE(*status)) {
318 return UCOL_NULLORDER;
319 }
b75a7d8f 320
57a6839d 321 return CollationElementIterator::fromUCollationElements(elems)->next(*status);
46f4442e
A
322}
323
57a6839d 324// temporarily restore the following removed internal function which is used by Spotlight
46f4442e
A
325U_CAPI int64_t U_EXPORT2
326ucol_nextProcessed(UCollationElements *elems,
327 int32_t *ixLow,
328 int32_t *ixHigh,
329 UErrorCode *status)
330{
57a6839d
A
331 return (UCollationPCE::UCollationPCE(elems)).nextProcessed(ixLow, ixHigh, status);
332}
333
334
335U_NAMESPACE_BEGIN
336
337int64_t
338UCollationPCE::nextProcessed(
339 int32_t *ixLow,
340 int32_t *ixHigh,
341 UErrorCode *status)
342{
46f4442e
A
343 int64_t result = UCOL_IGNORABLE;
344 uint32_t low = 0, high = 0;
345
346 if (U_FAILURE(*status)) {
347 return UCOL_PROCESSED_NULLORDER;
348 }
349
57a6839d 350 pceBuffer.reset();
46f4442e
A
351
352 do {
57a6839d
A
353 low = cei->getOffset();
354 int32_t ce = cei->next(*status);
355 high = cei->getOffset();
46f4442e 356
57a6839d 357 if (ce == UCOL_NULLORDER) {
46f4442e
A
358 result = UCOL_PROCESSED_NULLORDER;
359 break;
360 }
361
57a6839d 362 result = processCE((uint32_t)ce);
46f4442e
A
363 } while (result == UCOL_IGNORABLE);
364
365 if (ixLow != NULL) {
366 *ixLow = low;
367 }
368
369 if (ixHigh != NULL) {
370 *ixHigh = high;
371 }
372
373 return result;
b75a7d8f
A
374}
375
57a6839d
A
376U_NAMESPACE_END
377
b75a7d8f
A
378U_CAPI int32_t U_EXPORT2
379ucol_previous(UCollationElements *elems,
380 UErrorCode *status)
381{
46f4442e
A
382 if(U_FAILURE(*status)) {
383 return UCOL_NULLORDER;
384 }
57a6839d 385 return CollationElementIterator::fromUCollationElements(elems)->previous(*status);
46f4442e
A
386}
387
57a6839d 388// temporarily restore the following removed internal function which is used by Spotlight
46f4442e
A
389U_CAPI int64_t U_EXPORT2
390ucol_previousProcessed(UCollationElements *elems,
391 int32_t *ixLow,
392 int32_t *ixHigh,
393 UErrorCode *status)
394{
57a6839d
A
395 return (UCollationPCE::UCollationPCE(elems)).previousProcessed(ixLow, ixHigh, status);
396}
397
398U_NAMESPACE_BEGIN
399
400int64_t
401UCollationPCE::previousProcessed(
402 int32_t *ixLow,
403 int32_t *ixHigh,
404 UErrorCode *status)
405{
46f4442e 406 int64_t result = UCOL_IGNORABLE;
46f4442e
A
407 int32_t low = 0, high = 0;
408
409 if (U_FAILURE(*status)) {
410 return UCOL_PROCESSED_NULLORDER;
411 }
b75a7d8f 412
57a6839d 413 // pceBuffer.reset();
b75a7d8f 414
2ca993e8 415 while (pceBuffer.isEmpty()) {
46f4442e
A
416 // buffer raw CEs up to non-ignorable primary
417 RCEBuffer rceb;
57a6839d 418 int32_t ce;
46f4442e
A
419
420 // **** do we need to reset rceb, or will it always be empty at this point ****
421 do {
57a6839d
A
422 high = cei->getOffset();
423 ce = cei->previous(*status);
424 low = cei->getOffset();
46f4442e 425
57a6839d 426 if (ce == UCOL_NULLORDER) {
2ca993e8 427 if (!rceb.isEmpty()) {
46f4442e
A
428 break;
429 }
430
431 goto finish;
432 }
433
2ca993e8
A
434 rceb.put((uint32_t)ce, low, high, *status);
435 } while (U_SUCCESS(*status) && ((ce & UCOL_PRIMARYORDERMASK) == 0 || isContinuation(ce)));
46f4442e
A
436
437 // process the raw CEs
2ca993e8 438 while (U_SUCCESS(*status) && !rceb.isEmpty()) {
46f4442e
A
439 const RCEI *rcei = rceb.get();
440
57a6839d 441 result = processCE(rcei->ce);
46f4442e
A
442
443 if (result != UCOL_IGNORABLE) {
2ca993e8 444 pceBuffer.put(result, rcei->low, rcei->high, *status);
46f4442e
A
445 }
446 }
2ca993e8
A
447 if (U_FAILURE(*status)) {
448 return UCOL_PROCESSED_NULLORDER;
449 }
46f4442e 450 }
b75a7d8f 451
46f4442e 452finish:
2ca993e8 453 if (pceBuffer.isEmpty()) {
46f4442e
A
454 // **** Is -1 the right value for ixLow, ixHigh? ****
455 if (ixLow != NULL) {
456 *ixLow = -1;
457 }
458
459 if (ixHigh != NULL) {
460 *ixHigh = -1
461 ;
462 }
463 return UCOL_PROCESSED_NULLORDER;
b75a7d8f
A
464 }
465
57a6839d 466 const PCEI *pcei = pceBuffer.get();
46f4442e
A
467
468 if (ixLow != NULL) {
469 *ixLow = pcei->low;
470 }
471
472 if (ixHigh != NULL) {
473 *ixHigh = pcei->high;
474 }
475
476 return pcei->ce;
b75a7d8f
A
477}
478
57a6839d
A
479U_NAMESPACE_END
480
b75a7d8f
A
481U_CAPI int32_t U_EXPORT2
482ucol_getMaxExpansion(const UCollationElements *elems,
483 int32_t order)
484{
57a6839d 485 return CollationElementIterator::fromUCollationElements(elems)->getMaxExpansion(order);
46f4442e 486
57a6839d
A
487 // TODO: The old code masked the order according to strength and then did a binary search.
488 // However this was probably at least partially broken because of the following comment.
489 // Still, it might have found a match when this version may not.
46f4442e
A
490
491 // FIXME: with a masked search, there might be more than one hit,
492 // so we need to look forward and backward from the match to find all
493 // of the hits...
b75a7d8f 494}
57a6839d 495
b75a7d8f
A
496U_CAPI void U_EXPORT2
497ucol_setText( UCollationElements *elems,
498 const UChar *text,
499 int32_t textLength,
500 UErrorCode *status)
501{
46f4442e
A
502 if (U_FAILURE(*status)) {
503 return;
504 }
b75a7d8f 505
57a6839d
A
506 if ((text == NULL && textLength != 0)) {
507 *status = U_ILLEGAL_ARGUMENT_ERROR;
508 return;
729e4ab9 509 }
57a6839d
A
510 UnicodeString s((UBool)(textLength < 0), text, textLength);
511 return CollationElementIterator::fromUCollationElements(elems)->setText(s, *status);
b75a7d8f
A
512}
513
514U_CAPI int32_t U_EXPORT2
515ucol_getOffset(const UCollationElements *elems)
516{
57a6839d 517 return CollationElementIterator::fromUCollationElements(elems)->getOffset();
b75a7d8f
A
518}
519
520U_CAPI void U_EXPORT2
521ucol_setOffset(UCollationElements *elems,
522 int32_t offset,
523 UErrorCode *status)
524{
46f4442e
A
525 if (U_FAILURE(*status)) {
526 return;
527 }
b75a7d8f 528
57a6839d 529 CollationElementIterator::fromUCollationElements(elems)->setOffset(offset, *status);
b75a7d8f
A
530}
531
532U_CAPI int32_t U_EXPORT2
533ucol_primaryOrder (int32_t order)
534{
57a6839d 535 return (order >> 16) & 0xffff;
b75a7d8f
A
536}
537
538U_CAPI int32_t U_EXPORT2
539ucol_secondaryOrder (int32_t order)
540{
57a6839d 541 return (order >> 8) & 0xff;
b75a7d8f
A
542}
543
544U_CAPI int32_t U_EXPORT2
545ucol_tertiaryOrder (int32_t order)
546{
57a6839d 547 return order & 0xff;
729e4ab9
A
548}
549
b75a7d8f 550#endif /* #if !UCONFIG_NO_COLLATION */