]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/ucoleitr.cpp
ICU-66108.tar.gz
[apple/icu.git] / icuSources / i18n / ucoleitr.cpp
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
b75a7d8f
A
3/*
4******************************************************************************
2ca993e8 5* Copyright (C) 2001-2016, International Business Machines
b75a7d8f
A
6* Corporation and others. All Rights Reserved.
7******************************************************************************
8*
9* File ucoleitr.cpp
10*
11* Modification History:
12*
13* Date Name Description
14* 02/15/2001 synwee Modified all methods to process its own function
15* instead of calling the equivalent c++ api (coleitr.h)
57a6839d 16* 2012-2014 markus Rewritten in C++ again.
b75a7d8f
A
17******************************************************************************/
18
19#include "unicode/utypes.h"
20
21#if !UCONFIG_NO_COLLATION
22
57a6839d
A
23#include "unicode/coleitr.h"
24#include "unicode/tblcoll.h"
b75a7d8f
A
25#include "unicode/ucoleitr.h"
26#include "unicode/ustring.h"
27#include "unicode/sortkey.h"
46f4442e 28#include "unicode/uobject.h"
b75a7d8f 29#include "cmemory.h"
57a6839d
A
30#include "usrchimp.h"
31
b75a7d8f
A
32U_NAMESPACE_USE
33
34#define BUFFER_LENGTH 100
35
46f4442e
A
36#define DEFAULT_BUFFER_SIZE 16
37#define BUFFER_GROW 8
38
a62d09fc 39#define ARRAY_COPY(dst, src, count) uprv_memcpy((void *) (dst), (void *) (src), (size_t)(count) * sizeof (src)[0])
46f4442e 40
a62d09fc 41#define NEW_ARRAY(type, count) (type *) uprv_malloc((size_t)(count) * sizeof(type))
46f4442e 42
46f4442e
A
43#define DELETE_ARRAY(array) uprv_free((void *) (array))
44
46f4442e
A
/**
 * One raw (32-bit) collation element plus the pair of iterator
 * offsets that were recorded around the call that produced it.
 */
struct RCEI
{
    uint32_t ce;    // raw collation element value
    int32_t  low;   // lower iterator offset recorded for this element
    int32_t  high;  // upper iterator offset recorded for this element
};
51
52U_NAMESPACE_BEGIN
53
54struct RCEBuffer
55{
56 RCEI defaultBuffer[DEFAULT_BUFFER_SIZE];
57 RCEI *buffer;
58 int32_t bufferIndex;
59 int32_t bufferSize;
60
61 RCEBuffer();
62 ~RCEBuffer();
63
2ca993e8
A
64 UBool isEmpty() const;
65 void put(uint32_t ce, int32_t ixLow, int32_t ixHigh, UErrorCode &errorCode);
46f4442e
A
66 const RCEI *get();
67};
68
69RCEBuffer::RCEBuffer()
70{
71 buffer = defaultBuffer;
72 bufferIndex = 0;
b331163b 73 bufferSize = UPRV_LENGTHOF(defaultBuffer);
46f4442e
A
74}
75
76RCEBuffer::~RCEBuffer()
77{
78 if (buffer != defaultBuffer) {
79 DELETE_ARRAY(buffer);
80 }
81}
82
2ca993e8 83UBool RCEBuffer::isEmpty() const
46f4442e
A
84{
85 return bufferIndex <= 0;
86}
87
2ca993e8 88void RCEBuffer::put(uint32_t ce, int32_t ixLow, int32_t ixHigh, UErrorCode &errorCode)
46f4442e 89{
2ca993e8
A
90 if (U_FAILURE(errorCode)) {
91 return;
92 }
46f4442e
A
93 if (bufferIndex >= bufferSize) {
94 RCEI *newBuffer = NEW_ARRAY(RCEI, bufferSize + BUFFER_GROW);
2ca993e8
A
95 if (newBuffer == NULL) {
96 errorCode = U_MEMORY_ALLOCATION_ERROR;
97 return;
98 }
46f4442e
A
99
100 ARRAY_COPY(newBuffer, buffer, bufferSize);
101
102 if (buffer != defaultBuffer) {
103 DELETE_ARRAY(buffer);
104 }
105
106 buffer = newBuffer;
107 bufferSize += BUFFER_GROW;
108 }
109
110 buffer[bufferIndex].ce = ce;
111 buffer[bufferIndex].low = ixLow;
112 buffer[bufferIndex].high = ixHigh;
113
114 bufferIndex += 1;
115}
116
117const RCEI *RCEBuffer::get()
118{
119 if (bufferIndex > 0) {
120 return &buffer[--bufferIndex];
121 }
122
123 return NULL;
124}
125
46f4442e
A
126PCEBuffer::PCEBuffer()
127{
128 buffer = defaultBuffer;
129 bufferIndex = 0;
b331163b 130 bufferSize = UPRV_LENGTHOF(defaultBuffer);
46f4442e
A
131}
132
133PCEBuffer::~PCEBuffer()
134{
135 if (buffer != defaultBuffer) {
136 DELETE_ARRAY(buffer);
137 }
138}
139
140void PCEBuffer::reset()
141{
142 bufferIndex = 0;
143}
144
2ca993e8 145UBool PCEBuffer::isEmpty() const
46f4442e
A
146{
147 return bufferIndex <= 0;
148}
149
2ca993e8 150void PCEBuffer::put(uint64_t ce, int32_t ixLow, int32_t ixHigh, UErrorCode &errorCode)
46f4442e 151{
2ca993e8
A
152 if (U_FAILURE(errorCode)) {
153 return;
154 }
46f4442e
A
155 if (bufferIndex >= bufferSize) {
156 PCEI *newBuffer = NEW_ARRAY(PCEI, bufferSize + BUFFER_GROW);
2ca993e8
A
157 if (newBuffer == NULL) {
158 errorCode = U_MEMORY_ALLOCATION_ERROR;
159 return;
160 }
46f4442e
A
161
162 ARRAY_COPY(newBuffer, buffer, bufferSize);
163
164 if (buffer != defaultBuffer) {
165 DELETE_ARRAY(buffer);
166 }
167
168 buffer = newBuffer;
169 bufferSize += BUFFER_GROW;
170 }
171
172 buffer[bufferIndex].ce = ce;
173 buffer[bufferIndex].low = ixLow;
174 buffer[bufferIndex].high = ixHigh;
175
176 bufferIndex += 1;
177}
178
179const PCEI *PCEBuffer::get()
180{
181 if (bufferIndex > 0) {
182 return &buffer[--bufferIndex];
183 }
184
185 return NULL;
186}
187
57a6839d 188UCollationPCE::UCollationPCE(UCollationElements *elems) { init(elems); }
46f4442e 189
57a6839d 190UCollationPCE::UCollationPCE(CollationElementIterator *iter) { init(iter); }
46f4442e 191
57a6839d
A
192void UCollationPCE::init(UCollationElements *elems) {
193 init(CollationElementIterator::fromUCollationElements(elems));
194}
46f4442e 195
57a6839d 196void UCollationPCE::init(CollationElementIterator *iter)
46f4442e 197{
57a6839d
A
198 cei = iter;
199 init(*iter->rbc_);
46f4442e
A
200}
201
57a6839d 202void UCollationPCE::init(const Collator &coll)
46f4442e
A
203{
204 UErrorCode status = U_ZERO_ERROR;
205
57a6839d
A
206 strength = coll.getAttribute(UCOL_STRENGTH, status);
207 toShift = coll.getAttribute(UCOL_ALTERNATE_HANDLING, status) == UCOL_SHIFTED;
46f4442e 208 isShifted = FALSE;
57a6839d 209 variableTop = coll.getVariableTop(status);
46f4442e
A
210}
211
212UCollationPCE::~UCollationPCE()
213{
214 // nothing to do
215}
216
57a6839d 217uint64_t UCollationPCE::processCE(uint32_t ce)
46f4442e
A
218{
219 uint64_t primary = 0, secondary = 0, tertiary = 0, quaternary = 0;
220
221 // This is clean, but somewhat slow...
222 // We could apply the mask to ce and then
223 // just get all three orders...
57a6839d 224 switch(strength) {
46f4442e
A
225 default:
226 tertiary = ucol_tertiaryOrder(ce);
2ca993e8 227 U_FALLTHROUGH;
46f4442e
A
228
229 case UCOL_SECONDARY:
230 secondary = ucol_secondaryOrder(ce);
2ca993e8 231 U_FALLTHROUGH;
46f4442e
A
232
233 case UCOL_PRIMARY:
234 primary = ucol_primaryOrder(ce);
235 }
236
729e4ab9
A
237 // **** This should probably handle continuations too. ****
238 // **** That means that we need 24 bits for the primary ****
239 // **** instead of the 16 that we're currently using. ****
240 // **** So we can lay out the 64 bits as: 24.12.12.16. ****
241 // **** Another complication with continuations is that ****
242 // **** the *second* CE is marked as a continuation, so ****
243 // **** we always have to peek ahead to know how long ****
244 // **** the primary is... ****
57a6839d
A
245 if ((toShift && variableTop > ce && primary != 0)
246 || (isShifted && primary == 0)) {
46f4442e
A
247
248 if (primary == 0) {
249 return UCOL_IGNORABLE;
250 }
251
57a6839d 252 if (strength >= UCOL_QUATERNARY) {
46f4442e
A
253 quaternary = primary;
254 }
255
256 primary = secondary = tertiary = 0;
57a6839d 257 isShifted = TRUE;
46f4442e 258 } else {
57a6839d 259 if (strength >= UCOL_QUATERNARY) {
46f4442e
A
260 quaternary = 0xFFFF;
261 }
262
57a6839d 263 isShifted = FALSE;
46f4442e
A
264 }
265
46f4442e
A
266 return primary << 48 | secondary << 32 | tertiary << 16 | quaternary;
267}
268
57a6839d 269U_NAMESPACE_END
46f4442e 270
b75a7d8f
A
271/* public methods ---------------------------------------------------- */
272
b75a7d8f
A
273U_CAPI UCollationElements* U_EXPORT2
274ucol_openElements(const UCollator *coll,
275 const UChar *text,
276 int32_t textLength,
277 UErrorCode *status)
278{
46f4442e
A
279 if (U_FAILURE(*status)) {
280 return NULL;
281 }
57a6839d
A
282 if (coll == NULL || (text == NULL && textLength != 0)) {
283 *status = U_ILLEGAL_ARGUMENT_ERROR;
284 return NULL;
285 }
286 const RuleBasedCollator *rbc = RuleBasedCollator::rbcFromUCollator(coll);
287 if (rbc == NULL) {
288 *status = U_UNSUPPORTED_ERROR; // coll is a Collator but not a RuleBasedCollator
46f4442e
A
289 return NULL;
290 }
729e4ab9 291
57a6839d
A
292 UnicodeString s((UBool)(textLength < 0), text, textLength);
293 CollationElementIterator *cei = rbc->createCollationElementIterator(s);
294 if (cei == NULL) {
295 *status = U_MEMORY_ALLOCATION_ERROR;
296 return NULL;
46f4442e 297 }
b75a7d8f 298
57a6839d 299 return cei->toUCollationElements();
b75a7d8f
A
300}
301
729e4ab9 302
b75a7d8f
A
303U_CAPI void U_EXPORT2
304ucol_closeElements(UCollationElements *elems)
305{
57a6839d 306 delete CollationElementIterator::fromUCollationElements(elems);
b75a7d8f
A
307}
308
309U_CAPI void U_EXPORT2
310ucol_reset(UCollationElements *elems)
311{
57a6839d 312 CollationElementIterator::fromUCollationElements(elems)->reset();
729e4ab9
A
313}
314
b75a7d8f
A
315U_CAPI int32_t U_EXPORT2
316ucol_next(UCollationElements *elems,
317 UErrorCode *status)
318{
46f4442e
A
319 if (U_FAILURE(*status)) {
320 return UCOL_NULLORDER;
321 }
b75a7d8f 322
57a6839d 323 return CollationElementIterator::fromUCollationElements(elems)->next(*status);
46f4442e
A
324}
325
57a6839d 326// temporarily restore the following removed internal function which is used by Spotlight
46f4442e
A
327U_CAPI int64_t U_EXPORT2
328ucol_nextProcessed(UCollationElements *elems,
329 int32_t *ixLow,
330 int32_t *ixHigh,
331 UErrorCode *status)
332{
3bb97ae2 333 return (UCollationPCE(elems)).nextProcessed(ixLow, ixHigh, status);
57a6839d
A
334}
335
336
337U_NAMESPACE_BEGIN
338
339int64_t
340UCollationPCE::nextProcessed(
341 int32_t *ixLow,
342 int32_t *ixHigh,
343 UErrorCode *status)
344{
46f4442e
A
345 int64_t result = UCOL_IGNORABLE;
346 uint32_t low = 0, high = 0;
347
348 if (U_FAILURE(*status)) {
349 return UCOL_PROCESSED_NULLORDER;
350 }
351
57a6839d 352 pceBuffer.reset();
46f4442e
A
353
354 do {
57a6839d
A
355 low = cei->getOffset();
356 int32_t ce = cei->next(*status);
357 high = cei->getOffset();
46f4442e 358
57a6839d 359 if (ce == UCOL_NULLORDER) {
46f4442e
A
360 result = UCOL_PROCESSED_NULLORDER;
361 break;
362 }
363
57a6839d 364 result = processCE((uint32_t)ce);
46f4442e
A
365 } while (result == UCOL_IGNORABLE);
366
367 if (ixLow != NULL) {
368 *ixLow = low;
369 }
370
371 if (ixHigh != NULL) {
372 *ixHigh = high;
373 }
374
375 return result;
b75a7d8f
A
376}
377
57a6839d
A
378U_NAMESPACE_END
379
b75a7d8f
A
380U_CAPI int32_t U_EXPORT2
381ucol_previous(UCollationElements *elems,
382 UErrorCode *status)
383{
46f4442e
A
384 if(U_FAILURE(*status)) {
385 return UCOL_NULLORDER;
386 }
57a6839d 387 return CollationElementIterator::fromUCollationElements(elems)->previous(*status);
46f4442e
A
388}
389
57a6839d 390// temporarily restore the following removed internal function which is used by Spotlight
46f4442e
A
391U_CAPI int64_t U_EXPORT2
392ucol_previousProcessed(UCollationElements *elems,
393 int32_t *ixLow,
394 int32_t *ixHigh,
395 UErrorCode *status)
396{
3bb97ae2 397 return (UCollationPCE(elems)).previousProcessed(ixLow, ixHigh, status);
57a6839d
A
398}
399
400U_NAMESPACE_BEGIN
401
402int64_t
403UCollationPCE::previousProcessed(
404 int32_t *ixLow,
405 int32_t *ixHigh,
406 UErrorCode *status)
407{
46f4442e 408 int64_t result = UCOL_IGNORABLE;
46f4442e
A
409 int32_t low = 0, high = 0;
410
411 if (U_FAILURE(*status)) {
412 return UCOL_PROCESSED_NULLORDER;
413 }
b75a7d8f 414
57a6839d 415 // pceBuffer.reset();
b75a7d8f 416
2ca993e8 417 while (pceBuffer.isEmpty()) {
46f4442e
A
418 // buffer raw CEs up to non-ignorable primary
419 RCEBuffer rceb;
57a6839d 420 int32_t ce;
46f4442e
A
421
422 // **** do we need to reset rceb, or will it always be empty at this point ****
423 do {
57a6839d
A
424 high = cei->getOffset();
425 ce = cei->previous(*status);
426 low = cei->getOffset();
46f4442e 427
57a6839d 428 if (ce == UCOL_NULLORDER) {
2ca993e8 429 if (!rceb.isEmpty()) {
46f4442e
A
430 break;
431 }
432
433 goto finish;
434 }
435
2ca993e8
A
436 rceb.put((uint32_t)ce, low, high, *status);
437 } while (U_SUCCESS(*status) && ((ce & UCOL_PRIMARYORDERMASK) == 0 || isContinuation(ce)));
46f4442e
A
438
439 // process the raw CEs
2ca993e8 440 while (U_SUCCESS(*status) && !rceb.isEmpty()) {
46f4442e
A
441 const RCEI *rcei = rceb.get();
442
57a6839d 443 result = processCE(rcei->ce);
46f4442e
A
444
445 if (result != UCOL_IGNORABLE) {
2ca993e8 446 pceBuffer.put(result, rcei->low, rcei->high, *status);
46f4442e
A
447 }
448 }
2ca993e8
A
449 if (U_FAILURE(*status)) {
450 return UCOL_PROCESSED_NULLORDER;
451 }
46f4442e 452 }
b75a7d8f 453
46f4442e 454finish:
2ca993e8 455 if (pceBuffer.isEmpty()) {
46f4442e
A
456 // **** Is -1 the right value for ixLow, ixHigh? ****
457 if (ixLow != NULL) {
458 *ixLow = -1;
459 }
460
461 if (ixHigh != NULL) {
462 *ixHigh = -1
463 ;
464 }
465 return UCOL_PROCESSED_NULLORDER;
b75a7d8f
A
466 }
467
57a6839d 468 const PCEI *pcei = pceBuffer.get();
46f4442e
A
469
470 if (ixLow != NULL) {
471 *ixLow = pcei->low;
472 }
473
474 if (ixHigh != NULL) {
475 *ixHigh = pcei->high;
476 }
477
478 return pcei->ce;
b75a7d8f
A
479}
480
57a6839d
A
481U_NAMESPACE_END
482
b75a7d8f
A
483U_CAPI int32_t U_EXPORT2
484ucol_getMaxExpansion(const UCollationElements *elems,
485 int32_t order)
486{
57a6839d 487 return CollationElementIterator::fromUCollationElements(elems)->getMaxExpansion(order);
46f4442e 488
57a6839d
A
489 // TODO: The old code masked the order according to strength and then did a binary search.
490 // However this was probably at least partially broken because of the following comment.
491 // Still, it might have found a match when this version may not.
46f4442e
A
492
493 // FIXME: with a masked search, there might be more than one hit,
494 // so we need to look forward and backward from the match to find all
495 // of the hits...
b75a7d8f 496}
57a6839d 497
b75a7d8f
A
498U_CAPI void U_EXPORT2
499ucol_setText( UCollationElements *elems,
500 const UChar *text,
501 int32_t textLength,
502 UErrorCode *status)
503{
46f4442e
A
504 if (U_FAILURE(*status)) {
505 return;
506 }
b75a7d8f 507
57a6839d
A
508 if ((text == NULL && textLength != 0)) {
509 *status = U_ILLEGAL_ARGUMENT_ERROR;
510 return;
729e4ab9 511 }
57a6839d
A
512 UnicodeString s((UBool)(textLength < 0), text, textLength);
513 return CollationElementIterator::fromUCollationElements(elems)->setText(s, *status);
b75a7d8f
A
514}
515
516U_CAPI int32_t U_EXPORT2
517ucol_getOffset(const UCollationElements *elems)
518{
57a6839d 519 return CollationElementIterator::fromUCollationElements(elems)->getOffset();
b75a7d8f
A
520}
521
522U_CAPI void U_EXPORT2
523ucol_setOffset(UCollationElements *elems,
524 int32_t offset,
525 UErrorCode *status)
526{
46f4442e
A
527 if (U_FAILURE(*status)) {
528 return;
529 }
b75a7d8f 530
57a6839d 531 CollationElementIterator::fromUCollationElements(elems)->setOffset(offset, *status);
b75a7d8f
A
532}
533
534U_CAPI int32_t U_EXPORT2
535ucol_primaryOrder (int32_t order)
536{
57a6839d 537 return (order >> 16) & 0xffff;
b75a7d8f
A
538}
539
540U_CAPI int32_t U_EXPORT2
541ucol_secondaryOrder (int32_t order)
542{
57a6839d 543 return (order >> 8) & 0xff;
b75a7d8f
A
544}
545
546U_CAPI int32_t U_EXPORT2
547ucol_tertiaryOrder (int32_t order)
548{
57a6839d 549 return order & 0xff;
729e4ab9
A
550}
551
b75a7d8f 552#endif /* #if !UCONFIG_NO_COLLATION */