]> git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/ucoleitr.cpp
ICU-551.30.tar.gz
[apple/icu.git] / icuSources / i18n / ucoleitr.cpp
1 /*
2 ******************************************************************************
3 * Copyright (C) 2001-2014, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 ******************************************************************************
6 *
7 * File ucoleitr.cpp
8 *
9 * Modification History:
10 *
11 * Date Name Description
12 * 02/15/2001 synwee Modified all methods to process its own function
13 * instead of calling the equivalent c++ api (coleitr.h)
14 * 2012-2014 markus Rewritten in C++ again.
15 ******************************************************************************/
16
17 #include "unicode/utypes.h"
18
19 #if !UCONFIG_NO_COLLATION
20
21 #include "unicode/coleitr.h"
22 #include "unicode/tblcoll.h"
23 #include "unicode/ucoleitr.h"
24 #include "unicode/ustring.h"
25 #include "unicode/sortkey.h"
26 #include "unicode/uobject.h"
27 #include "cmemory.h"
28 #include "usrchimp.h"
29
30 U_NAMESPACE_USE
31
/* General-purpose buffer length constant. */
#define BUFFER_LENGTH 100

/* Number of entries held in a CE buffer's built-in storage. */
#define DEFAULT_BUFFER_SIZE 16
/* Number of entries added each time a full CE buffer is enlarged. */
#define BUFFER_GROW 8

/* Element count of a true array (do not pass a pointer). */
#define ARRAY_SIZE(array) (sizeof(array) / sizeof((array)[0]))

/* Copies `count` elements from src to dst; the element size is taken from src. */
#define ARRAY_COPY(dst, src, count) uprv_memcpy((void *) (dst), (void *) (src), sizeof((src)[0]) * (count))

/* Allocates uninitialized storage for `count` objects of `type`. */
#define NEW_ARRAY(type, count) ((type *) uprv_malloc(sizeof(type) * (count)))

/* Resizes `array` to hold `newSize` elements; yields an untyped pointer. */
#define GROW_ARRAY(array, newSize) uprv_realloc((void *) (array), sizeof((array)[0]) * (newSize))

/* Releases storage obtained through NEW_ARRAY or GROW_ARRAY. */
#define DELETE_ARRAY(array) uprv_free((void *) (array))
46
/**
 * A raw collation element paired with the two iterator offsets that
 * bracket it in the source text.
 */
struct RCEI
{
    uint32_t ce;    /* raw 32-bit collation element */
    int32_t  low;   /* lower offset bound */
    int32_t  high;  /* upper offset bound */
};
53
54 U_NAMESPACE_BEGIN
55
56 struct RCEBuffer
57 {
58 RCEI defaultBuffer[DEFAULT_BUFFER_SIZE];
59 RCEI *buffer;
60 int32_t bufferIndex;
61 int32_t bufferSize;
62
63 RCEBuffer();
64 ~RCEBuffer();
65
66 UBool empty() const;
67 void put(uint32_t ce, int32_t ixLow, int32_t ixHigh);
68 const RCEI *get();
69 };
70
71 RCEBuffer::RCEBuffer()
72 {
73 buffer = defaultBuffer;
74 bufferIndex = 0;
75 bufferSize = UPRV_LENGTHOF(defaultBuffer);
76 }
77
78 RCEBuffer::~RCEBuffer()
79 {
80 if (buffer != defaultBuffer) {
81 DELETE_ARRAY(buffer);
82 }
83 }
84
85 UBool RCEBuffer::empty() const
86 {
87 return bufferIndex <= 0;
88 }
89
90 void RCEBuffer::put(uint32_t ce, int32_t ixLow, int32_t ixHigh)
91 {
92 if (bufferIndex >= bufferSize) {
93 RCEI *newBuffer = NEW_ARRAY(RCEI, bufferSize + BUFFER_GROW);
94
95 ARRAY_COPY(newBuffer, buffer, bufferSize);
96
97 if (buffer != defaultBuffer) {
98 DELETE_ARRAY(buffer);
99 }
100
101 buffer = newBuffer;
102 bufferSize += BUFFER_GROW;
103 }
104
105 buffer[bufferIndex].ce = ce;
106 buffer[bufferIndex].low = ixLow;
107 buffer[bufferIndex].high = ixHigh;
108
109 bufferIndex += 1;
110 }
111
112 const RCEI *RCEBuffer::get()
113 {
114 if (bufferIndex > 0) {
115 return &buffer[--bufferIndex];
116 }
117
118 return NULL;
119 }
120
121 PCEBuffer::PCEBuffer()
122 {
123 buffer = defaultBuffer;
124 bufferIndex = 0;
125 bufferSize = UPRV_LENGTHOF(defaultBuffer);
126 }
127
128 PCEBuffer::~PCEBuffer()
129 {
130 if (buffer != defaultBuffer) {
131 DELETE_ARRAY(buffer);
132 }
133 }
134
135 void PCEBuffer::reset()
136 {
137 bufferIndex = 0;
138 }
139
140 UBool PCEBuffer::empty() const
141 {
142 return bufferIndex <= 0;
143 }
144
145 void PCEBuffer::put(uint64_t ce, int32_t ixLow, int32_t ixHigh)
146 {
147 if (bufferIndex >= bufferSize) {
148 PCEI *newBuffer = NEW_ARRAY(PCEI, bufferSize + BUFFER_GROW);
149
150 ARRAY_COPY(newBuffer, buffer, bufferSize);
151
152 if (buffer != defaultBuffer) {
153 DELETE_ARRAY(buffer);
154 }
155
156 buffer = newBuffer;
157 bufferSize += BUFFER_GROW;
158 }
159
160 buffer[bufferIndex].ce = ce;
161 buffer[bufferIndex].low = ixLow;
162 buffer[bufferIndex].high = ixHigh;
163
164 bufferIndex += 1;
165 }
166
167 const PCEI *PCEBuffer::get()
168 {
169 if (bufferIndex > 0) {
170 return &buffer[--bufferIndex];
171 }
172
173 return NULL;
174 }
175
176 UCollationPCE::UCollationPCE(UCollationElements *elems) { init(elems); }
177
178 UCollationPCE::UCollationPCE(CollationElementIterator *iter) { init(iter); }
179
180 void UCollationPCE::init(UCollationElements *elems) {
181 init(CollationElementIterator::fromUCollationElements(elems));
182 }
183
184 void UCollationPCE::init(CollationElementIterator *iter)
185 {
186 cei = iter;
187 init(*iter->rbc_);
188 }
189
190 void UCollationPCE::init(const Collator &coll)
191 {
192 UErrorCode status = U_ZERO_ERROR;
193
194 strength = coll.getAttribute(UCOL_STRENGTH, status);
195 toShift = coll.getAttribute(UCOL_ALTERNATE_HANDLING, status) == UCOL_SHIFTED;
196 isShifted = FALSE;
197 variableTop = coll.getVariableTop(status);
198 }
199
200 UCollationPCE::~UCollationPCE()
201 {
202 // nothing to do
203 }
204
205 uint64_t UCollationPCE::processCE(uint32_t ce)
206 {
207 uint64_t primary = 0, secondary = 0, tertiary = 0, quaternary = 0;
208
209 // This is clean, but somewhat slow...
210 // We could apply the mask to ce and then
211 // just get all three orders...
212 switch(strength) {
213 default:
214 tertiary = ucol_tertiaryOrder(ce);
215 /* note fall-through */
216
217 case UCOL_SECONDARY:
218 secondary = ucol_secondaryOrder(ce);
219 /* note fall-through */
220
221 case UCOL_PRIMARY:
222 primary = ucol_primaryOrder(ce);
223 }
224
225 // **** This should probably handle continuations too. ****
226 // **** That means that we need 24 bits for the primary ****
227 // **** instead of the 16 that we're currently using. ****
228 // **** So we can lay out the 64 bits as: 24.12.12.16. ****
229 // **** Another complication with continuations is that ****
230 // **** the *second* CE is marked as a continuation, so ****
231 // **** we always have to peek ahead to know how long ****
232 // **** the primary is... ****
233 if ((toShift && variableTop > ce && primary != 0)
234 || (isShifted && primary == 0)) {
235
236 if (primary == 0) {
237 return UCOL_IGNORABLE;
238 }
239
240 if (strength >= UCOL_QUATERNARY) {
241 quaternary = primary;
242 }
243
244 primary = secondary = tertiary = 0;
245 isShifted = TRUE;
246 } else {
247 if (strength >= UCOL_QUATERNARY) {
248 quaternary = 0xFFFF;
249 }
250
251 isShifted = FALSE;
252 }
253
254 return primary << 48 | secondary << 32 | tertiary << 16 | quaternary;
255 }
256
257 U_NAMESPACE_END
258
259 /* public methods ---------------------------------------------------- */
260
261 U_CAPI UCollationElements* U_EXPORT2
262 ucol_openElements(const UCollator *coll,
263 const UChar *text,
264 int32_t textLength,
265 UErrorCode *status)
266 {
267 if (U_FAILURE(*status)) {
268 return NULL;
269 }
270 if (coll == NULL || (text == NULL && textLength != 0)) {
271 *status = U_ILLEGAL_ARGUMENT_ERROR;
272 return NULL;
273 }
274 const RuleBasedCollator *rbc = RuleBasedCollator::rbcFromUCollator(coll);
275 if (rbc == NULL) {
276 *status = U_UNSUPPORTED_ERROR; // coll is a Collator but not a RuleBasedCollator
277 return NULL;
278 }
279
280 UnicodeString s((UBool)(textLength < 0), text, textLength);
281 CollationElementIterator *cei = rbc->createCollationElementIterator(s);
282 if (cei == NULL) {
283 *status = U_MEMORY_ALLOCATION_ERROR;
284 return NULL;
285 }
286
287 return cei->toUCollationElements();
288 }
289
290
291 U_CAPI void U_EXPORT2
292 ucol_closeElements(UCollationElements *elems)
293 {
294 delete CollationElementIterator::fromUCollationElements(elems);
295 }
296
297 U_CAPI void U_EXPORT2
298 ucol_reset(UCollationElements *elems)
299 {
300 CollationElementIterator::fromUCollationElements(elems)->reset();
301 }
302
303 U_CAPI int32_t U_EXPORT2
304 ucol_next(UCollationElements *elems,
305 UErrorCode *status)
306 {
307 if (U_FAILURE(*status)) {
308 return UCOL_NULLORDER;
309 }
310
311 return CollationElementIterator::fromUCollationElements(elems)->next(*status);
312 }
313
314 // temporarily restore the following removed internal function which is used by Spotlight
315 U_CAPI int64_t U_EXPORT2
316 ucol_nextProcessed(UCollationElements *elems,
317 int32_t *ixLow,
318 int32_t *ixHigh,
319 UErrorCode *status)
320 {
321 return (UCollationPCE::UCollationPCE(elems)).nextProcessed(ixLow, ixHigh, status);
322 }
323
324
325 U_NAMESPACE_BEGIN
326
327 int64_t
328 UCollationPCE::nextProcessed(
329 int32_t *ixLow,
330 int32_t *ixHigh,
331 UErrorCode *status)
332 {
333 int64_t result = UCOL_IGNORABLE;
334 uint32_t low = 0, high = 0;
335
336 if (U_FAILURE(*status)) {
337 return UCOL_PROCESSED_NULLORDER;
338 }
339
340 pceBuffer.reset();
341
342 do {
343 low = cei->getOffset();
344 int32_t ce = cei->next(*status);
345 high = cei->getOffset();
346
347 if (ce == UCOL_NULLORDER) {
348 result = UCOL_PROCESSED_NULLORDER;
349 break;
350 }
351
352 result = processCE((uint32_t)ce);
353 } while (result == UCOL_IGNORABLE);
354
355 if (ixLow != NULL) {
356 *ixLow = low;
357 }
358
359 if (ixHigh != NULL) {
360 *ixHigh = high;
361 }
362
363 return result;
364 }
365
366 U_NAMESPACE_END
367
368 U_CAPI int32_t U_EXPORT2
369 ucol_previous(UCollationElements *elems,
370 UErrorCode *status)
371 {
372 if(U_FAILURE(*status)) {
373 return UCOL_NULLORDER;
374 }
375 return CollationElementIterator::fromUCollationElements(elems)->previous(*status);
376 }
377
378 // temporarily restore the following removed internal function which is used by Spotlight
379 U_CAPI int64_t U_EXPORT2
380 ucol_previousProcessed(UCollationElements *elems,
381 int32_t *ixLow,
382 int32_t *ixHigh,
383 UErrorCode *status)
384 {
385 return (UCollationPCE::UCollationPCE(elems)).previousProcessed(ixLow, ixHigh, status);
386 }
387
388 U_NAMESPACE_BEGIN
389
390 int64_t
391 UCollationPCE::previousProcessed(
392 int32_t *ixLow,
393 int32_t *ixHigh,
394 UErrorCode *status)
395 {
396 int64_t result = UCOL_IGNORABLE;
397 int32_t low = 0, high = 0;
398
399 if (U_FAILURE(*status)) {
400 return UCOL_PROCESSED_NULLORDER;
401 }
402
403 // pceBuffer.reset();
404
405 while (pceBuffer.empty()) {
406 // buffer raw CEs up to non-ignorable primary
407 RCEBuffer rceb;
408 int32_t ce;
409
410 // **** do we need to reset rceb, or will it always be empty at this point ****
411 do {
412 high = cei->getOffset();
413 ce = cei->previous(*status);
414 low = cei->getOffset();
415
416 if (ce == UCOL_NULLORDER) {
417 if (! rceb.empty()) {
418 break;
419 }
420
421 goto finish;
422 }
423
424 rceb.put((uint32_t)ce, low, high);
425 } while ((ce & UCOL_PRIMARYORDERMASK) == 0 || isContinuation(ce));
426
427 // process the raw CEs
428 while (! rceb.empty()) {
429 const RCEI *rcei = rceb.get();
430
431 result = processCE(rcei->ce);
432
433 if (result != UCOL_IGNORABLE) {
434 pceBuffer.put(result, rcei->low, rcei->high);
435 }
436 }
437 }
438
439 finish:
440 if (pceBuffer.empty()) {
441 // **** Is -1 the right value for ixLow, ixHigh? ****
442 if (ixLow != NULL) {
443 *ixLow = -1;
444 }
445
446 if (ixHigh != NULL) {
447 *ixHigh = -1
448 ;
449 }
450 return UCOL_PROCESSED_NULLORDER;
451 }
452
453 const PCEI *pcei = pceBuffer.get();
454
455 if (ixLow != NULL) {
456 *ixLow = pcei->low;
457 }
458
459 if (ixHigh != NULL) {
460 *ixHigh = pcei->high;
461 }
462
463 return pcei->ce;
464 }
465
466 U_NAMESPACE_END
467
468 U_CAPI int32_t U_EXPORT2
469 ucol_getMaxExpansion(const UCollationElements *elems,
470 int32_t order)
471 {
472 return CollationElementIterator::fromUCollationElements(elems)->getMaxExpansion(order);
473
474 // TODO: The old code masked the order according to strength and then did a binary search.
475 // However this was probably at least partially broken because of the following comment.
476 // Still, it might have found a match when this version may not.
477
478 // FIXME: with a masked search, there might be more than one hit,
479 // so we need to look forward and backward from the match to find all
480 // of the hits...
481 }
482
483 U_CAPI void U_EXPORT2
484 ucol_setText( UCollationElements *elems,
485 const UChar *text,
486 int32_t textLength,
487 UErrorCode *status)
488 {
489 if (U_FAILURE(*status)) {
490 return;
491 }
492
493 if ((text == NULL && textLength != 0)) {
494 *status = U_ILLEGAL_ARGUMENT_ERROR;
495 return;
496 }
497 UnicodeString s((UBool)(textLength < 0), text, textLength);
498 return CollationElementIterator::fromUCollationElements(elems)->setText(s, *status);
499 }
500
501 U_CAPI int32_t U_EXPORT2
502 ucol_getOffset(const UCollationElements *elems)
503 {
504 return CollationElementIterator::fromUCollationElements(elems)->getOffset();
505 }
506
507 U_CAPI void U_EXPORT2
508 ucol_setOffset(UCollationElements *elems,
509 int32_t offset,
510 UErrorCode *status)
511 {
512 if (U_FAILURE(*status)) {
513 return;
514 }
515
516 CollationElementIterator::fromUCollationElements(elems)->setOffset(offset, *status);
517 }
518
519 U_CAPI int32_t U_EXPORT2
520 ucol_primaryOrder (int32_t order)
521 {
522 return (order >> 16) & 0xffff;
523 }
524
525 U_CAPI int32_t U_EXPORT2
526 ucol_secondaryOrder (int32_t order)
527 {
528 return (order >> 8) & 0xff;
529 }
530
531 U_CAPI int32_t U_EXPORT2
532 ucol_tertiaryOrder (int32_t order)
533 {
534 return order & 0xff;
535 }
536
537 #endif /* #if !UCONFIG_NO_COLLATION */