2 ******************************************************************************
3 * Copyright (C) 2001-2014, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 ******************************************************************************
9 * Modification History:
11 * Date Name Description
12 * 02/15/2001 synwee Modified all methods to process its own function
13 * instead of calling the equivalent c++ api (coleitr.h)
14 * 2012-2014 markus Rewritten in C++ again.
15 ******************************************************************************/
17 #include "unicode/utypes.h"
19 #if !UCONFIG_NO_COLLATION
21 #include "unicode/coleitr.h"
22 #include "unicode/tblcoll.h"
23 #include "unicode/ucoleitr.h"
24 #include "unicode/ustring.h"
25 #include "unicode/sortkey.h"
26 #include "unicode/uobject.h"
32 #define BUFFER_LENGTH 100
34 #define DEFAULT_BUFFER_SIZE 16
37 #define ARRAY_SIZE(array) (sizeof array / sizeof array[0])
39 #define ARRAY_COPY(dst, src, count) uprv_memcpy((void *) (dst), (void *) (src), (count) * sizeof (src)[0])
41 #define NEW_ARRAY(type, count) (type *) uprv_malloc((count) * sizeof(type))
43 #define GROW_ARRAY(array, newSize) uprv_realloc((void *) (array), (newSize) * sizeof (array)[0])
45 #define DELETE_ARRAY(array) uprv_free((void *) (array))
58 RCEI defaultBuffer
[DEFAULT_BUFFER_SIZE
];
67 void put(uint32_t ce
, int32_t ixLow
, int32_t ixHigh
);
71 RCEBuffer::RCEBuffer()
73 buffer
= defaultBuffer
;
75 bufferSize
= UPRV_LENGTHOF(defaultBuffer
);
78 RCEBuffer::~RCEBuffer()
80 if (buffer
!= defaultBuffer
) {
85 UBool
RCEBuffer::empty() const
87 return bufferIndex
<= 0;
90 void RCEBuffer::put(uint32_t ce
, int32_t ixLow
, int32_t ixHigh
)
92 if (bufferIndex
>= bufferSize
) {
93 RCEI
*newBuffer
= NEW_ARRAY(RCEI
, bufferSize
+ BUFFER_GROW
);
95 ARRAY_COPY(newBuffer
, buffer
, bufferSize
);
97 if (buffer
!= defaultBuffer
) {
102 bufferSize
+= BUFFER_GROW
;
105 buffer
[bufferIndex
].ce
= ce
;
106 buffer
[bufferIndex
].low
= ixLow
;
107 buffer
[bufferIndex
].high
= ixHigh
;
112 const RCEI
*RCEBuffer::get()
114 if (bufferIndex
> 0) {
115 return &buffer
[--bufferIndex
];
121 PCEBuffer::PCEBuffer()
123 buffer
= defaultBuffer
;
125 bufferSize
= UPRV_LENGTHOF(defaultBuffer
);
128 PCEBuffer::~PCEBuffer()
130 if (buffer
!= defaultBuffer
) {
131 DELETE_ARRAY(buffer
);
135 void PCEBuffer::reset()
140 UBool
PCEBuffer::empty() const
142 return bufferIndex
<= 0;
145 void PCEBuffer::put(uint64_t ce
, int32_t ixLow
, int32_t ixHigh
)
147 if (bufferIndex
>= bufferSize
) {
148 PCEI
*newBuffer
= NEW_ARRAY(PCEI
, bufferSize
+ BUFFER_GROW
);
150 ARRAY_COPY(newBuffer
, buffer
, bufferSize
);
152 if (buffer
!= defaultBuffer
) {
153 DELETE_ARRAY(buffer
);
157 bufferSize
+= BUFFER_GROW
;
160 buffer
[bufferIndex
].ce
= ce
;
161 buffer
[bufferIndex
].low
= ixLow
;
162 buffer
[bufferIndex
].high
= ixHigh
;
167 const PCEI
*PCEBuffer::get()
169 if (bufferIndex
> 0) {
170 return &buffer
[--bufferIndex
];
176 UCollationPCE::UCollationPCE(UCollationElements
*elems
) { init(elems
); }
178 UCollationPCE::UCollationPCE(CollationElementIterator
*iter
) { init(iter
); }
180 void UCollationPCE::init(UCollationElements
*elems
) {
181 init(CollationElementIterator::fromUCollationElements(elems
));
184 void UCollationPCE::init(CollationElementIterator
*iter
)
190 void UCollationPCE::init(const Collator
&coll
)
192 UErrorCode status
= U_ZERO_ERROR
;
194 strength
= coll
.getAttribute(UCOL_STRENGTH
, status
);
195 toShift
= coll
.getAttribute(UCOL_ALTERNATE_HANDLING
, status
) == UCOL_SHIFTED
;
197 variableTop
= coll
.getVariableTop(status
);
200 UCollationPCE::~UCollationPCE()
205 uint64_t UCollationPCE::processCE(uint32_t ce
)
207 uint64_t primary
= 0, secondary
= 0, tertiary
= 0, quaternary
= 0;
209 // This is clean, but somewhat slow...
210 // We could apply the mask to ce and then
211 // just get all three orders...
214 tertiary
= ucol_tertiaryOrder(ce
);
215 /* note fall-through */
218 secondary
= ucol_secondaryOrder(ce
);
219 /* note fall-through */
222 primary
= ucol_primaryOrder(ce
);
225 // **** This should probably handle continuations too. ****
226 // **** That means that we need 24 bits for the primary ****
227 // **** instead of the 16 that we're currently using. ****
228 // **** So we can lay out the 64 bits as: 24.12.12.16. ****
229 // **** Another complication with continuations is that ****
230 // **** the *second* CE is marked as a continuation, so ****
231 // **** we always have to peek ahead to know how long ****
232 // **** the primary is... ****
233 if ((toShift
&& variableTop
> ce
&& primary
!= 0)
234 || (isShifted
&& primary
== 0)) {
237 return UCOL_IGNORABLE
;
240 if (strength
>= UCOL_QUATERNARY
) {
241 quaternary
= primary
;
244 primary
= secondary
= tertiary
= 0;
247 if (strength
>= UCOL_QUATERNARY
) {
254 return primary
<< 48 | secondary
<< 32 | tertiary
<< 16 | quaternary
;
259 /* public methods ---------------------------------------------------- */
261 U_CAPI UCollationElements
* U_EXPORT2
262 ucol_openElements(const UCollator
*coll
,
267 if (U_FAILURE(*status
)) {
270 if (coll
== NULL
|| (text
== NULL
&& textLength
!= 0)) {
271 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
274 const RuleBasedCollator
*rbc
= RuleBasedCollator::rbcFromUCollator(coll
);
276 *status
= U_UNSUPPORTED_ERROR
; // coll is a Collator but not a RuleBasedCollator
280 UnicodeString
s((UBool
)(textLength
< 0), text
, textLength
);
281 CollationElementIterator
*cei
= rbc
->createCollationElementIterator(s
);
283 *status
= U_MEMORY_ALLOCATION_ERROR
;
287 return cei
->toUCollationElements();
291 U_CAPI
void U_EXPORT2
292 ucol_closeElements(UCollationElements
*elems
)
294 delete CollationElementIterator::fromUCollationElements(elems
);
297 U_CAPI
void U_EXPORT2
298 ucol_reset(UCollationElements
*elems
)
300 CollationElementIterator::fromUCollationElements(elems
)->reset();
303 U_CAPI
int32_t U_EXPORT2
304 ucol_next(UCollationElements
*elems
,
307 if (U_FAILURE(*status
)) {
308 return UCOL_NULLORDER
;
311 return CollationElementIterator::fromUCollationElements(elems
)->next(*status
);
314 // temporarily restore the following removed internal function which is used by Spotlight
315 U_CAPI
int64_t U_EXPORT2
316 ucol_nextProcessed(UCollationElements
*elems
,
321 return (UCollationPCE::UCollationPCE(elems
)).nextProcessed(ixLow
, ixHigh
, status
);
328 UCollationPCE::nextProcessed(
333 int64_t result
= UCOL_IGNORABLE
;
334 uint32_t low
= 0, high
= 0;
336 if (U_FAILURE(*status
)) {
337 return UCOL_PROCESSED_NULLORDER
;
343 low
= cei
->getOffset();
344 int32_t ce
= cei
->next(*status
);
345 high
= cei
->getOffset();
347 if (ce
== UCOL_NULLORDER
) {
348 result
= UCOL_PROCESSED_NULLORDER
;
352 result
= processCE((uint32_t)ce
);
353 } while (result
== UCOL_IGNORABLE
);
359 if (ixHigh
!= NULL
) {
368 U_CAPI
int32_t U_EXPORT2
369 ucol_previous(UCollationElements
*elems
,
372 if(U_FAILURE(*status
)) {
373 return UCOL_NULLORDER
;
375 return CollationElementIterator::fromUCollationElements(elems
)->previous(*status
);
378 // temporarily restore the following removed internal function which is used by Spotlight
379 U_CAPI
int64_t U_EXPORT2
380 ucol_previousProcessed(UCollationElements
*elems
,
385 return (UCollationPCE::UCollationPCE(elems
)).previousProcessed(ixLow
, ixHigh
, status
);
391 UCollationPCE::previousProcessed(
396 int64_t result
= UCOL_IGNORABLE
;
397 int32_t low
= 0, high
= 0;
399 if (U_FAILURE(*status
)) {
400 return UCOL_PROCESSED_NULLORDER
;
403 // pceBuffer.reset();
405 while (pceBuffer
.empty()) {
406 // buffer raw CEs up to non-ignorable primary
410 // **** do we need to reset rceb, or will it always be empty at this point ****
412 high
= cei
->getOffset();
413 ce
= cei
->previous(*status
);
414 low
= cei
->getOffset();
416 if (ce
== UCOL_NULLORDER
) {
417 if (! rceb
.empty()) {
424 rceb
.put((uint32_t)ce
, low
, high
);
425 } while ((ce
& UCOL_PRIMARYORDERMASK
) == 0 || isContinuation(ce
));
427 // process the raw CEs
428 while (! rceb
.empty()) {
429 const RCEI
*rcei
= rceb
.get();
431 result
= processCE(rcei
->ce
);
433 if (result
!= UCOL_IGNORABLE
) {
434 pceBuffer
.put(result
, rcei
->low
, rcei
->high
);
440 if (pceBuffer
.empty()) {
441 // **** Is -1 the right value for ixLow, ixHigh? ****
446 if (ixHigh
!= NULL
) {
450 return UCOL_PROCESSED_NULLORDER
;
453 const PCEI
*pcei
= pceBuffer
.get();
459 if (ixHigh
!= NULL
) {
460 *ixHigh
= pcei
->high
;
468 U_CAPI
int32_t U_EXPORT2
469 ucol_getMaxExpansion(const UCollationElements
*elems
,
472 return CollationElementIterator::fromUCollationElements(elems
)->getMaxExpansion(order
);
474 // TODO: The old code masked the order according to strength and then did a binary search.
475 // However this was probably at least partially broken because of the following comment.
476 // Still, it might have found a match when this version may not.
478 // FIXME: with a masked search, there might be more than one hit,
479 // so we need to look forward and backward from the match to find all
483 U_CAPI
void U_EXPORT2
484 ucol_setText( UCollationElements
*elems
,
489 if (U_FAILURE(*status
)) {
493 if ((text
== NULL
&& textLength
!= 0)) {
494 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
497 UnicodeString
s((UBool
)(textLength
< 0), text
, textLength
);
498 return CollationElementIterator::fromUCollationElements(elems
)->setText(s
, *status
);
501 U_CAPI
int32_t U_EXPORT2
502 ucol_getOffset(const UCollationElements
*elems
)
504 return CollationElementIterator::fromUCollationElements(elems
)->getOffset();
507 U_CAPI
void U_EXPORT2
508 ucol_setOffset(UCollationElements
*elems
,
512 if (U_FAILURE(*status
)) {
516 CollationElementIterator::fromUCollationElements(elems
)->setOffset(offset
, *status
);
519 U_CAPI
int32_t U_EXPORT2
520 ucol_primaryOrder (int32_t order
)
522 return (order
>> 16) & 0xffff;
525 U_CAPI
int32_t U_EXPORT2
526 ucol_secondaryOrder (int32_t order
)
528 return (order
>> 8) & 0xff;
531 U_CAPI
int32_t U_EXPORT2
532 ucol_tertiaryOrder (int32_t order
)
537 #endif /* #if !UCONFIG_NO_COLLATION */