1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 ******************************************************************************
5 * Copyright (C) 2001-2016, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 ******************************************************************************
11 * Modification History:
13 * Date Name Description
14 * 02/15/2001 synwee Modified all methods to process its own function
15 * instead of calling the equivalent c++ api (coleitr.h)
16 * 2012-2014 markus Rewritten in C++ again.
17 ******************************************************************************/
19 #include "unicode/utypes.h"
21 #if !UCONFIG_NO_COLLATION
23 #include "unicode/coleitr.h"
24 #include "unicode/tblcoll.h"
25 #include "unicode/ucoleitr.h"
26 #include "unicode/ustring.h"
27 #include "unicode/sortkey.h"
28 #include "unicode/uobject.h"
/* Legacy length constant; no use of it is visible in this part of the file. */
#define BUFFER_LENGTH 100

/* Element count of the fixed-size defaultBuffer arrays declared with [DEFAULT_BUFFER_SIZE]. */
#define DEFAULT_BUFFER_SIZE 16

/*
 * Copies `count` elements from `src` to `dst`.
 * The element size is taken from `src[0]`, so `src` must be a typed pointer or array.
 */
#define ARRAY_COPY(dst, src, count) uprv_memcpy((void *) (dst), (void *) (src), (size_t)(count) * sizeof (src)[0])

/*
 * Allocates storage for `count` objects of `type` through uprv_malloc and
 * yields it as a `type *`. Callers in this file compare the result with NULL.
 *
 * The expansion is wrapped in parentheses so that the macro behaves as one
 * primary expression: without them, postfix operators or sizeof applied to
 * NEW_ARRAY(...) would bind to the wrong part of the cast expression.
 */
#define NEW_ARRAY(type, count) ((type *) uprv_malloc((size_t)(count) * sizeof(type)))

/* Releases storage obtained from NEW_ARRAY by passing it to uprv_free. */
#define DELETE_ARRAY(array) uprv_free((void *) (array))
56 RCEI defaultBuffer
[DEFAULT_BUFFER_SIZE
];
64 UBool
isEmpty() const;
65 void put(uint32_t ce
, int32_t ixLow
, int32_t ixHigh
, UErrorCode
&errorCode
);
69 RCEBuffer::RCEBuffer()
71 buffer
= defaultBuffer
;
73 bufferSize
= UPRV_LENGTHOF(defaultBuffer
);
76 RCEBuffer::~RCEBuffer()
78 if (buffer
!= defaultBuffer
) {
83 UBool
RCEBuffer::isEmpty() const
85 return bufferIndex
<= 0;
88 void RCEBuffer::put(uint32_t ce
, int32_t ixLow
, int32_t ixHigh
, UErrorCode
&errorCode
)
90 if (U_FAILURE(errorCode
)) {
93 if (bufferIndex
>= bufferSize
) {
94 RCEI
*newBuffer
= NEW_ARRAY(RCEI
, bufferSize
+ BUFFER_GROW
);
95 if (newBuffer
== NULL
) {
96 errorCode
= U_MEMORY_ALLOCATION_ERROR
;
100 ARRAY_COPY(newBuffer
, buffer
, bufferSize
);
102 if (buffer
!= defaultBuffer
) {
103 DELETE_ARRAY(buffer
);
107 bufferSize
+= BUFFER_GROW
;
110 buffer
[bufferIndex
].ce
= ce
;
111 buffer
[bufferIndex
].low
= ixLow
;
112 buffer
[bufferIndex
].high
= ixHigh
;
117 const RCEI
*RCEBuffer::get()
119 if (bufferIndex
> 0) {
120 return &buffer
[--bufferIndex
];
126 PCEBuffer::PCEBuffer()
128 buffer
= defaultBuffer
;
130 bufferSize
= UPRV_LENGTHOF(defaultBuffer
);
133 PCEBuffer::~PCEBuffer()
135 if (buffer
!= defaultBuffer
) {
136 DELETE_ARRAY(buffer
);
140 void PCEBuffer::reset()
145 UBool
PCEBuffer::isEmpty() const
147 return bufferIndex
<= 0;
150 void PCEBuffer::put(uint64_t ce
, int32_t ixLow
, int32_t ixHigh
, UErrorCode
&errorCode
)
152 if (U_FAILURE(errorCode
)) {
155 if (bufferIndex
>= bufferSize
) {
156 PCEI
*newBuffer
= NEW_ARRAY(PCEI
, bufferSize
+ BUFFER_GROW
);
157 if (newBuffer
== NULL
) {
158 errorCode
= U_MEMORY_ALLOCATION_ERROR
;
162 ARRAY_COPY(newBuffer
, buffer
, bufferSize
);
164 if (buffer
!= defaultBuffer
) {
165 DELETE_ARRAY(buffer
);
169 bufferSize
+= BUFFER_GROW
;
172 buffer
[bufferIndex
].ce
= ce
;
173 buffer
[bufferIndex
].low
= ixLow
;
174 buffer
[bufferIndex
].high
= ixHigh
;
179 const PCEI
*PCEBuffer::get()
181 if (bufferIndex
> 0) {
182 return &buffer
[--bufferIndex
];
188 UCollationPCE::UCollationPCE(UCollationElements
*elems
) { init(elems
); }
190 UCollationPCE::UCollationPCE(CollationElementIterator
*iter
) { init(iter
); }
192 void UCollationPCE::init(UCollationElements
*elems
) {
193 init(CollationElementIterator::fromUCollationElements(elems
));
196 void UCollationPCE::init(CollationElementIterator
*iter
)
202 void UCollationPCE::init(const Collator
&coll
)
204 UErrorCode status
= U_ZERO_ERROR
;
206 strength
= coll
.getAttribute(UCOL_STRENGTH
, status
);
207 toShift
= coll
.getAttribute(UCOL_ALTERNATE_HANDLING
, status
) == UCOL_SHIFTED
;
209 variableTop
= coll
.getVariableTop(status
);
212 UCollationPCE::~UCollationPCE()
217 uint64_t UCollationPCE::processCE(uint32_t ce
)
219 uint64_t primary
= 0, secondary
= 0, tertiary
= 0, quaternary
= 0;
221 // This is clean, but somewhat slow...
222 // We could apply the mask to ce and then
223 // just get all three orders...
226 tertiary
= ucol_tertiaryOrder(ce
);
230 secondary
= ucol_secondaryOrder(ce
);
234 primary
= ucol_primaryOrder(ce
);
237 // **** This should probably handle continuations too. ****
238 // **** That means that we need 24 bits for the primary ****
239 // **** instead of the 16 that we're currently using. ****
240 // **** So we can lay out the 64 bits as: 24.12.12.16. ****
241 // **** Another complication with continuations is that ****
242 // **** the *second* CE is marked as a continuation, so ****
243 // **** we always have to peek ahead to know how long ****
244 // **** the primary is... ****
245 if ((toShift
&& variableTop
> ce
&& primary
!= 0)
246 || (isShifted
&& primary
== 0)) {
249 return UCOL_IGNORABLE
;
252 if (strength
>= UCOL_QUATERNARY
) {
253 quaternary
= primary
;
256 primary
= secondary
= tertiary
= 0;
259 if (strength
>= UCOL_QUATERNARY
) {
266 return primary
<< 48 | secondary
<< 32 | tertiary
<< 16 | quaternary
;
271 /* public methods ---------------------------------------------------- */
273 U_CAPI UCollationElements
* U_EXPORT2
274 ucol_openElements(const UCollator
*coll
,
279 if (U_FAILURE(*status
)) {
282 if (coll
== NULL
|| (text
== NULL
&& textLength
!= 0)) {
283 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
286 const RuleBasedCollator
*rbc
= RuleBasedCollator::rbcFromUCollator(coll
);
288 *status
= U_UNSUPPORTED_ERROR
; // coll is a Collator but not a RuleBasedCollator
292 UnicodeString
s((UBool
)(textLength
< 0), text
, textLength
);
293 CollationElementIterator
*cei
= rbc
->createCollationElementIterator(s
);
295 *status
= U_MEMORY_ALLOCATION_ERROR
;
299 return cei
->toUCollationElements();
303 U_CAPI
void U_EXPORT2
304 ucol_closeElements(UCollationElements
*elems
)
306 delete CollationElementIterator::fromUCollationElements(elems
);
309 U_CAPI
void U_EXPORT2
310 ucol_reset(UCollationElements
*elems
)
312 CollationElementIterator::fromUCollationElements(elems
)->reset();
315 U_CAPI
int32_t U_EXPORT2
316 ucol_next(UCollationElements
*elems
,
319 if (U_FAILURE(*status
)) {
320 return UCOL_NULLORDER
;
323 return CollationElementIterator::fromUCollationElements(elems
)->next(*status
);
326 // temporarily restore the following removed internal function which is used by Spotlight
327 U_CAPI
int64_t U_EXPORT2
328 ucol_nextProcessed(UCollationElements
*elems
,
333 return (UCollationPCE(elems
)).nextProcessed(ixLow
, ixHigh
, status
);
340 UCollationPCE::nextProcessed(
345 int64_t result
= UCOL_IGNORABLE
;
346 uint32_t low
= 0, high
= 0;
348 if (U_FAILURE(*status
)) {
349 return UCOL_PROCESSED_NULLORDER
;
355 low
= cei
->getOffset();
356 int32_t ce
= cei
->next(*status
);
357 high
= cei
->getOffset();
359 if (ce
== UCOL_NULLORDER
) {
360 result
= UCOL_PROCESSED_NULLORDER
;
364 result
= processCE((uint32_t)ce
);
365 } while (result
== UCOL_IGNORABLE
);
371 if (ixHigh
!= NULL
) {
380 U_CAPI
int32_t U_EXPORT2
381 ucol_previous(UCollationElements
*elems
,
384 if(U_FAILURE(*status
)) {
385 return UCOL_NULLORDER
;
387 return CollationElementIterator::fromUCollationElements(elems
)->previous(*status
);
390 // temporarily restore the following removed internal function which is used by Spotlight
391 U_CAPI
int64_t U_EXPORT2
392 ucol_previousProcessed(UCollationElements
*elems
,
397 return (UCollationPCE(elems
)).previousProcessed(ixLow
, ixHigh
, status
);
403 UCollationPCE::previousProcessed(
408 int64_t result
= UCOL_IGNORABLE
;
409 int32_t low
= 0, high
= 0;
411 if (U_FAILURE(*status
)) {
412 return UCOL_PROCESSED_NULLORDER
;
415 // pceBuffer.reset();
417 while (pceBuffer
.isEmpty()) {
418 // buffer raw CEs up to non-ignorable primary
422 // **** do we need to reset rceb, or will it always be empty at this point ****
424 high
= cei
->getOffset();
425 ce
= cei
->previous(*status
);
426 low
= cei
->getOffset();
428 if (ce
== UCOL_NULLORDER
) {
429 if (!rceb
.isEmpty()) {
436 rceb
.put((uint32_t)ce
, low
, high
, *status
);
437 } while (U_SUCCESS(*status
) && ((ce
& UCOL_PRIMARYORDERMASK
) == 0 || isContinuation(ce
)));
439 // process the raw CEs
440 while (U_SUCCESS(*status
) && !rceb
.isEmpty()) {
441 const RCEI
*rcei
= rceb
.get();
443 result
= processCE(rcei
->ce
);
445 if (result
!= UCOL_IGNORABLE
) {
446 pceBuffer
.put(result
, rcei
->low
, rcei
->high
, *status
);
449 if (U_FAILURE(*status
)) {
450 return UCOL_PROCESSED_NULLORDER
;
455 if (pceBuffer
.isEmpty()) {
456 // **** Is -1 the right value for ixLow, ixHigh? ****
461 if (ixHigh
!= NULL
) {
465 return UCOL_PROCESSED_NULLORDER
;
468 const PCEI
*pcei
= pceBuffer
.get();
474 if (ixHigh
!= NULL
) {
475 *ixHigh
= pcei
->high
;
483 U_CAPI
int32_t U_EXPORT2
484 ucol_getMaxExpansion(const UCollationElements
*elems
,
487 return CollationElementIterator::fromUCollationElements(elems
)->getMaxExpansion(order
);
489 // TODO: The old code masked the order according to strength and then did a binary search.
490 // However this was probably at least partially broken because of the following comment.
491 // Still, it might have found a match when this version may not.
493 // FIXME: with a masked search, there might be more than one hit,
494 // so we need to look forward and backward from the match to find all
498 U_CAPI
void U_EXPORT2
499 ucol_setText( UCollationElements
*elems
,
504 if (U_FAILURE(*status
)) {
508 if ((text
== NULL
&& textLength
!= 0)) {
509 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
512 UnicodeString
s((UBool
)(textLength
< 0), text
, textLength
);
513 return CollationElementIterator::fromUCollationElements(elems
)->setText(s
, *status
);
516 U_CAPI
int32_t U_EXPORT2
517 ucol_getOffset(const UCollationElements
*elems
)
519 return CollationElementIterator::fromUCollationElements(elems
)->getOffset();
522 U_CAPI
void U_EXPORT2
523 ucol_setOffset(UCollationElements
*elems
,
527 if (U_FAILURE(*status
)) {
531 CollationElementIterator::fromUCollationElements(elems
)->setOffset(offset
, *status
);
534 U_CAPI
int32_t U_EXPORT2
535 ucol_primaryOrder (int32_t order
)
537 return (order
>> 16) & 0xffff;
540 U_CAPI
int32_t U_EXPORT2
541 ucol_secondaryOrder (int32_t order
)
543 return (order
>> 8) & 0xff;
546 U_CAPI
int32_t U_EXPORT2
547 ucol_tertiaryOrder (int32_t order
)
552 #endif /* #if !UCONFIG_NO_COLLATION */