1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 *******************************************************************************
5 * Copyright (C) 1996-2015, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
10 * tab size: 8 (not used)
13 * Modification history
15 * 1996-1999 various members of ICU team maintained C API for collation framework
16 * 02/16/2001 synwee Added internal method getPrevSpecialCE
17 * 03/01/2001 synwee Added maxexpansion functionality.
18 * 03/16/2001 weiv Collation framework is rewritten in C and made UCA compliant
19 * 2012-2014 markus Rewritten in C++ again.
22 #include "unicode/utypes.h"
24 #if !UCONFIG_NO_COLLATION
26 #include "unicode/coll.h"
27 #include "unicode/tblcoll.h"
28 #include "unicode/bytestream.h"
29 #include "unicode/coleitr.h"
30 #include "unicode/ucoleitr.h"
31 #include "unicode/ustring.h"
33 #include "collation.h"
// ucol_openBinary: C entry point that constructs a new RuleBasedCollator and
// returns it through the UCollator handle type.
// NOTE(review): the embedded original line numbers skip values, so parts of this
// definition (remaining parameters, some constructor arguments, braces, cleanup)
// are not visible in this excerpt.
41 U_CAPI UCollator
* U_EXPORT2
42 ucol_openBinary(const uint8_t *bin
, int32_t length
,
43 const UCollator
*base
,
// Do nothing when the caller's status already signals failure.
46 if(U_FAILURE(*status
)) { return NULL
; }
47 RuleBasedCollator
*coll
= new RuleBasedCollator(
// The base handle is converted with rbcFromUCollator() before being passed to the constructor.
49 RuleBasedCollator::rbcFromUCollator(base
),
// Allocation failure is reported through *status (the guarding condition is not visible here).
52 *status
= U_MEMORY_ALLOCATION_ERROR
;
// Second status check after construction; its body is not visible in this excerpt.
55 if(U_FAILURE(*status
)) {
// Final visible statement: convert the C++ object back to the C handle.
59 return coll
->toUCollator();
// ucol_cloneBinary: forwards to RuleBasedCollator::cloneBinary(buffer, capacity, *status)
// on the object obtained from `coll`, and returns that call's int32_t result.
// NOTE(review): some original lines (status parameter, early-return bodies) are not visible here.
62 U_CAPI
int32_t U_EXPORT2
63 ucol_cloneBinary(const UCollator
*coll
,
64 uint8_t *buffer
, int32_t capacity
,
// Guard on the incoming status; the body of this branch is not visible.
67 if(U_FAILURE(*status
)) {
70 const RuleBasedCollator
*rbc
= RuleBasedCollator::rbcFromUCollator(coll
);
// A non-NULL handle for which rbcFromUCollator() yields NULL is reported as unsupported
// (presumably a collator that is not rule-based — TODO confirm against rbcFromUCollator()).
71 if(rbc
== NULL
&& coll
!= NULL
) {
72 *status
= U_UNSUPPORTED_ERROR
;
75 return rbc
->cloneBinary(buffer
, capacity
, *status
);
// ucol_safeClone: clones the collator behind `coll` via Collator::clone() and returns
// the clone as a UCollator handle. The stack-buffer parameter is unnamed and unused.
// NOTE(review): several original lines (conditions and returns) are missing from this excerpt.
78 U_CAPI UCollator
* U_EXPORT2
79 ucol_safeClone(const UCollator
*coll
, void * /*stackBuffer*/, int32_t * pBufferSize
, UErrorCode
*status
)
// Unlike most functions in this file, the status pointer itself is NULL-checked here.
81 if (status
== NULL
|| U_FAILURE(*status
)){
// Illegal-argument error; the condition that triggers it is not visible.
85 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
// pBufferSize is optional; when supplied, its incoming value is captured first.
88 if (pBufferSize
!= NULL
) {
89 int32_t inputSize
= *pBufferSize
;
// The condition guarding this return is not visible in this excerpt.
92 return NULL
; // preflighting for deprecated functionality
95 Collator
*newColl
= Collator::fromUCollator(coll
)->clone();
96 if (newColl
== NULL
) {
97 *status
= U_MEMORY_ALLOCATION_ERROR
;
// Presumably the success branch: a warning status is set — the branch structure
// between the two assignments is not visible; TODO confirm.
100 *status
= U_SAFECLONE_ALLOCATED_WARNING
;
102 return newColl
->toUCollator();
// ucol_close: deletes the Collator object behind the given handle.
// NOTE(review): braces and any trace-exit line are not visible in this excerpt.
105 U_CAPI
void U_EXPORT2
106 ucol_close(UCollator
*coll
)
// Trace entry and log the handle's pointer value.
108 UTRACE_ENTRY_OC(UTRACE_UCOL_CLOSE
);
109 UTRACE_DATA1(UTRACE_INFO
, "coll = %p", coll
);
// The handle is converted back to the C++ object and destroyed with delete.
111 delete Collator::fromUCollator(coll
);
// ucol_mergeSortkeys: merges two zero-terminated sort keys (src1, src2) level by level
// into dest and returns the number of bytes written, computed as (p - dest).
// NOTE(review): many original lines (loop bodies, declarations of `p` and `b`,
// early returns) are missing from this excerpt; comments describe only what is visible.
116 U_CAPI
int32_t U_EXPORT2
117 ucol_mergeSortkeys(const uint8_t *src1
, int32_t src1Length
,
118 const uint8_t *src2
, int32_t src2Length
,
119 uint8_t *dest
, int32_t destCapacity
) {
120 /* check arguments */
// Rejected inputs: a NULL source key, a length below -1 or equal to 0, a positive
// length whose last byte is not 0, a negative capacity, or a positive capacity
// combined with a NULL destination.
121 if( src1
==NULL
|| src1Length
<-1 || src1Length
==0 || (src1Length
>0 && src1
[src1Length
-1]!=0) ||
122 src2
==NULL
|| src2Length
<-1 || src2Length
==0 || (src2Length
>0 && src2
[src2Length
-1]!=0) ||
123 destCapacity
<0 || (destCapacity
>0 && dest
==NULL
)
125 /* error, attempt to write a zero byte and return 0 */
126 if(dest
!=NULL
&& destCapacity
>0) {
132 /* check lengths and capacity */
// Lengths are computed from the terminator, +1 so the terminating 0 is counted.
// The guarding conditions are not visible; presumably they cover the -1 length case — TODO confirm.
134 src1Length
=(int32_t)uprv_strlen((const char *)src1
)+1;
137 src2Length
=(int32_t)uprv_strlen((const char *)src2
)+1;
// Sum of both key lengths, each including its terminator.
140 int32_t destLength
=src1Length
+src2Length
;
141 if(destLength
>destCapacity
) {
142 /* the merged sort key does not fit into the destination */
146 /* merge the sort keys with the same number of levels */
149 /* copy level from src1 not including 00 or 01 */
// Bytes >= 2 are level content; 00 and 01 end the loop.
151 while((b
=*src1
)>=2) {
156 /* add a 02 merge separator */
159 /* copy level from src2 not including 00 or 01 */
160 while((b
=*src2
)>=2) {
165 /* if both sort keys have another level, then add a 01 level separator and continue */
166 if(*src1
==1 && *src2
==1) {
176 * here, at least one sort key is finished now, but the other one
177 * might have some contents left from containing more levels;
178 * that contents is just appended to the result
181 /* src1 is not finished, therefore *src2==0, and src1 is appended */
184 /* append src2, "the other, unfinished sort key" */
// Byte-wise copy that stops right after the terminating 0 has been written.
185 while((*p
++=*src2
++)!=0) {}
187 /* the actual length might be less than destLength if either sort key contained illegally embedded zero bytes */
188 return (int32_t)(p
-dest
);
// ucol_getSortKey: forwards to Collator::getSortKey(source, sourceLength, result, resultLength)
// on the object behind `coll`, with tracing around the call.
// NOTE(review): the `source` and `result` parameter lines, braces and the final return
// are not visible in this excerpt.
191 U_CAPI
int32_t U_EXPORT2
192 ucol_getSortKey(const UCollator
*coll
,
194 int32_t sourceLength
,
196 int32_t resultLength
)
198 UTRACE_ENTRY(UTRACE_UCOL_GET_SORTKEY
);
// Verbose tracing only: when sourceLength is -1 and source is non-NULL, the logged
// length is computed with u_strlen(); otherwise sourceLength is logged as given.
199 if (UTRACE_LEVEL(UTRACE_VERBOSE
)) {
200 UTRACE_DATA3(UTRACE_VERBOSE
, "coll=%p, source string = %vh ", coll
, source
,
201 ((sourceLength
==-1 && source
!=NULL
) ? u_strlen(source
) : sourceLength
));
// The actual work is delegated to the C++ Collator.
204 int32_t keySize
= Collator::fromUCollator(coll
)->
205 getSortKey(source
, sourceLength
, result
, resultLength
);
// Trace the produced key bytes and the value handed to the exit macro.
207 UTRACE_DATA2(UTRACE_VERBOSE
, "Sort Key = %vb", result
, keySize
);
208 UTRACE_EXIT_VALUE(keySize
);
// ucol_nextSortKeyPart: forwards to Collator::internalNextSortKeyPart(iter, state, dest,
// count, *status) on the object behind `coll`, with tracing around the call.
// NOTE(review): the `iter`, `state` and `status` parameter lines, braces and the
// return statements are not visible in this excerpt.
212 U_CAPI
int32_t U_EXPORT2
213 ucol_nextSortKeyPart(const UCollator
*coll
,
216 uint8_t *dest
, int32_t count
,
// The status pointer itself is NULL-checked before being dereferenced.
220 if(status
==NULL
|| U_FAILURE(*status
)) {
223 UTRACE_ENTRY(UTRACE_UCOL_NEXTSORTKEYPART
);
// state is indexed at [0] and [1] for tracing, so it must have at least two elements.
224 UTRACE_DATA6(UTRACE_VERBOSE
, "coll=%p, iter=%p, state=%d %d, dest=%p, count=%d",
225 coll
, iter
, state
[0], state
[1], dest
, count
);
227 int32_t i
= Collator::fromUCollator(coll
)->
228 internalNextSortKeyPart(iter
, state
, dest
, count
, *status
);
230 // Return number of meaningful sortkey bytes.
231 UTRACE_DATA4(UTRACE_VERBOSE
, "dest = %vb, state=%d %d",
232 dest
,i
, state
[0], state
[1]);
233 UTRACE_EXIT_VALUE_STATUS(i
, *status
);
238 * Produce a bound for a given sortkey and a number of levels.
// ucol_getBound: scans `source` up to a number of levels, copies that prefix into
// `result`, appends bytes that depend on boundType, and terminates it with 0.
// NOTE(review): the noOfLevels/result/status parameter lines, the loop header, the
// switch statement, `break`s and several returns are not visible in this excerpt.
240 U_CAPI
int32_t U_EXPORT2
241 ucol_getBound(const uint8_t *source
,
242 int32_t sourceLength
,
243 UColBoundMode boundType
,
246 int32_t resultLength
,
249 // consistency checks
250 if(status
== NULL
|| U_FAILURE(*status
)) {
// Illegal-argument error; the condition that triggers it is not visible.
254 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
258 int32_t sourceIndex
= 0;
259 // Scan the string until we skip enough of the key OR reach the end of the key
// Level separators are detected here; the body of this branch is not visible.
262 if(source
[sourceIndex
] == Collation::LEVEL_SEPARATOR_BYTE
) {
// Scanning continues while levels remain AND (the current byte is non-zero OR the
// index is still below sourceLength).
265 } while (noOfLevels
> 0
266 && (source
[sourceIndex
] != 0 || sourceIndex
< sourceLength
));
// Warning when the scan stopped at a 0 byte or at sourceLength (the remainder of
// this condition is not visible).
268 if((source
[sourceIndex
] == 0 || sourceIndex
== sourceLength
)
270 *status
= U_SORT_KEY_TOO_SHORT_WARNING
;
274 // READ ME: this code assumes that the values for boundType
275 // enum will not change. They are set so that the enum value
276 // corresponds to the number of extra bytes each bound type
// boundType is used arithmetically as the count of extra bytes in the capacity check.
278 if(result
!= NULL
&& resultLength
>= sourceIndex
+boundType
) {
279 uprv_memcpy(result
, source
, sourceIndex
);
281 // Lower bound just gets terminated. No extra bytes
282 case UCOL_BOUND_LOWER
: // = 0
284 // Upper bound needs one extra byte
285 case UCOL_BOUND_UPPER
: // = 1
286 result
[sourceIndex
++] = 2;
288 // Upper long bound needs two extra bytes
289 case UCOL_BOUND_UPPER_LONG
: // = 2
290 result
[sourceIndex
++] = 0xFF;
291 result
[sourceIndex
++] = 0xFF;
// Presumably the default case for an unknown boundType — label not visible; TODO confirm.
294 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
// Terminate the produced bound with a 0 byte.
297 result
[sourceIndex
++] = 0;
// Presumably the branch taken when the buffer check above fails: prefix length plus
// extra bytes plus one — TODO confirm, the enclosing else is not visible.
301 return sourceIndex
+boundType
+1;
// ucol_setMaxVariable: forwards `group` to Collator::setMaxVariable() on the object
// behind `coll`; does nothing when *pErrorCode already signals failure.
// NOTE(review): pErrorCode is dereferenced without a NULL check.
305 U_CAPI
void U_EXPORT2
306 ucol_setMaxVariable(UCollator
*coll
, UColReorderCode group
, UErrorCode
*pErrorCode
) {
307 if(U_FAILURE(*pErrorCode
)) { return; }
308 Collator::fromUCollator(coll
)->setMaxVariable(group
, *pErrorCode
);
// ucol_getMaxVariable: returns Collator::getMaxVariable() of the object behind `coll`.
// No NULL check is performed on `coll` in the visible code.
311 U_CAPI UColReorderCode U_EXPORT2
312 ucol_getMaxVariable(const UCollator
*coll
) {
313 return Collator::fromUCollator(coll
)->getMaxVariable();
// ucol_setVariableTop: forwards (varTop, len, *status) to Collator::setVariableTop()
// and returns its uint32_t result.
316 U_CAPI
uint32_t U_EXPORT2
317 ucol_setVariableTop(UCollator
*coll
, const UChar
*varTop
, int32_t len
, UErrorCode
*status
) {
// Early exit on a failed status or a NULL collator; the returned value is not visible here.
318 if(U_FAILURE(*status
) || coll
== NULL
) {
321 return Collator::fromUCollator(coll
)->setVariableTop(varTop
, len
, *status
);
// ucol_getVariableTop: returns Collator::getVariableTop(*status) of the object behind `coll`.
324 U_CAPI
uint32_t U_EXPORT2
ucol_getVariableTop(const UCollator
*coll
, UErrorCode
*status
) {
// Early exit on a failed status or a NULL collator; the returned value is not visible here.
325 if(U_FAILURE(*status
) || coll
== NULL
) {
328 return Collator::fromUCollator(coll
)->getVariableTop(*status
);
// ucol_restoreVariableTop: passes a previously obtained 32-bit varTop value to the
// (varTop, status) overload of Collator::setVariableTop().
331 U_CAPI
void U_EXPORT2
332 ucol_restoreVariableTop(UCollator
*coll
, const uint32_t varTop
, UErrorCode
*status
) {
// Early exit on a failed status or a NULL collator (branch body not visible).
333 if(U_FAILURE(*status
) || coll
== NULL
) {
336 Collator::fromUCollator(coll
)->setVariableTop(varTop
, *status
);
// ucol_setAttribute: forwards (attr, value, *status) to Collator::setAttribute()
// on the object behind `coll`.
339 U_CAPI
void U_EXPORT2
340 ucol_setAttribute(UCollator
*coll
, UColAttribute attr
, UColAttributeValue value
, UErrorCode
*status
) {
// Early exit on a failed status or a NULL collator (branch body not visible).
341 if(U_FAILURE(*status
) || coll
== NULL
) {
345 Collator::fromUCollator(coll
)->setAttribute(attr
, value
, *status
);
// ucol_getAttribute: returns Collator::getAttribute(attr, *status) of the object behind `coll`.
348 U_CAPI UColAttributeValue U_EXPORT2
349 ucol_getAttribute(const UCollator
*coll
, UColAttribute attr
, UErrorCode
*status
) {
// Early exit on a failed status or a NULL collator; the returned value is not visible here.
350 if(U_FAILURE(*status
) || coll
== NULL
) {
354 return Collator::fromUCollator(coll
)->getAttribute(attr
, *status
);
// ucol_setStrength: convenience wrapper that sets UCOL_STRENGTH through ucol_setAttribute().
// The API has no status parameter, so a local status is used and not reported to the caller
// in the visible code.
357 U_CAPI
void U_EXPORT2
358 ucol_setStrength( UCollator
*coll
,
359 UCollationStrength strength
)
361 UErrorCode status
= U_ZERO_ERROR
;
362 ucol_setAttribute(coll
, UCOL_STRENGTH
, strength
, &status
);
// ucol_getStrength: convenience wrapper that reads UCOL_STRENGTH through ucol_getAttribute().
// A local status is passed to the call and is not inspected afterwards in the visible code.
365 U_CAPI UCollationStrength U_EXPORT2
366 ucol_getStrength(const UCollator
*coll
)
368 UErrorCode status
= U_ZERO_ERROR
;
369 return ucol_getAttribute(coll
, UCOL_STRENGTH
, &status
);
// ucol_getReorderCodes: returns Collator::getReorderCodes(dest, destCapacity, *status)
// of the object behind `coll`.
// NOTE(review): the `dest` parameter line is not visible in this excerpt.
372 U_CAPI
int32_t U_EXPORT2
373 ucol_getReorderCodes(const UCollator
*coll
,
375 int32_t destCapacity
,
376 UErrorCode
*status
) {
// Early exit on a failed status (branch body not visible).
377 if (U_FAILURE(*status
)) {
381 return Collator::fromUCollator(coll
)->getReorderCodes(dest
, destCapacity
, *status
);
// ucol_setReorderCodes: forwards (reorderCodes, reorderCodesLength, *status) to
// Collator::setReorderCodes() on the object behind `coll`.
384 U_CAPI
void U_EXPORT2
385 ucol_setReorderCodes(UCollator
* coll
,
386 const int32_t* reorderCodes
,
387 int32_t reorderCodesLength
,
388 UErrorCode
*status
) {
// Early exit on a failed status (branch body not visible).
389 if (U_FAILURE(*status
)) {
393 Collator::fromUCollator(coll
)->setReorderCodes(reorderCodes
, reorderCodesLength
, *status
);
// ucol_getEquivalentReorderCodes: thin wrapper over the static
// Collator::getEquivalentReorderCodes(); no collator handle is involved.
// NOTE(review): the `dest` parameter line is not visible in this excerpt. No status
// check is done here; presumably the callee handles a failed *pErrorCode — TODO confirm.
396 U_CAPI
int32_t U_EXPORT2
397 ucol_getEquivalentReorderCodes(int32_t reorderCode
,
399 int32_t destCapacity
,
400 UErrorCode
*pErrorCode
) {
401 return Collator::getEquivalentReorderCodes(reorderCode
, dest
, destCapacity
, *pErrorCode
);
// ucol_getVersion: fills versionInfo via Collator::getVersion() of the object behind `coll`.
// No NULL check is performed on `coll` in the visible code.
404 U_CAPI
void U_EXPORT2
405 ucol_getVersion(const UCollator
* coll
,
406 UVersionInfo versionInfo
)
408 Collator::fromUCollator(coll
)->getVersion(versionInfo
);
// ucol_strcollIter: compares the text of two UCharIterators with
// Collator::compare(*sIter, *tIter, *status) on the object behind `coll`.
// NOTE(review): the status parameter line, braces and return statements are not
// visible in this excerpt.
411 U_CAPI UCollationResult U_EXPORT2
412 ucol_strcollIter( const UCollator
*coll
,
413 UCharIterator
*sIter
,
414 UCharIterator
*tIter
,
// The status pointer itself is NULL-checked before being dereferenced.
417 if(!status
|| U_FAILURE(*status
)) {
421 UTRACE_ENTRY(UTRACE_UCOL_STRCOLLITER
);
422 UTRACE_DATA3(UTRACE_VERBOSE
, "coll=%p, sIter=%p, tIter=%p", coll
, sIter
, tIter
);
// Any NULL argument is an illegal-argument error; UCOL_EQUAL is the value traced on this path.
424 if(sIter
== NULL
|| tIter
== NULL
|| coll
== NULL
) {
425 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
426 UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL
, *status
);
// Both iterators are dereferenced and passed on to compare().
430 UCollationResult result
= Collator::fromUCollator(coll
)->compare(*sIter
, *tIter
, *status
);
432 UTRACE_EXIT_VALUE_STATUS(result
, *status
);
438 /* ucol_strcoll Main public API string comparison function */
// ucol_strcoll: compares two strings with Collator::compare(source, sourceLength,
// target, targetLength, status) on the object behind `coll`.
// The API has no status parameter; a local UErrorCode is used and only passed to the trace macro
// in the visible code.
// NOTE(review): the `source`/`target` parameter lines, braces and the return are not visible here.
440 U_CAPI UCollationResult U_EXPORT2
441 ucol_strcoll( const UCollator
*coll
,
443 int32_t sourceLength
,
445 int32_t targetLength
)
447 UTRACE_ENTRY(UTRACE_UCOL_STRCOLL
);
// Verbose tracing of the arguments only; no effect on the comparison.
448 if (UTRACE_LEVEL(UTRACE_VERBOSE
)) {
449 UTRACE_DATA3(UTRACE_VERBOSE
, "coll=%p, source=%p, target=%p", coll
, source
, target
);
450 UTRACE_DATA2(UTRACE_VERBOSE
, "source string = %vh ", source
, sourceLength
);
451 UTRACE_DATA2(UTRACE_VERBOSE
, "target string = %vh ", target
, targetLength
);
454 UErrorCode status
= U_ZERO_ERROR
;
455 UCollationResult returnVal
= Collator::fromUCollator(coll
)->
456 compare(source
, sourceLength
, target
, targetLength
, status
);
457 UTRACE_EXIT_VALUE_STATUS(returnVal
, status
);
// UTF-8 string comparison entry point: forwards to Collator::internalCompareUTF8(source,
// sourceLength, target, targetLength, *status) on the object behind `coll`.
// NOTE(review): the line carrying the function name is not visible in this excerpt;
// the trace id UTRACE_UCOL_STRCOLLUTF8 suggests it is ucol_strcollUTF8 — TODO confirm.
// The `source`/`target`/`status` parameter lines and the returns are also not visible.
461 U_CAPI UCollationResult U_EXPORT2
463 const UCollator
*coll
,
465 int32_t sourceLength
,
467 int32_t targetLength
,
470 UTRACE_ENTRY(UTRACE_UCOL_STRCOLLUTF8
);
// Verbose tracing of the arguments; the strings are logged with the %vb (byte) format.
471 if (UTRACE_LEVEL(UTRACE_VERBOSE
)) {
472 UTRACE_DATA3(UTRACE_VERBOSE
, "coll=%p, source=%p, target=%p", coll
, source
, target
);
473 UTRACE_DATA2(UTRACE_VERBOSE
, "source string = %vb ", source
, sourceLength
);
474 UTRACE_DATA2(UTRACE_VERBOSE
, "target string = %vb ", target
, targetLength
);
// On an already-failed status, UCOL_EQUAL is the value traced on exit.
477 if (U_FAILURE(*status
)) {
479 UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL
, *status
);
483 UCollationResult returnVal
= Collator::fromUCollator(coll
)->internalCompareUTF8(
484 source
, sourceLength
, target
, targetLength
, *status
);
485 UTRACE_EXIT_VALUE_STATUS(returnVal
, *status
);
490 /* convenience function for comparing strings */
// ucol_greater: boolean convenience wrapper built on ucol_strcoll().
// NOTE(review): the `source`/`target` parameter lines and the value the ucol_strcoll()
// result is compared against are not visible in this excerpt.
491 U_CAPI UBool U_EXPORT2
492 ucol_greater( const UCollator
*coll
,
494 int32_t sourceLength
,
496 int32_t targetLength
)
498 return (ucol_strcoll(coll
, source
, sourceLength
, target
, targetLength
)
502 /* convenience function for comparing strings */
// ucol_greaterOrEqual: boolean convenience wrapper built on ucol_strcoll().
// NOTE(review): the `source`/`target` parameter lines and the value the ucol_strcoll()
// result is compared against are not visible in this excerpt.
503 U_CAPI UBool U_EXPORT2
504 ucol_greaterOrEqual( const UCollator
*coll
,
506 int32_t sourceLength
,
508 int32_t targetLength
)
510 return (ucol_strcoll(coll
, source
, sourceLength
, target
, targetLength
)
514 /* convenience function for comparing strings */
// ucol_equal: boolean convenience wrapper built on ucol_strcoll().
// NOTE(review): the `source`/`target` parameter lines and the value the ucol_strcoll()
// result is compared against are not visible in this excerpt.
515 U_CAPI UBool U_EXPORT2
516 ucol_equal( const UCollator
*coll
,
518 int32_t sourceLength
,
520 int32_t targetLength
)
522 return (ucol_strcoll(coll
, source
, sourceLength
, target
, targetLength
)
// ucol_getUCAVersion: reports a UCA version into `info` for the given collator.
// NOTE(review): only the handle conversion and the explanatory comments are visible in
// this excerpt; the statements that fill `info` are missing. Per the existing note below,
// the value is derived from how getVersion() encodes the UCA version.
526 U_CAPI
void U_EXPORT2
527 ucol_getUCAVersion(const UCollator
* coll
, UVersionInfo info
) {
528 const Collator
*c
= Collator::fromUCollator(coll
);
532 // Note: This is tied to how the current implementation encodes the UCA version
533 // in the overall getVersion().
534 // Alternatively, we could load the root collator and get at lower-level data from there.
535 // Either way, it will reflect the input collator's UCA version only
536 // if it is a known implementation.
537 // It would be cleaner to make this a virtual Collator method.
// ucol_getRules: returns a pointer to the rule string's internal buffer and stores
// its length in *length.
// NOTE(review): *length is written without a NULL check. The lines after the static
// _NUL declaration (the fallback path) are not visible in this excerpt.
545 U_CAPI
const UChar
* U_EXPORT2
546 ucol_getRules(const UCollator
*coll
, int32_t *length
) {
547 const RuleBasedCollator
*rbc
= RuleBasedCollator::rbcFromUCollator(coll
);
548 // OK to crash if coll==NULL: We do not want to check "this" pointers.
// The branch is also entered when coll is NULL, in which case rbc is dereferenced anyway
// (accepted per the comment above).
549 if(rbc
!= NULL
|| coll
== NULL
) {
550 const UnicodeString
&rules
= rbc
->getRules();
// Debug-time check that the buffer holds a 0 at index length().
551 U_ASSERT(rules
.getBuffer()[rules
.length()] == 0);
552 *length
= rules
.length();
553 return rules
.getBuffer();
// A static zero UChar; presumably returned as an empty string on the fallback path — TODO confirm.
555 static const UChar _NUL
= 0;
// ucol_getRulesEx: obtains the rules selected by `delta` into a local `rules` string,
// then either extracts them into `buffer` or returns only their length.
// NOTE(review): the declaration of `rules` and the closing braces are not visible in this excerpt.
560 U_CAPI
int32_t U_EXPORT2
561 ucol_getRulesEx(const UCollator
*coll
, UColRuleOption delta
, UChar
*buffer
, int32_t bufferLen
) {
563 const RuleBasedCollator
*rbc
= RuleBasedCollator::rbcFromUCollator(coll
);
// Entered for a rule-based collator, and also when coll is NULL (rbc is then dereferenced anyway).
564 if(rbc
!= NULL
|| coll
== NULL
) {
565 rbc
->getRules(delta
, rules
);
// With a usable buffer, copy the rules out; the local errorCode is not inspected afterwards
// in the visible code.
567 if(buffer
!= NULL
&& bufferLen
> 0) {
568 UErrorCode errorCode
= U_ZERO_ERROR
;
569 return rules
.extract(buffer
, bufferLen
, errorCode
);
// Otherwise report only the length of the rules string.
571 return rules
.length();
// ucol_getLocale: forwards unchanged to ucol_getLocaleByType() with the same arguments.
575 U_CAPI
const char * U_EXPORT2
576 ucol_getLocale(const UCollator
*coll
, ULocDataLocaleType type
, UErrorCode
*status
) {
577 return ucol_getLocaleByType(coll
, type
, status
);
// ucol_getLocaleByType: obtains a locale ID string of the requested `type` via
// RuleBasedCollator::internalGetLocaleID(type, *status), with tracing.
// NOTE(review): the declaration of `result`, the branch structure and the return
// statements are not visible in this excerpt.
580 U_CAPI
const char * U_EXPORT2
581 ucol_getLocaleByType(const UCollator
*coll
, ULocDataLocaleType type
, UErrorCode
*status
) {
// Early exit on a failed status (branch body not visible).
582 if(U_FAILURE(*status
)) {
585 UTRACE_ENTRY(UTRACE_UCOL_GETLOCALE
);
586 UTRACE_DATA1(UTRACE_INFO
, "coll=%p", coll
);
589 const RuleBasedCollator
*rbc
= RuleBasedCollator::rbcFromUCollator(coll
);
// A non-NULL handle for which rbcFromUCollator() yields NULL is reported as unsupported.
590 if(rbc
== NULL
&& coll
!= NULL
) {
591 *status
= U_UNSUPPORTED_ERROR
;
594 result
= rbc
->internalGetLocaleID(type
, *status
);
597 UTRACE_DATA1(UTRACE_INFO
, "result = %s", result
);
598 UTRACE_EXIT_STATUS(*status
);
// ucol_getTailoredSet: returns Collator::getTailoredSet(*status) of the object behind
// `coll`, converted to the C USet handle with toUSet().
// NOTE(review): the bodies of both failure branches are not visible in this excerpt.
602 U_CAPI USet
* U_EXPORT2
603 ucol_getTailoredSet(const UCollator
*coll
, UErrorCode
*status
) {
// Guard on the incoming status.
604 if(U_FAILURE(*status
)) {
607 UnicodeSet
*set
= Collator::fromUCollator(coll
)->getTailoredSet(*status
);
// Status is re-checked after the call, before the set is handed out.
608 if(U_FAILURE(*status
)) {
612 return set
->toUSet();
// ucol_equals: true when both handles are the same pointer, otherwise the result of
// Collator::operator== on the two underlying objects.
// The pointer comparison short-circuits, so identical handles (including two NULLs)
// never reach the dereference; a single NULL handle is dereferenced.
615 U_CAPI UBool U_EXPORT2
616 ucol_equals(const UCollator
*source
, const UCollator
*target
) {
617 return source
== target
||
618 (*Collator::fromUCollator(source
)) == (*Collator::fromUCollator(target
));
621 #endif /* #if !UCONFIG_NO_COLLATION */