// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*   Copyright (C) 1996-2015, International Business Machines
*   Corporation and others.  All Rights Reserved.
*******************************************************************************
*   tab size:   8 (not used)
*
* Modification history
* 1996-1999   various members of ICU team maintained C API for collation framework
* 02/16/2001  synwee    Added internal method getPrevSpecialCE
* 03/01/2001  synwee    Added maxexpansion functionality.
* 03/16/2001  weiv      Collation framework is rewritten in C and made UCA compliant
* 2012-2014   markus    Rewritten in C++ again.
*/
#include "unicode/utypes.h"

#if !UCONFIG_NO_COLLATION

#include "unicode/coll.h"
#include "unicode/tblcoll.h"
#include "unicode/bytestream.h"
#include "unicode/coleitr.h"
#include "unicode/ucoleitr.h"
#include "unicode/ustring.h"
// NOTE(review): the four internal headers below and U_NAMESPACE_USE are not
// visible in this listing; they are added because the code in this file uses
// uprv_memcpy/uprv_strlen, U_ASSERT, the UTRACE_* macros and the unqualified
// Collator/RuleBasedCollator names. Confirm against the build.
#include "cmemory.h"
#include "collation.h"
#include "cstring.h"
#include "uassert.h"
#include "utracimp.h"

U_NAMESPACE_USE
41 U_CAPI UCollator
* U_EXPORT2
42 ucol_openBinary(const uint8_t *bin
, int32_t length
,
43 const UCollator
*base
,
46 if(U_FAILURE(*status
)) { return NULL
; }
47 RuleBasedCollator
*coll
= new RuleBasedCollator(
49 RuleBasedCollator::rbcFromUCollator(base
),
52 *status
= U_MEMORY_ALLOCATION_ERROR
;
55 if(U_FAILURE(*status
)) {
59 return coll
->toUCollator();
62 U_CAPI
int32_t U_EXPORT2
63 ucol_cloneBinary(const UCollator
*coll
,
64 uint8_t *buffer
, int32_t capacity
,
67 if(U_FAILURE(*status
)) {
70 const RuleBasedCollator
*rbc
= RuleBasedCollator::rbcFromUCollator(coll
);
71 if(rbc
== NULL
&& coll
!= NULL
) {
72 *status
= U_UNSUPPORTED_ERROR
;
75 return rbc
->cloneBinary(buffer
, capacity
, *status
);
78 U_CAPI UCollator
* U_EXPORT2
79 ucol_safeClone(const UCollator
*coll
, void * /*stackBuffer*/, int32_t * pBufferSize
, UErrorCode
*status
)
81 if (status
== NULL
|| U_FAILURE(*status
)){
85 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
88 if (pBufferSize
!= NULL
) {
89 int32_t inputSize
= *pBufferSize
;
92 return NULL
; // preflighting for deprecated functionality
95 Collator
*newColl
= Collator::fromUCollator(coll
)->clone();
96 if (newColl
== NULL
) {
97 *status
= U_MEMORY_ALLOCATION_ERROR
;
99 *status
= U_SAFECLONE_ALLOCATED_WARNING
;
101 return newColl
->toUCollator();
104 U_CAPI
void U_EXPORT2
105 ucol_close(UCollator
*coll
)
107 UTRACE_ENTRY_OC(UTRACE_UCOL_CLOSE
);
108 UTRACE_DATA1(UTRACE_INFO
, "coll = %p", coll
);
110 delete Collator::fromUCollator(coll
);
115 U_CAPI
int32_t U_EXPORT2
116 ucol_mergeSortkeys(const uint8_t *src1
, int32_t src1Length
,
117 const uint8_t *src2
, int32_t src2Length
,
118 uint8_t *dest
, int32_t destCapacity
) {
119 /* check arguments */
120 if( src1
==NULL
|| src1Length
<-1 || src1Length
==0 || (src1Length
>0 && src1
[src1Length
-1]!=0) ||
121 src2
==NULL
|| src2Length
<-1 || src2Length
==0 || (src2Length
>0 && src2
[src2Length
-1]!=0) ||
122 destCapacity
<0 || (destCapacity
>0 && dest
==NULL
)
124 /* error, attempt to write a zero byte and return 0 */
125 if(dest
!=NULL
&& destCapacity
>0) {
131 /* check lengths and capacity */
133 src1Length
=(int32_t)uprv_strlen((const char *)src1
)+1;
136 src2Length
=(int32_t)uprv_strlen((const char *)src2
)+1;
139 int32_t destLength
=src1Length
+src2Length
;
140 if(destLength
>destCapacity
) {
141 /* the merged sort key does not fit into the destination */
145 /* merge the sort keys with the same number of levels */
148 /* copy level from src1 not including 00 or 01 */
150 while((b
=*src1
)>=2) {
155 /* add a 02 merge separator */
158 /* copy level from src2 not including 00 or 01 */
159 while((b
=*src2
)>=2) {
164 /* if both sort keys have another level, then add a 01 level separator and continue */
165 if(*src1
==1 && *src2
==1) {
175 * here, at least one sort key is finished now, but the other one
176 * might have some contents left from containing more levels;
177 * that contents is just appended to the result
180 /* src1 is not finished, therefore *src2==0, and src1 is appended */
183 /* append src2, "the other, unfinished sort key" */
184 while((*p
++=*src2
++)!=0) {}
186 /* the actual length might be less than destLength if either sort key contained illegally embedded zero bytes */
187 return (int32_t)(p
-dest
);
190 U_CAPI
int32_t U_EXPORT2
191 ucol_getSortKey(const UCollator
*coll
,
193 int32_t sourceLength
,
195 int32_t resultLength
)
197 UTRACE_ENTRY(UTRACE_UCOL_GET_SORTKEY
);
198 if (UTRACE_LEVEL(UTRACE_VERBOSE
)) {
199 UTRACE_DATA3(UTRACE_VERBOSE
, "coll=%p, source string = %vh ", coll
, source
,
200 ((sourceLength
==-1 && source
!=NULL
) ? u_strlen(source
) : sourceLength
));
203 int32_t keySize
= Collator::fromUCollator(coll
)->
204 getSortKey(source
, sourceLength
, result
, resultLength
);
206 UTRACE_DATA2(UTRACE_VERBOSE
, "Sort Key = %vb", result
, keySize
);
207 UTRACE_EXIT_VALUE(keySize
);
211 U_CAPI
int32_t U_EXPORT2
212 ucol_nextSortKeyPart(const UCollator
*coll
,
215 uint8_t *dest
, int32_t count
,
219 if(status
==NULL
|| U_FAILURE(*status
)) {
222 UTRACE_ENTRY(UTRACE_UCOL_NEXTSORTKEYPART
);
223 UTRACE_DATA6(UTRACE_VERBOSE
, "coll=%p, iter=%p, state=%d %d, dest=%p, count=%d",
224 coll
, iter
, state
[0], state
[1], dest
, count
);
226 int32_t i
= Collator::fromUCollator(coll
)->
227 internalNextSortKeyPart(iter
, state
, dest
, count
, *status
);
229 // Return number of meaningful sortkey bytes.
230 UTRACE_DATA4(UTRACE_VERBOSE
, "dest = %vb, state=%d %d",
231 dest
,i
, state
[0], state
[1]);
232 UTRACE_EXIT_VALUE_STATUS(i
, *status
);
237 * Produce a bound for a given sortkey and a number of levels.
239 U_CAPI
int32_t U_EXPORT2
240 ucol_getBound(const uint8_t *source
,
241 int32_t sourceLength
,
242 UColBoundMode boundType
,
245 int32_t resultLength
,
248 // consistency checks
249 if(status
== NULL
|| U_FAILURE(*status
)) {
253 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
257 int32_t sourceIndex
= 0;
258 // Scan the string until we skip enough of the key OR reach the end of the key
261 if(source
[sourceIndex
] == Collation::LEVEL_SEPARATOR_BYTE
) {
264 } while (noOfLevels
> 0
265 && (source
[sourceIndex
] != 0 || sourceIndex
< sourceLength
));
267 if((source
[sourceIndex
] == 0 || sourceIndex
== sourceLength
)
269 *status
= U_SORT_KEY_TOO_SHORT_WARNING
;
273 // READ ME: this code assumes that the values for boundType
274 // enum will not changes. They are set so that the enum value
275 // corresponds to the number of extra bytes each bound type
277 if(result
!= NULL
&& resultLength
>= sourceIndex
+boundType
) {
278 uprv_memcpy(result
, source
, sourceIndex
);
280 // Lower bound just gets terminated. No extra bytes
281 case UCOL_BOUND_LOWER
: // = 0
283 // Upper bound needs one extra byte
284 case UCOL_BOUND_UPPER
: // = 1
285 result
[sourceIndex
++] = 2;
287 // Upper long bound needs two extra bytes
288 case UCOL_BOUND_UPPER_LONG
: // = 2
289 result
[sourceIndex
++] = 0xFF;
290 result
[sourceIndex
++] = 0xFF;
293 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
296 result
[sourceIndex
++] = 0;
300 return sourceIndex
+boundType
+1;
304 U_CAPI
void U_EXPORT2
305 ucol_setMaxVariable(UCollator
*coll
, UColReorderCode group
, UErrorCode
*pErrorCode
) {
306 if(U_FAILURE(*pErrorCode
)) { return; }
307 Collator::fromUCollator(coll
)->setMaxVariable(group
, *pErrorCode
);
310 U_CAPI UColReorderCode U_EXPORT2
311 ucol_getMaxVariable(const UCollator
*coll
) {
312 return Collator::fromUCollator(coll
)->getMaxVariable();
315 U_CAPI
uint32_t U_EXPORT2
316 ucol_setVariableTop(UCollator
*coll
, const UChar
*varTop
, int32_t len
, UErrorCode
*status
) {
317 if(U_FAILURE(*status
) || coll
== NULL
) {
320 return Collator::fromUCollator(coll
)->setVariableTop(varTop
, len
, *status
);
323 U_CAPI
uint32_t U_EXPORT2
ucol_getVariableTop(const UCollator
*coll
, UErrorCode
*status
) {
324 if(U_FAILURE(*status
) || coll
== NULL
) {
327 return Collator::fromUCollator(coll
)->getVariableTop(*status
);
330 U_CAPI
void U_EXPORT2
331 ucol_restoreVariableTop(UCollator
*coll
, const uint32_t varTop
, UErrorCode
*status
) {
332 if(U_FAILURE(*status
) || coll
== NULL
) {
335 Collator::fromUCollator(coll
)->setVariableTop(varTop
, *status
);
338 U_CAPI
void U_EXPORT2
339 ucol_setAttribute(UCollator
*coll
, UColAttribute attr
, UColAttributeValue value
, UErrorCode
*status
) {
340 if(U_FAILURE(*status
) || coll
== NULL
) {
344 Collator::fromUCollator(coll
)->setAttribute(attr
, value
, *status
);
347 U_CAPI UColAttributeValue U_EXPORT2
348 ucol_getAttribute(const UCollator
*coll
, UColAttribute attr
, UErrorCode
*status
) {
349 if(U_FAILURE(*status
) || coll
== NULL
) {
353 return Collator::fromUCollator(coll
)->getAttribute(attr
, *status
);
356 U_CAPI
void U_EXPORT2
357 ucol_setStrength( UCollator
*coll
,
358 UCollationStrength strength
)
360 UErrorCode status
= U_ZERO_ERROR
;
361 ucol_setAttribute(coll
, UCOL_STRENGTH
, strength
, &status
);
364 U_CAPI UCollationStrength U_EXPORT2
365 ucol_getStrength(const UCollator
*coll
)
367 UErrorCode status
= U_ZERO_ERROR
;
368 return ucol_getAttribute(coll
, UCOL_STRENGTH
, &status
);
371 U_CAPI
int32_t U_EXPORT2
372 ucol_getReorderCodes(const UCollator
*coll
,
374 int32_t destCapacity
,
375 UErrorCode
*status
) {
376 if (U_FAILURE(*status
)) {
380 return Collator::fromUCollator(coll
)->getReorderCodes(dest
, destCapacity
, *status
);
383 U_CAPI
void U_EXPORT2
384 ucol_setReorderCodes(UCollator
* coll
,
385 const int32_t* reorderCodes
,
386 int32_t reorderCodesLength
,
387 UErrorCode
*status
) {
388 if (U_FAILURE(*status
)) {
392 Collator::fromUCollator(coll
)->setReorderCodes(reorderCodes
, reorderCodesLength
, *status
);
395 U_CAPI
int32_t U_EXPORT2
396 ucol_getEquivalentReorderCodes(int32_t reorderCode
,
398 int32_t destCapacity
,
399 UErrorCode
*pErrorCode
) {
400 return Collator::getEquivalentReorderCodes(reorderCode
, dest
, destCapacity
, *pErrorCode
);
403 U_CAPI
void U_EXPORT2
404 ucol_getVersion(const UCollator
* coll
,
405 UVersionInfo versionInfo
)
407 Collator::fromUCollator(coll
)->getVersion(versionInfo
);
410 U_CAPI UCollationResult U_EXPORT2
411 ucol_strcollIter( const UCollator
*coll
,
412 UCharIterator
*sIter
,
413 UCharIterator
*tIter
,
416 if(!status
|| U_FAILURE(*status
)) {
420 UTRACE_ENTRY(UTRACE_UCOL_STRCOLLITER
);
421 UTRACE_DATA3(UTRACE_VERBOSE
, "coll=%p, sIter=%p, tIter=%p", coll
, sIter
, tIter
);
423 if(sIter
== NULL
|| tIter
== NULL
|| coll
== NULL
) {
424 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
425 UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL
, *status
);
429 UCollationResult result
= Collator::fromUCollator(coll
)->compare(*sIter
, *tIter
, *status
);
431 UTRACE_EXIT_VALUE_STATUS(result
, *status
);
437 /* ucol_strcoll Main public API string comparison function */
439 U_CAPI UCollationResult U_EXPORT2
440 ucol_strcoll( const UCollator
*coll
,
442 int32_t sourceLength
,
444 int32_t targetLength
)
446 UTRACE_ENTRY(UTRACE_UCOL_STRCOLL
);
447 if (UTRACE_LEVEL(UTRACE_VERBOSE
)) {
448 UTRACE_DATA3(UTRACE_VERBOSE
, "coll=%p, source=%p, target=%p", coll
, source
, target
);
449 UTRACE_DATA2(UTRACE_VERBOSE
, "source string = %vh ", source
, sourceLength
);
450 UTRACE_DATA2(UTRACE_VERBOSE
, "target string = %vh ", target
, targetLength
);
453 UErrorCode status
= U_ZERO_ERROR
;
454 UCollationResult returnVal
= Collator::fromUCollator(coll
)->
455 compare(source
, sourceLength
, target
, targetLength
, status
);
456 UTRACE_EXIT_VALUE_STATUS(returnVal
, status
);
460 U_CAPI UCollationResult U_EXPORT2
462 const UCollator
*coll
,
464 int32_t sourceLength
,
466 int32_t targetLength
,
469 UTRACE_ENTRY(UTRACE_UCOL_STRCOLLUTF8
);
470 if (UTRACE_LEVEL(UTRACE_VERBOSE
)) {
471 UTRACE_DATA3(UTRACE_VERBOSE
, "coll=%p, source=%p, target=%p", coll
, source
, target
);
472 UTRACE_DATA2(UTRACE_VERBOSE
, "source string = %vb ", source
, sourceLength
);
473 UTRACE_DATA2(UTRACE_VERBOSE
, "target string = %vb ", target
, targetLength
);
476 if (U_FAILURE(*status
)) {
478 UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL
, *status
);
482 UCollationResult returnVal
= Collator::fromUCollator(coll
)->internalCompareUTF8(
483 source
, sourceLength
, target
, targetLength
, *status
);
484 UTRACE_EXIT_VALUE_STATUS(returnVal
, *status
);
489 /* convenience function for comparing strings */
490 U_CAPI UBool U_EXPORT2
491 ucol_greater( const UCollator
*coll
,
493 int32_t sourceLength
,
495 int32_t targetLength
)
497 return (ucol_strcoll(coll
, source
, sourceLength
, target
, targetLength
)
501 /* convenience function for comparing strings */
502 U_CAPI UBool U_EXPORT2
503 ucol_greaterOrEqual( const UCollator
*coll
,
505 int32_t sourceLength
,
507 int32_t targetLength
)
509 return (ucol_strcoll(coll
, source
, sourceLength
, target
, targetLength
)
513 /* convenience function for comparing strings */
514 U_CAPI UBool U_EXPORT2
515 ucol_equal( const UCollator
*coll
,
517 int32_t sourceLength
,
519 int32_t targetLength
)
521 return (ucol_strcoll(coll
, source
, sourceLength
, target
, targetLength
)
525 U_CAPI
void U_EXPORT2
526 ucol_getUCAVersion(const UCollator
* coll
, UVersionInfo info
) {
527 const Collator
*c
= Collator::fromUCollator(coll
);
531 // Note: This is tied to how the current implementation encodes the UCA version
532 // in the overall getVersion().
533 // Alternatively, we could load the root collator and get at lower-level data from there.
534 // Either way, it will reflect the input collator's UCA version only
535 // if it is a known implementation.
536 // It would be cleaner to make this a virtual Collator method.
544 U_CAPI
const UChar
* U_EXPORT2
545 ucol_getRules(const UCollator
*coll
, int32_t *length
) {
546 const RuleBasedCollator
*rbc
= RuleBasedCollator::rbcFromUCollator(coll
);
547 // OK to crash if coll==NULL: We do not want to check "this" pointers.
548 if(rbc
!= NULL
|| coll
== NULL
) {
549 const UnicodeString
&rules
= rbc
->getRules();
550 U_ASSERT(rules
.getBuffer()[rules
.length()] == 0);
551 *length
= rules
.length();
552 return rules
.getBuffer();
554 static const UChar _NUL
= 0;
559 U_CAPI
int32_t U_EXPORT2
560 ucol_getRulesEx(const UCollator
*coll
, UColRuleOption delta
, UChar
*buffer
, int32_t bufferLen
) {
562 const RuleBasedCollator
*rbc
= RuleBasedCollator::rbcFromUCollator(coll
);
563 if(rbc
!= NULL
|| coll
== NULL
) {
564 rbc
->getRules(delta
, rules
);
566 if(buffer
!= NULL
&& bufferLen
> 0) {
567 UErrorCode errorCode
= U_ZERO_ERROR
;
568 return rules
.extract(buffer
, bufferLen
, errorCode
);
570 return rules
.length();
574 U_CAPI
const char * U_EXPORT2
575 ucol_getLocale(const UCollator
*coll
, ULocDataLocaleType type
, UErrorCode
*status
) {
576 return ucol_getLocaleByType(coll
, type
, status
);
579 U_CAPI
const char * U_EXPORT2
580 ucol_getLocaleByType(const UCollator
*coll
, ULocDataLocaleType type
, UErrorCode
*status
) {
581 if(U_FAILURE(*status
)) {
584 UTRACE_ENTRY(UTRACE_UCOL_GETLOCALE
);
585 UTRACE_DATA1(UTRACE_INFO
, "coll=%p", coll
);
588 const RuleBasedCollator
*rbc
= RuleBasedCollator::rbcFromUCollator(coll
);
589 if(rbc
== NULL
&& coll
!= NULL
) {
590 *status
= U_UNSUPPORTED_ERROR
;
593 result
= rbc
->internalGetLocaleID(type
, *status
);
596 UTRACE_DATA1(UTRACE_INFO
, "result = %s", result
);
597 UTRACE_EXIT_STATUS(*status
);
601 U_CAPI USet
* U_EXPORT2
602 ucol_getTailoredSet(const UCollator
*coll
, UErrorCode
*status
) {
603 if(U_FAILURE(*status
)) {
606 UnicodeSet
*set
= Collator::fromUCollator(coll
)->getTailoredSet(*status
);
607 if(U_FAILURE(*status
)) {
611 return set
->toUSet();
614 U_CAPI UBool U_EXPORT2
615 ucol_equals(const UCollator
*source
, const UCollator
*target
) {
616 return source
== target
||
617 (*Collator::fromUCollator(source
)) == (*Collator::fromUCollator(target
));
620 #endif /* #if !UCONFIG_NO_COLLATION */