2 *******************************************************************************
3 * Copyright (C) 1996-2015, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
8 * tab size: 8 (not used)
11 * Modification history
13 * 1996-1999 various members of ICU team maintained C API for collation framework
14 * 02/16/2001 synwee Added internal method getPrevSpecialCE
15 * 03/01/2001 synwee Added maxexpansion functionality.
16 * 03/16/2001 weiv Collation framework is rewritten in C and made UCA compliant
17 * 2012-2014 markus Rewritten in C++ again.
20 #include "unicode/utypes.h"
22 #if !UCONFIG_NO_COLLATION
24 #include "unicode/coll.h"
25 #include "unicode/tblcoll.h"
26 #include "unicode/bytestream.h"
27 #include "unicode/coleitr.h"
28 #include "unicode/ucoleitr.h"
29 #include "unicode/ustring.h"
31 #include "collation.h"
39 U_CAPI UCollator
* U_EXPORT2
40 ucol_openBinary(const uint8_t *bin
, int32_t length
,
41 const UCollator
*base
,
44 if(U_FAILURE(*status
)) { return NULL
; }
45 RuleBasedCollator
*coll
= new RuleBasedCollator(
47 RuleBasedCollator::rbcFromUCollator(base
),
50 *status
= U_MEMORY_ALLOCATION_ERROR
;
53 if(U_FAILURE(*status
)) {
57 return coll
->toUCollator();
60 U_CAPI
int32_t U_EXPORT2
61 ucol_cloneBinary(const UCollator
*coll
,
62 uint8_t *buffer
, int32_t capacity
,
65 if(U_FAILURE(*status
)) {
68 const RuleBasedCollator
*rbc
= RuleBasedCollator::rbcFromUCollator(coll
);
69 if(rbc
== NULL
&& coll
!= NULL
) {
70 *status
= U_UNSUPPORTED_ERROR
;
73 return rbc
->cloneBinary(buffer
, capacity
, *status
);
76 U_CAPI UCollator
* U_EXPORT2
77 ucol_safeClone(const UCollator
*coll
, void * /*stackBuffer*/, int32_t * pBufferSize
, UErrorCode
*status
)
79 if (status
== NULL
|| U_FAILURE(*status
)){
83 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
86 if (pBufferSize
!= NULL
) {
87 int32_t inputSize
= *pBufferSize
;
90 return NULL
; // preflighting for deprecated functionality
93 Collator
*newColl
= Collator::fromUCollator(coll
)->clone();
94 if (newColl
== NULL
) {
95 *status
= U_MEMORY_ALLOCATION_ERROR
;
97 *status
= U_SAFECLONE_ALLOCATED_WARNING
;
99 return newColl
->toUCollator();
102 U_CAPI
void U_EXPORT2
103 ucol_close(UCollator
*coll
)
105 UTRACE_ENTRY_OC(UTRACE_UCOL_CLOSE
);
106 UTRACE_DATA1(UTRACE_INFO
, "coll = %p", coll
);
108 delete Collator::fromUCollator(coll
);
113 U_CAPI
int32_t U_EXPORT2
114 ucol_mergeSortkeys(const uint8_t *src1
, int32_t src1Length
,
115 const uint8_t *src2
, int32_t src2Length
,
116 uint8_t *dest
, int32_t destCapacity
) {
117 /* check arguments */
118 if( src1
==NULL
|| src1Length
<-1 || src1Length
==0 || (src1Length
>0 && src1
[src1Length
-1]!=0) ||
119 src2
==NULL
|| src2Length
<-1 || src2Length
==0 || (src2Length
>0 && src2
[src2Length
-1]!=0) ||
120 destCapacity
<0 || (destCapacity
>0 && dest
==NULL
)
122 /* error, attempt to write a zero byte and return 0 */
123 if(dest
!=NULL
&& destCapacity
>0) {
129 /* check lengths and capacity */
131 src1Length
=(int32_t)uprv_strlen((const char *)src1
)+1;
134 src2Length
=(int32_t)uprv_strlen((const char *)src2
)+1;
137 int32_t destLength
=src1Length
+src2Length
;
138 if(destLength
>destCapacity
) {
139 /* the merged sort key does not fit into the destination */
143 /* merge the sort keys with the same number of levels */
146 /* copy level from src1 not including 00 or 01 */
148 while((b
=*src1
)>=2) {
153 /* add a 02 merge separator */
156 /* copy level from src2 not including 00 or 01 */
157 while((b
=*src2
)>=2) {
162 /* if both sort keys have another level, then add a 01 level separator and continue */
163 if(*src1
==1 && *src2
==1) {
173 * here, at least one sort key is finished now, but the other one
174 * might have some contents left from containing more levels;
175 * those contents are just appended to the result
178 /* src1 is not finished, therefore *src2==0, and src1 is appended */
181 /* append src2, "the other, unfinished sort key" */
182 while((*p
++=*src2
++)!=0) {}
184 /* the actual length might be less than destLength if either sort key contained illegally embedded zero bytes */
185 return (int32_t)(p
-dest
);
188 U_CAPI
int32_t U_EXPORT2
189 ucol_getSortKey(const UCollator
*coll
,
191 int32_t sourceLength
,
193 int32_t resultLength
)
195 UTRACE_ENTRY(UTRACE_UCOL_GET_SORTKEY
);
196 if (UTRACE_LEVEL(UTRACE_VERBOSE
)) {
197 UTRACE_DATA3(UTRACE_VERBOSE
, "coll=%p, source string = %vh ", coll
, source
,
198 ((sourceLength
==-1 && source
!=NULL
) ? u_strlen(source
) : sourceLength
));
201 int32_t keySize
= Collator::fromUCollator(coll
)->
202 getSortKey(source
, sourceLength
, result
, resultLength
);
204 UTRACE_DATA2(UTRACE_VERBOSE
, "Sort Key = %vb", result
, keySize
);
205 UTRACE_EXIT_VALUE(keySize
);
209 U_CAPI
int32_t U_EXPORT2
210 ucol_nextSortKeyPart(const UCollator
*coll
,
213 uint8_t *dest
, int32_t count
,
217 if(status
==NULL
|| U_FAILURE(*status
)) {
220 UTRACE_ENTRY(UTRACE_UCOL_NEXTSORTKEYPART
);
221 UTRACE_DATA6(UTRACE_VERBOSE
, "coll=%p, iter=%p, state=%d %d, dest=%p, count=%d",
222 coll
, iter
, state
[0], state
[1], dest
, count
);
224 int32_t i
= Collator::fromUCollator(coll
)->
225 internalNextSortKeyPart(iter
, state
, dest
, count
, *status
);
227 // Return number of meaningful sortkey bytes.
228 UTRACE_DATA4(UTRACE_VERBOSE
, "dest = %vb, state=%d %d",
229 dest
,i
, state
[0], state
[1]);
230 UTRACE_EXIT_VALUE_STATUS(i
, *status
);
235 * Produce a bound for a given sortkey and a number of levels.
237 U_CAPI
int32_t U_EXPORT2
238 ucol_getBound(const uint8_t *source
,
239 int32_t sourceLength
,
240 UColBoundMode boundType
,
243 int32_t resultLength
,
246 // consistency checks
247 if(status
== NULL
|| U_FAILURE(*status
)) {
251 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
255 int32_t sourceIndex
= 0;
256 // Scan the string until we skip enough of the key OR reach the end of the key
259 if(source
[sourceIndex
] == Collation::LEVEL_SEPARATOR_BYTE
) {
262 } while (noOfLevels
> 0
263 && (source
[sourceIndex
] != 0 || sourceIndex
< sourceLength
));
265 if((source
[sourceIndex
] == 0 || sourceIndex
== sourceLength
)
267 *status
= U_SORT_KEY_TOO_SHORT_WARNING
;
271 // READ ME: this code assumes that the values for boundType
272 // enum will not change. They are set so that the enum value
273 // corresponds to the number of extra bytes each bound type
275 if(result
!= NULL
&& resultLength
>= sourceIndex
+boundType
) {
276 uprv_memcpy(result
, source
, sourceIndex
);
278 // Lower bound just gets terminated. No extra bytes
279 case UCOL_BOUND_LOWER
: // = 0
281 // Upper bound needs one extra byte
282 case UCOL_BOUND_UPPER
: // = 1
283 result
[sourceIndex
++] = 2;
285 // Upper long bound needs two extra bytes
286 case UCOL_BOUND_UPPER_LONG
: // = 2
287 result
[sourceIndex
++] = 0xFF;
288 result
[sourceIndex
++] = 0xFF;
291 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
294 result
[sourceIndex
++] = 0;
298 return sourceIndex
+boundType
+1;
302 U_CAPI
void U_EXPORT2
303 ucol_setMaxVariable(UCollator
*coll
, UColReorderCode group
, UErrorCode
*pErrorCode
) {
304 if(U_FAILURE(*pErrorCode
)) { return; }
305 Collator::fromUCollator(coll
)->setMaxVariable(group
, *pErrorCode
);
308 U_CAPI UColReorderCode U_EXPORT2
309 ucol_getMaxVariable(const UCollator
*coll
) {
310 return Collator::fromUCollator(coll
)->getMaxVariable();
313 U_CAPI
uint32_t U_EXPORT2
314 ucol_setVariableTop(UCollator
*coll
, const UChar
*varTop
, int32_t len
, UErrorCode
*status
) {
315 if(U_FAILURE(*status
) || coll
== NULL
) {
318 return Collator::fromUCollator(coll
)->setVariableTop(varTop
, len
, *status
);
321 U_CAPI
uint32_t U_EXPORT2
ucol_getVariableTop(const UCollator
*coll
, UErrorCode
*status
) {
322 if(U_FAILURE(*status
) || coll
== NULL
) {
325 return Collator::fromUCollator(coll
)->getVariableTop(*status
);
328 U_CAPI
void U_EXPORT2
329 ucol_restoreVariableTop(UCollator
*coll
, const uint32_t varTop
, UErrorCode
*status
) {
330 if(U_FAILURE(*status
) || coll
== NULL
) {
333 Collator::fromUCollator(coll
)->setVariableTop(varTop
, *status
);
336 U_CAPI
void U_EXPORT2
337 ucol_setAttribute(UCollator
*coll
, UColAttribute attr
, UColAttributeValue value
, UErrorCode
*status
) {
338 if(U_FAILURE(*status
) || coll
== NULL
) {
342 Collator::fromUCollator(coll
)->setAttribute(attr
, value
, *status
);
345 U_CAPI UColAttributeValue U_EXPORT2
346 ucol_getAttribute(const UCollator
*coll
, UColAttribute attr
, UErrorCode
*status
) {
347 if(U_FAILURE(*status
) || coll
== NULL
) {
351 return Collator::fromUCollator(coll
)->getAttribute(attr
, *status
);
354 U_CAPI
void U_EXPORT2
355 ucol_setStrength( UCollator
*coll
,
356 UCollationStrength strength
)
358 UErrorCode status
= U_ZERO_ERROR
;
359 ucol_setAttribute(coll
, UCOL_STRENGTH
, strength
, &status
);
362 U_CAPI UCollationStrength U_EXPORT2
363 ucol_getStrength(const UCollator
*coll
)
365 UErrorCode status
= U_ZERO_ERROR
;
366 return ucol_getAttribute(coll
, UCOL_STRENGTH
, &status
);
369 U_CAPI
int32_t U_EXPORT2
370 ucol_getReorderCodes(const UCollator
*coll
,
372 int32_t destCapacity
,
373 UErrorCode
*status
) {
374 if (U_FAILURE(*status
)) {
378 return Collator::fromUCollator(coll
)->getReorderCodes(dest
, destCapacity
, *status
);
381 U_CAPI
void U_EXPORT2
382 ucol_setReorderCodes(UCollator
* coll
,
383 const int32_t* reorderCodes
,
384 int32_t reorderCodesLength
,
385 UErrorCode
*status
) {
386 if (U_FAILURE(*status
)) {
390 Collator::fromUCollator(coll
)->setReorderCodes(reorderCodes
, reorderCodesLength
, *status
);
393 U_CAPI
int32_t U_EXPORT2
394 ucol_getEquivalentReorderCodes(int32_t reorderCode
,
396 int32_t destCapacity
,
397 UErrorCode
*pErrorCode
) {
398 return Collator::getEquivalentReorderCodes(reorderCode
, dest
, destCapacity
, *pErrorCode
);
401 U_CAPI
void U_EXPORT2
402 ucol_getVersion(const UCollator
* coll
,
403 UVersionInfo versionInfo
)
405 Collator::fromUCollator(coll
)->getVersion(versionInfo
);
408 U_CAPI UCollationResult U_EXPORT2
409 ucol_strcollIter( const UCollator
*coll
,
410 UCharIterator
*sIter
,
411 UCharIterator
*tIter
,
414 if(!status
|| U_FAILURE(*status
)) {
418 UTRACE_ENTRY(UTRACE_UCOL_STRCOLLITER
);
419 UTRACE_DATA3(UTRACE_VERBOSE
, "coll=%p, sIter=%p, tIter=%p", coll
, sIter
, tIter
);
421 if(sIter
== NULL
|| tIter
== NULL
|| coll
== NULL
) {
422 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
423 UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL
, *status
);
427 UCollationResult result
= Collator::fromUCollator(coll
)->compare(*sIter
, *tIter
, *status
);
429 UTRACE_EXIT_VALUE_STATUS(result
, *status
);
435 /* ucol_strcoll Main public API string comparison function */
437 U_CAPI UCollationResult U_EXPORT2
438 ucol_strcoll( const UCollator
*coll
,
440 int32_t sourceLength
,
442 int32_t targetLength
)
444 UTRACE_ENTRY(UTRACE_UCOL_STRCOLL
);
445 if (UTRACE_LEVEL(UTRACE_VERBOSE
)) {
446 UTRACE_DATA3(UTRACE_VERBOSE
, "coll=%p, source=%p, target=%p", coll
, source
, target
);
447 UTRACE_DATA2(UTRACE_VERBOSE
, "source string = %vh ", source
, sourceLength
);
448 UTRACE_DATA2(UTRACE_VERBOSE
, "target string = %vh ", target
, targetLength
);
451 UErrorCode status
= U_ZERO_ERROR
;
452 UCollationResult returnVal
= Collator::fromUCollator(coll
)->
453 compare(source
, sourceLength
, target
, targetLength
, status
);
454 UTRACE_EXIT_VALUE_STATUS(returnVal
, status
);
458 U_CAPI UCollationResult U_EXPORT2
460 const UCollator
*coll
,
462 int32_t sourceLength
,
464 int32_t targetLength
,
467 UTRACE_ENTRY(UTRACE_UCOL_STRCOLLUTF8
);
468 if (UTRACE_LEVEL(UTRACE_VERBOSE
)) {
469 UTRACE_DATA3(UTRACE_VERBOSE
, "coll=%p, source=%p, target=%p", coll
, source
, target
);
470 UTRACE_DATA2(UTRACE_VERBOSE
, "source string = %vb ", source
, sourceLength
);
471 UTRACE_DATA2(UTRACE_VERBOSE
, "target string = %vb ", target
, targetLength
);
474 if (U_FAILURE(*status
)) {
476 UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL
, *status
);
480 UCollationResult returnVal
= Collator::fromUCollator(coll
)->internalCompareUTF8(
481 source
, sourceLength
, target
, targetLength
, *status
);
482 UTRACE_EXIT_VALUE_STATUS(returnVal
, *status
);
487 /* convenience function for comparing strings */
488 U_CAPI UBool U_EXPORT2
489 ucol_greater( const UCollator
*coll
,
491 int32_t sourceLength
,
493 int32_t targetLength
)
495 return (ucol_strcoll(coll
, source
, sourceLength
, target
, targetLength
)
499 /* convenience function for comparing strings */
500 U_CAPI UBool U_EXPORT2
501 ucol_greaterOrEqual( const UCollator
*coll
,
503 int32_t sourceLength
,
505 int32_t targetLength
)
507 return (ucol_strcoll(coll
, source
, sourceLength
, target
, targetLength
)
511 /* convenience function for comparing strings */
512 U_CAPI UBool U_EXPORT2
513 ucol_equal( const UCollator
*coll
,
515 int32_t sourceLength
,
517 int32_t targetLength
)
519 return (ucol_strcoll(coll
, source
, sourceLength
, target
, targetLength
)
523 U_CAPI
void U_EXPORT2
524 ucol_getUCAVersion(const UCollator
* coll
, UVersionInfo info
) {
525 const Collator
*c
= Collator::fromUCollator(coll
);
529 // Note: This is tied to how the current implementation encodes the UCA version
530 // in the overall getVersion().
531 // Alternatively, we could load the root collator and get at lower-level data from there.
532 // Either way, it will reflect the input collator's UCA version only
533 // if it is a known implementation.
534 // It would be cleaner to make this a virtual Collator method.
542 U_CAPI
const UChar
* U_EXPORT2
543 ucol_getRules(const UCollator
*coll
, int32_t *length
) {
544 const RuleBasedCollator
*rbc
= RuleBasedCollator::rbcFromUCollator(coll
);
545 // OK to crash if coll==NULL: We do not want to check "this" pointers.
546 if(rbc
!= NULL
|| coll
== NULL
) {
547 const UnicodeString
&rules
= rbc
->getRules();
548 U_ASSERT(rules
.getBuffer()[rules
.length()] == 0);
549 *length
= rules
.length();
550 return rules
.getBuffer();
552 static const UChar _NUL
= 0;
557 U_CAPI
int32_t U_EXPORT2
558 ucol_getRulesEx(const UCollator
*coll
, UColRuleOption delta
, UChar
*buffer
, int32_t bufferLen
) {
560 const RuleBasedCollator
*rbc
= RuleBasedCollator::rbcFromUCollator(coll
);
561 if(rbc
!= NULL
|| coll
== NULL
) {
562 rbc
->getRules(delta
, rules
);
564 if(buffer
!= NULL
&& bufferLen
> 0) {
565 UErrorCode errorCode
= U_ZERO_ERROR
;
566 return rules
.extract(buffer
, bufferLen
, errorCode
);
568 return rules
.length();
572 U_CAPI
const char * U_EXPORT2
573 ucol_getLocale(const UCollator
*coll
, ULocDataLocaleType type
, UErrorCode
*status
) {
574 return ucol_getLocaleByType(coll
, type
, status
);
577 U_CAPI
const char * U_EXPORT2
578 ucol_getLocaleByType(const UCollator
*coll
, ULocDataLocaleType type
, UErrorCode
*status
) {
579 if(U_FAILURE(*status
)) {
582 UTRACE_ENTRY(UTRACE_UCOL_GETLOCALE
);
583 UTRACE_DATA1(UTRACE_INFO
, "coll=%p", coll
);
586 const RuleBasedCollator
*rbc
= RuleBasedCollator::rbcFromUCollator(coll
);
587 if(rbc
== NULL
&& coll
!= NULL
) {
588 *status
= U_UNSUPPORTED_ERROR
;
591 result
= rbc
->internalGetLocaleID(type
, *status
);
594 UTRACE_DATA1(UTRACE_INFO
, "result = %s", result
);
595 UTRACE_EXIT_STATUS(*status
);
599 U_CAPI USet
* U_EXPORT2
600 ucol_getTailoredSet(const UCollator
*coll
, UErrorCode
*status
) {
601 if(U_FAILURE(*status
)) {
604 UnicodeSet
*set
= Collator::fromUCollator(coll
)->getTailoredSet(*status
);
605 if(U_FAILURE(*status
)) {
609 return set
->toUSet();
612 U_CAPI UBool U_EXPORT2
613 ucol_equals(const UCollator
*source
, const UCollator
*target
) {
614 return source
== target
||
615 (*Collator::fromUCollator(source
)) == (*Collator::fromUCollator(target
));
618 #endif /* #if !UCONFIG_NO_COLLATION */