]> git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/ucol.cpp
ICU-57166.0.1.tar.gz
[apple/icu.git] / icuSources / i18n / ucol.cpp
1 /*
2 *******************************************************************************
3 * Copyright (C) 1996-2015, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
6 * file name: ucol.cpp
7 * encoding: US-ASCII
8 * tab size: 8 (not used)
9 * indentation:4
10 *
11 * Modification history
12 * Date Name Comments
13 * 1996-1999 various members of ICU team maintained C API for collation framework
14 * 02/16/2001 synwee Added internal method getPrevSpecialCE
15 * 03/01/2001 synwee Added maxexpansion functionality.
16 * 03/16/2001 weiv Collation framework is rewritten in C and made UCA compliant
17 * 2012-2014 markus Rewritten in C++ again.
18 */
19
20 #include "unicode/utypes.h"
21
22 #if !UCONFIG_NO_COLLATION
23
24 #include "unicode/coll.h"
25 #include "unicode/tblcoll.h"
26 #include "unicode/bytestream.h"
27 #include "unicode/coleitr.h"
28 #include "unicode/ucoleitr.h"
29 #include "unicode/ustring.h"
30 #include "cmemory.h"
31 #include "collation.h"
32 #include "cstring.h"
33 #include "putilimp.h"
34 #include "uassert.h"
35 #include "utracimp.h"
36
37 U_NAMESPACE_USE
38
39 U_CAPI UCollator* U_EXPORT2
40 ucol_openBinary(const uint8_t *bin, int32_t length,
41 const UCollator *base,
42 UErrorCode *status)
43 {
44 if(U_FAILURE(*status)) { return NULL; }
45 RuleBasedCollator *coll = new RuleBasedCollator(
46 bin, length,
47 RuleBasedCollator::rbcFromUCollator(base),
48 *status);
49 if(coll == NULL) {
50 *status = U_MEMORY_ALLOCATION_ERROR;
51 return NULL;
52 }
53 if(U_FAILURE(*status)) {
54 delete coll;
55 return NULL;
56 }
57 return coll->toUCollator();
58 }
59
60 U_CAPI int32_t U_EXPORT2
61 ucol_cloneBinary(const UCollator *coll,
62 uint8_t *buffer, int32_t capacity,
63 UErrorCode *status)
64 {
65 if(U_FAILURE(*status)) {
66 return 0;
67 }
68 const RuleBasedCollator *rbc = RuleBasedCollator::rbcFromUCollator(coll);
69 if(rbc == NULL && coll != NULL) {
70 *status = U_UNSUPPORTED_ERROR;
71 return 0;
72 }
73 return rbc->cloneBinary(buffer, capacity, *status);
74 }
75
76 U_CAPI UCollator* U_EXPORT2
77 ucol_safeClone(const UCollator *coll, void * /*stackBuffer*/, int32_t * pBufferSize, UErrorCode *status)
78 {
79 if (status == NULL || U_FAILURE(*status)){
80 return NULL;
81 }
82 if (coll == NULL) {
83 *status = U_ILLEGAL_ARGUMENT_ERROR;
84 return NULL;
85 }
86 if (pBufferSize != NULL) {
87 int32_t inputSize = *pBufferSize;
88 *pBufferSize = 1;
89 if (inputSize == 0) {
90 return NULL; // preflighting for deprecated functionality
91 }
92 }
93 Collator *newColl = Collator::fromUCollator(coll)->clone();
94 if (newColl == NULL) {
95 *status = U_MEMORY_ALLOCATION_ERROR;
96 } else {
97 *status = U_SAFECLONE_ALLOCATED_WARNING;
98 }
99 return newColl->toUCollator();
100 }
101
102 U_CAPI void U_EXPORT2
103 ucol_close(UCollator *coll)
104 {
105 UTRACE_ENTRY_OC(UTRACE_UCOL_CLOSE);
106 UTRACE_DATA1(UTRACE_INFO, "coll = %p", coll);
107 if(coll != NULL) {
108 delete Collator::fromUCollator(coll);
109 }
110 UTRACE_EXIT();
111 }
112
113 U_CAPI int32_t U_EXPORT2
114 ucol_mergeSortkeys(const uint8_t *src1, int32_t src1Length,
115 const uint8_t *src2, int32_t src2Length,
116 uint8_t *dest, int32_t destCapacity) {
117 /* check arguments */
118 if( src1==NULL || src1Length<-1 || src1Length==0 || (src1Length>0 && src1[src1Length-1]!=0) ||
119 src2==NULL || src2Length<-1 || src2Length==0 || (src2Length>0 && src2[src2Length-1]!=0) ||
120 destCapacity<0 || (destCapacity>0 && dest==NULL)
121 ) {
122 /* error, attempt to write a zero byte and return 0 */
123 if(dest!=NULL && destCapacity>0) {
124 *dest=0;
125 }
126 return 0;
127 }
128
129 /* check lengths and capacity */
130 if(src1Length<0) {
131 src1Length=(int32_t)uprv_strlen((const char *)src1)+1;
132 }
133 if(src2Length<0) {
134 src2Length=(int32_t)uprv_strlen((const char *)src2)+1;
135 }
136
137 int32_t destLength=src1Length+src2Length;
138 if(destLength>destCapacity) {
139 /* the merged sort key does not fit into the destination */
140 return destLength;
141 }
142
143 /* merge the sort keys with the same number of levels */
144 uint8_t *p=dest;
145 for(;;) {
146 /* copy level from src1 not including 00 or 01 */
147 uint8_t b;
148 while((b=*src1)>=2) {
149 ++src1;
150 *p++=b;
151 }
152
153 /* add a 02 merge separator */
154 *p++=2;
155
156 /* copy level from src2 not including 00 or 01 */
157 while((b=*src2)>=2) {
158 ++src2;
159 *p++=b;
160 }
161
162 /* if both sort keys have another level, then add a 01 level separator and continue */
163 if(*src1==1 && *src2==1) {
164 ++src1;
165 ++src2;
166 *p++=1;
167 } else {
168 break;
169 }
170 }
171
172 /*
173 * here, at least one sort key is finished now, but the other one
174 * might have some contents left from containing more levels;
175 * that contents is just appended to the result
176 */
177 if(*src1!=0) {
178 /* src1 is not finished, therefore *src2==0, and src1 is appended */
179 src2=src1;
180 }
181 /* append src2, "the other, unfinished sort key" */
182 while((*p++=*src2++)!=0) {}
183
184 /* the actual length might be less than destLength if either sort key contained illegally embedded zero bytes */
185 return (int32_t)(p-dest);
186 }
187
188 U_CAPI int32_t U_EXPORT2
189 ucol_getSortKey(const UCollator *coll,
190 const UChar *source,
191 int32_t sourceLength,
192 uint8_t *result,
193 int32_t resultLength)
194 {
195 UTRACE_ENTRY(UTRACE_UCOL_GET_SORTKEY);
196 if (UTRACE_LEVEL(UTRACE_VERBOSE)) {
197 UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, source string = %vh ", coll, source,
198 ((sourceLength==-1 && source!=NULL) ? u_strlen(source) : sourceLength));
199 }
200
201 int32_t keySize = Collator::fromUCollator(coll)->
202 getSortKey(source, sourceLength, result, resultLength);
203
204 UTRACE_DATA2(UTRACE_VERBOSE, "Sort Key = %vb", result, keySize);
205 UTRACE_EXIT_VALUE(keySize);
206 return keySize;
207 }
208
209 U_CAPI int32_t U_EXPORT2
210 ucol_nextSortKeyPart(const UCollator *coll,
211 UCharIterator *iter,
212 uint32_t state[2],
213 uint8_t *dest, int32_t count,
214 UErrorCode *status)
215 {
216 /* error checking */
217 if(status==NULL || U_FAILURE(*status)) {
218 return 0;
219 }
220 UTRACE_ENTRY(UTRACE_UCOL_NEXTSORTKEYPART);
221 UTRACE_DATA6(UTRACE_VERBOSE, "coll=%p, iter=%p, state=%d %d, dest=%p, count=%d",
222 coll, iter, state[0], state[1], dest, count);
223
224 int32_t i = Collator::fromUCollator(coll)->
225 internalNextSortKeyPart(iter, state, dest, count, *status);
226
227 // Return number of meaningful sortkey bytes.
228 UTRACE_DATA4(UTRACE_VERBOSE, "dest = %vb, state=%d %d",
229 dest,i, state[0], state[1]);
230 UTRACE_EXIT_VALUE_STATUS(i, *status);
231 return i;
232 }
233
234 /**
235 * Produce a bound for a given sortkey and a number of levels.
236 */
237 U_CAPI int32_t U_EXPORT2
238 ucol_getBound(const uint8_t *source,
239 int32_t sourceLength,
240 UColBoundMode boundType,
241 uint32_t noOfLevels,
242 uint8_t *result,
243 int32_t resultLength,
244 UErrorCode *status)
245 {
246 // consistency checks
247 if(status == NULL || U_FAILURE(*status)) {
248 return 0;
249 }
250 if(source == NULL) {
251 *status = U_ILLEGAL_ARGUMENT_ERROR;
252 return 0;
253 }
254
255 int32_t sourceIndex = 0;
256 // Scan the string until we skip enough of the key OR reach the end of the key
257 do {
258 sourceIndex++;
259 if(source[sourceIndex] == Collation::LEVEL_SEPARATOR_BYTE) {
260 noOfLevels--;
261 }
262 } while (noOfLevels > 0
263 && (source[sourceIndex] != 0 || sourceIndex < sourceLength));
264
265 if((source[sourceIndex] == 0 || sourceIndex == sourceLength)
266 && noOfLevels > 0) {
267 *status = U_SORT_KEY_TOO_SHORT_WARNING;
268 }
269
270
271 // READ ME: this code assumes that the values for boundType
272 // enum will not changes. They are set so that the enum value
273 // corresponds to the number of extra bytes each bound type
274 // needs.
275 if(result != NULL && resultLength >= sourceIndex+boundType) {
276 uprv_memcpy(result, source, sourceIndex);
277 switch(boundType) {
278 // Lower bound just gets terminated. No extra bytes
279 case UCOL_BOUND_LOWER: // = 0
280 break;
281 // Upper bound needs one extra byte
282 case UCOL_BOUND_UPPER: // = 1
283 result[sourceIndex++] = 2;
284 break;
285 // Upper long bound needs two extra bytes
286 case UCOL_BOUND_UPPER_LONG: // = 2
287 result[sourceIndex++] = 0xFF;
288 result[sourceIndex++] = 0xFF;
289 break;
290 default:
291 *status = U_ILLEGAL_ARGUMENT_ERROR;
292 return 0;
293 }
294 result[sourceIndex++] = 0;
295
296 return sourceIndex;
297 } else {
298 return sourceIndex+boundType+1;
299 }
300 }
301
302 U_CAPI void U_EXPORT2
303 ucol_setMaxVariable(UCollator *coll, UColReorderCode group, UErrorCode *pErrorCode) {
304 if(U_FAILURE(*pErrorCode)) { return; }
305 Collator::fromUCollator(coll)->setMaxVariable(group, *pErrorCode);
306 }
307
308 U_CAPI UColReorderCode U_EXPORT2
309 ucol_getMaxVariable(const UCollator *coll) {
310 return Collator::fromUCollator(coll)->getMaxVariable();
311 }
312
313 U_CAPI uint32_t U_EXPORT2
314 ucol_setVariableTop(UCollator *coll, const UChar *varTop, int32_t len, UErrorCode *status) {
315 if(U_FAILURE(*status) || coll == NULL) {
316 return 0;
317 }
318 return Collator::fromUCollator(coll)->setVariableTop(varTop, len, *status);
319 }
320
321 U_CAPI uint32_t U_EXPORT2 ucol_getVariableTop(const UCollator *coll, UErrorCode *status) {
322 if(U_FAILURE(*status) || coll == NULL) {
323 return 0;
324 }
325 return Collator::fromUCollator(coll)->getVariableTop(*status);
326 }
327
328 U_CAPI void U_EXPORT2
329 ucol_restoreVariableTop(UCollator *coll, const uint32_t varTop, UErrorCode *status) {
330 if(U_FAILURE(*status) || coll == NULL) {
331 return;
332 }
333 Collator::fromUCollator(coll)->setVariableTop(varTop, *status);
334 }
335
336 U_CAPI void U_EXPORT2
337 ucol_setAttribute(UCollator *coll, UColAttribute attr, UColAttributeValue value, UErrorCode *status) {
338 if(U_FAILURE(*status) || coll == NULL) {
339 return;
340 }
341
342 Collator::fromUCollator(coll)->setAttribute(attr, value, *status);
343 }
344
345 U_CAPI UColAttributeValue U_EXPORT2
346 ucol_getAttribute(const UCollator *coll, UColAttribute attr, UErrorCode *status) {
347 if(U_FAILURE(*status) || coll == NULL) {
348 return UCOL_DEFAULT;
349 }
350
351 return Collator::fromUCollator(coll)->getAttribute(attr, *status);
352 }
353
354 U_CAPI void U_EXPORT2
355 ucol_setStrength( UCollator *coll,
356 UCollationStrength strength)
357 {
358 UErrorCode status = U_ZERO_ERROR;
359 ucol_setAttribute(coll, UCOL_STRENGTH, strength, &status);
360 }
361
362 U_CAPI UCollationStrength U_EXPORT2
363 ucol_getStrength(const UCollator *coll)
364 {
365 UErrorCode status = U_ZERO_ERROR;
366 return ucol_getAttribute(coll, UCOL_STRENGTH, &status);
367 }
368
369 U_CAPI int32_t U_EXPORT2
370 ucol_getReorderCodes(const UCollator *coll,
371 int32_t *dest,
372 int32_t destCapacity,
373 UErrorCode *status) {
374 if (U_FAILURE(*status)) {
375 return 0;
376 }
377
378 return Collator::fromUCollator(coll)->getReorderCodes(dest, destCapacity, *status);
379 }
380
381 U_CAPI void U_EXPORT2
382 ucol_setReorderCodes(UCollator* coll,
383 const int32_t* reorderCodes,
384 int32_t reorderCodesLength,
385 UErrorCode *status) {
386 if (U_FAILURE(*status)) {
387 return;
388 }
389
390 Collator::fromUCollator(coll)->setReorderCodes(reorderCodes, reorderCodesLength, *status);
391 }
392
393 U_CAPI int32_t U_EXPORT2
394 ucol_getEquivalentReorderCodes(int32_t reorderCode,
395 int32_t* dest,
396 int32_t destCapacity,
397 UErrorCode *pErrorCode) {
398 return Collator::getEquivalentReorderCodes(reorderCode, dest, destCapacity, *pErrorCode);
399 }
400
401 U_CAPI void U_EXPORT2
402 ucol_getVersion(const UCollator* coll,
403 UVersionInfo versionInfo)
404 {
405 Collator::fromUCollator(coll)->getVersion(versionInfo);
406 }
407
408 U_CAPI UCollationResult U_EXPORT2
409 ucol_strcollIter( const UCollator *coll,
410 UCharIterator *sIter,
411 UCharIterator *tIter,
412 UErrorCode *status)
413 {
414 if(!status || U_FAILURE(*status)) {
415 return UCOL_EQUAL;
416 }
417
418 UTRACE_ENTRY(UTRACE_UCOL_STRCOLLITER);
419 UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, sIter=%p, tIter=%p", coll, sIter, tIter);
420
421 if(sIter == NULL || tIter == NULL || coll == NULL) {
422 *status = U_ILLEGAL_ARGUMENT_ERROR;
423 UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status);
424 return UCOL_EQUAL;
425 }
426
427 UCollationResult result = Collator::fromUCollator(coll)->compare(*sIter, *tIter, *status);
428
429 UTRACE_EXIT_VALUE_STATUS(result, *status);
430 return result;
431 }
432
433
434 /* */
435 /* ucol_strcoll Main public API string comparison function */
436 /* */
437 U_CAPI UCollationResult U_EXPORT2
438 ucol_strcoll( const UCollator *coll,
439 const UChar *source,
440 int32_t sourceLength,
441 const UChar *target,
442 int32_t targetLength)
443 {
444 UTRACE_ENTRY(UTRACE_UCOL_STRCOLL);
445 if (UTRACE_LEVEL(UTRACE_VERBOSE)) {
446 UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, source=%p, target=%p", coll, source, target);
447 UTRACE_DATA2(UTRACE_VERBOSE, "source string = %vh ", source, sourceLength);
448 UTRACE_DATA2(UTRACE_VERBOSE, "target string = %vh ", target, targetLength);
449 }
450
451 UErrorCode status = U_ZERO_ERROR;
452 UCollationResult returnVal = Collator::fromUCollator(coll)->
453 compare(source, sourceLength, target, targetLength, status);
454 UTRACE_EXIT_VALUE_STATUS(returnVal, status);
455 return returnVal;
456 }
457
458 U_CAPI UCollationResult U_EXPORT2
459 ucol_strcollUTF8(
460 const UCollator *coll,
461 const char *source,
462 int32_t sourceLength,
463 const char *target,
464 int32_t targetLength,
465 UErrorCode *status)
466 {
467 UTRACE_ENTRY(UTRACE_UCOL_STRCOLLUTF8);
468 if (UTRACE_LEVEL(UTRACE_VERBOSE)) {
469 UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, source=%p, target=%p", coll, source, target);
470 UTRACE_DATA2(UTRACE_VERBOSE, "source string = %vb ", source, sourceLength);
471 UTRACE_DATA2(UTRACE_VERBOSE, "target string = %vb ", target, targetLength);
472 }
473
474 if (U_FAILURE(*status)) {
475 /* do nothing */
476 UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status);
477 return UCOL_EQUAL;
478 }
479
480 UCollationResult returnVal = Collator::fromUCollator(coll)->internalCompareUTF8(
481 source, sourceLength, target, targetLength, *status);
482 UTRACE_EXIT_VALUE_STATUS(returnVal, *status);
483 return returnVal;
484 }
485
486
487 /* convenience function for comparing strings */
488 U_CAPI UBool U_EXPORT2
489 ucol_greater( const UCollator *coll,
490 const UChar *source,
491 int32_t sourceLength,
492 const UChar *target,
493 int32_t targetLength)
494 {
495 return (ucol_strcoll(coll, source, sourceLength, target, targetLength)
496 == UCOL_GREATER);
497 }
498
499 /* convenience function for comparing strings */
500 U_CAPI UBool U_EXPORT2
501 ucol_greaterOrEqual( const UCollator *coll,
502 const UChar *source,
503 int32_t sourceLength,
504 const UChar *target,
505 int32_t targetLength)
506 {
507 return (ucol_strcoll(coll, source, sourceLength, target, targetLength)
508 != UCOL_LESS);
509 }
510
511 /* convenience function for comparing strings */
512 U_CAPI UBool U_EXPORT2
513 ucol_equal( const UCollator *coll,
514 const UChar *source,
515 int32_t sourceLength,
516 const UChar *target,
517 int32_t targetLength)
518 {
519 return (ucol_strcoll(coll, source, sourceLength, target, targetLength)
520 == UCOL_EQUAL);
521 }
522
523 U_CAPI void U_EXPORT2
524 ucol_getUCAVersion(const UCollator* coll, UVersionInfo info) {
525 const Collator *c = Collator::fromUCollator(coll);
526 if(c != NULL) {
527 UVersionInfo v;
528 c->getVersion(v);
529 // Note: This is tied to how the current implementation encodes the UCA version
530 // in the overall getVersion().
531 // Alternatively, we could load the root collator and get at lower-level data from there.
532 // Either way, it will reflect the input collator's UCA version only
533 // if it is a known implementation.
534 // It would be cleaner to make this a virtual Collator method.
535 info[0] = v[1] >> 3;
536 info[1] = v[1] & 7;
537 info[2] = v[2] >> 6;
538 info[3] = 0;
539 }
540 }
541
542 U_CAPI const UChar * U_EXPORT2
543 ucol_getRules(const UCollator *coll, int32_t *length) {
544 const RuleBasedCollator *rbc = RuleBasedCollator::rbcFromUCollator(coll);
545 // OK to crash if coll==NULL: We do not want to check "this" pointers.
546 if(rbc != NULL || coll == NULL) {
547 const UnicodeString &rules = rbc->getRules();
548 U_ASSERT(rules.getBuffer()[rules.length()] == 0);
549 *length = rules.length();
550 return rules.getBuffer();
551 }
552 static const UChar _NUL = 0;
553 *length = 0;
554 return &_NUL;
555 }
556
557 U_CAPI int32_t U_EXPORT2
558 ucol_getRulesEx(const UCollator *coll, UColRuleOption delta, UChar *buffer, int32_t bufferLen) {
559 UnicodeString rules;
560 const RuleBasedCollator *rbc = RuleBasedCollator::rbcFromUCollator(coll);
561 if(rbc != NULL || coll == NULL) {
562 rbc->getRules(delta, rules);
563 }
564 if(buffer != NULL && bufferLen > 0) {
565 UErrorCode errorCode = U_ZERO_ERROR;
566 return rules.extract(buffer, bufferLen, errorCode);
567 } else {
568 return rules.length();
569 }
570 }
571
572 U_CAPI const char * U_EXPORT2
573 ucol_getLocale(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status) {
574 return ucol_getLocaleByType(coll, type, status);
575 }
576
577 U_CAPI const char * U_EXPORT2
578 ucol_getLocaleByType(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status) {
579 if(U_FAILURE(*status)) {
580 return NULL;
581 }
582 UTRACE_ENTRY(UTRACE_UCOL_GETLOCALE);
583 UTRACE_DATA1(UTRACE_INFO, "coll=%p", coll);
584
585 const char *result;
586 const RuleBasedCollator *rbc = RuleBasedCollator::rbcFromUCollator(coll);
587 if(rbc == NULL && coll != NULL) {
588 *status = U_UNSUPPORTED_ERROR;
589 result = NULL;
590 } else {
591 result = rbc->internalGetLocaleID(type, *status);
592 }
593
594 UTRACE_DATA1(UTRACE_INFO, "result = %s", result);
595 UTRACE_EXIT_STATUS(*status);
596 return result;
597 }
598
599 U_CAPI USet * U_EXPORT2
600 ucol_getTailoredSet(const UCollator *coll, UErrorCode *status) {
601 if(U_FAILURE(*status)) {
602 return NULL;
603 }
604 UnicodeSet *set = Collator::fromUCollator(coll)->getTailoredSet(*status);
605 if(U_FAILURE(*status)) {
606 delete set;
607 return NULL;
608 }
609 return set->toUSet();
610 }
611
612 U_CAPI UBool U_EXPORT2
613 ucol_equals(const UCollator *source, const UCollator *target) {
614 return source == target ||
615 (*Collator::fromUCollator(source)) == (*Collator::fromUCollator(target));
616 }
617
618 #endif /* #if !UCONFIG_NO_COLLATION */