]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/ustrcase.cpp
ICU-511.32.tar.gz
[apple/icu.git] / icuSources / common / ustrcase.cpp
1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 2001-2011, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: ustrcase.cpp
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2002feb20
14 * created by: Markus W. Scherer
15 *
16 * Implementation file for string casing C API functions.
17 * Uses functions from uchar.c for basic functionality that requires access
18 * to the Unicode Character Database (uprops.dat).
19 */
20
21 #include "unicode/utypes.h"
22 #include "unicode/brkiter.h"
23 #include "unicode/ustring.h"
24 #include "unicode/ucasemap.h"
25 #include "unicode/ubrk.h"
26 #include "unicode/utf.h"
27 #include "unicode/utf16.h"
28 #include "cmemory.h"
29 #include "ucase.h"
30 #include "ustr_imp.h"
31
32 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
33
34 U_NAMESPACE_USE
35
36 /* string casing ------------------------------------------------------------ */
37
38 /* Appends a full case mapping result, see UCASE_MAX_STRING_LENGTH. */
39 static inline int32_t
40 appendResult(UChar *dest, int32_t destIndex, int32_t destCapacity,
41 int32_t result, const UChar *s) {
42 UChar32 c;
43 int32_t length;
44
45 /* decode the result */
46 if(result<0) {
47 /* (not) original code point */
48 c=~result;
49 length=-1;
50 } else if(result<=UCASE_MAX_STRING_LENGTH) {
51 c=U_SENTINEL;
52 length=result;
53 } else {
54 c=result;
55 length=-1;
56 }
57
58 if(destIndex<destCapacity) {
59 /* append the result */
60 if(length<0) {
61 /* code point */
62 UBool isError=FALSE;
63 U16_APPEND(dest, destIndex, destCapacity, c, isError);
64 if(isError) {
65 /* overflow, nothing written */
66 destIndex+=U16_LENGTH(c);
67 }
68 } else {
69 /* string */
70 if((destIndex+length)<=destCapacity) {
71 while(length>0) {
72 dest[destIndex++]=*s++;
73 --length;
74 }
75 } else {
76 /* overflow */
77 destIndex+=length;
78 }
79 }
80 } else {
81 /* preflight */
82 if(length<0) {
83 destIndex+=U16_LENGTH(c);
84 } else {
85 destIndex+=length;
86 }
87 }
88 return destIndex;
89 }
90
91 static UChar32 U_CALLCONV
92 utf16_caseContextIterator(void *context, int8_t dir) {
93 UCaseContext *csc=(UCaseContext *)context;
94 UChar32 c;
95
96 if(dir<0) {
97 /* reset for backward iteration */
98 csc->index=csc->cpStart;
99 csc->dir=dir;
100 } else if(dir>0) {
101 /* reset for forward iteration */
102 csc->index=csc->cpLimit;
103 csc->dir=dir;
104 } else {
105 /* continue current iteration direction */
106 dir=csc->dir;
107 }
108
109 if(dir<0) {
110 if(csc->start<csc->index) {
111 U16_PREV((const UChar *)csc->p, csc->start, csc->index, c);
112 return c;
113 }
114 } else {
115 if(csc->index<csc->limit) {
116 U16_NEXT((const UChar *)csc->p, csc->index, csc->limit, c);
117 return c;
118 }
119 }
120 return U_SENTINEL;
121 }
122
123 /*
124 * Case-maps [srcStart..srcLimit[ but takes
125 * context [0..srcLength[ into account.
126 */
127 static int32_t
128 _caseMap(const UCaseMap *csm, UCaseMapFull *map,
129 UChar *dest, int32_t destCapacity,
130 const UChar *src, UCaseContext *csc,
131 int32_t srcStart, int32_t srcLimit,
132 UErrorCode *pErrorCode) {
133 const UChar *s;
134 UChar32 c, c2 = 0;
135 int32_t srcIndex, destIndex;
136 int32_t locCache;
137
138 locCache=csm->locCache;
139
140 /* case mapping loop */
141 srcIndex=srcStart;
142 destIndex=0;
143 while(srcIndex<srcLimit) {
144 csc->cpStart=srcIndex;
145 U16_NEXT(src, srcIndex, srcLimit, c);
146 csc->cpLimit=srcIndex;
147 c=map(csm->csp, c, utf16_caseContextIterator, csc, &s, csm->locale, &locCache);
148 if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0xffff : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0xffff)) {
149 /* fast path version of appendResult() for BMP results */
150 dest[destIndex++]=(UChar)c2;
151 } else {
152 destIndex=appendResult(dest, destIndex, destCapacity, c, s);
153 }
154 }
155
156 if(destIndex>destCapacity) {
157 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
158 }
159 return destIndex;
160 }
161
162 #if !UCONFIG_NO_BREAK_ITERATION
163
164 U_CFUNC int32_t U_CALLCONV
165 ustrcase_internalToTitle(const UCaseMap *csm,
166 UChar *dest, int32_t destCapacity,
167 const UChar *src, int32_t srcLength,
168 UErrorCode *pErrorCode) {
169 const UChar *s;
170 UChar32 c;
171 int32_t prev, titleStart, titleLimit, idx, destIndex, length;
172 UBool isFirstIndex;
173
174 if(U_FAILURE(*pErrorCode)) {
175 return 0;
176 }
177
178 // Use the C++ abstract base class to minimize dependencies.
179 // TODO: Change UCaseMap.iter to store a BreakIterator directly.
180 BreakIterator *bi=reinterpret_cast<BreakIterator *>(csm->iter);
181
182 /* set up local variables */
183 int32_t locCache=csm->locCache;
184 UCaseContext csc=UCASECONTEXT_INITIALIZER;
185 csc.p=(void *)src;
186 csc.limit=srcLength;
187 destIndex=0;
188 prev=0;
189 isFirstIndex=TRUE;
190
191 /* titlecasing loop */
192 while(prev<srcLength) {
193 /* find next index where to titlecase */
194 if(isFirstIndex) {
195 isFirstIndex=FALSE;
196 idx=bi->first();
197 } else {
198 idx=bi->next();
199 }
200 if(idx==UBRK_DONE || idx>srcLength) {
201 idx=srcLength;
202 }
203
204 /*
205 * Unicode 4 & 5 section 3.13 Default Case Operations:
206 *
207 * R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
208 * #29, "Text Boundaries." Between each pair of word boundaries, find the first
209 * cased character F. If F exists, map F to default_title(F); then map each
210 * subsequent character C to default_lower(C).
211 *
212 * In this implementation, segment [prev..index[ into 3 parts:
213 * a) uncased characters (copy as-is) [prev..titleStart[
214 * b) first case letter (titlecase) [titleStart..titleLimit[
215 * c) subsequent characters (lowercase) [titleLimit..index[
216 */
217 if(prev<idx) {
218 /* find and copy uncased characters [prev..titleStart[ */
219 titleStart=titleLimit=prev;
220 U16_NEXT(src, titleLimit, idx, c);
221 if((csm->options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0 && UCASE_NONE==ucase_getType(csm->csp, c)) {
222 /* Adjust the titlecasing index (titleStart) to the next cased character. */
223 for(;;) {
224 titleStart=titleLimit;
225 if(titleLimit==idx) {
226 /*
227 * only uncased characters in [prev..index[
228 * stop with titleStart==titleLimit==index
229 */
230 break;
231 }
232 U16_NEXT(src, titleLimit, idx, c);
233 if(UCASE_NONE!=ucase_getType(csm->csp, c)) {
234 break; /* cased letter at [titleStart..titleLimit[ */
235 }
236 }
237 length=titleStart-prev;
238 if(length>0) {
239 if((destIndex+length)<=destCapacity) {
240 uprv_memcpy(dest+destIndex, src+prev, length*U_SIZEOF_UCHAR);
241 }
242 destIndex+=length;
243 }
244 }
245
246 if(titleStart<titleLimit) {
247 /* titlecase c which is from [titleStart..titleLimit[ */
248 csc.cpStart=titleStart;
249 csc.cpLimit=titleLimit;
250 c=ucase_toFullTitle(csm->csp, c, utf16_caseContextIterator, &csc, &s, csm->locale, &locCache);
251 destIndex=appendResult(dest, destIndex, destCapacity, c, s);
252
253 /* Special case Dutch IJ titlecasing */
254 if ( titleStart+1 < idx &&
255 ucase_getCaseLocale(csm->locale,&locCache) == UCASE_LOC_DUTCH &&
256 ( src[titleStart] == (UChar32) 0x0049 || src[titleStart] == (UChar32) 0x0069 ) &&
257 ( src[titleStart+1] == (UChar32) 0x004A || src[titleStart+1] == (UChar32) 0x006A )) {
258 c=(UChar32) 0x004A;
259 destIndex=appendResult(dest, destIndex, destCapacity, c, s);
260 titleLimit++;
261 }
262
263 /* lowercase [titleLimit..index[ */
264 if(titleLimit<idx) {
265 if((csm->options&U_TITLECASE_NO_LOWERCASE)==0) {
266 /* Normal operation: Lowercase the rest of the word. */
267 destIndex+=
268 _caseMap(
269 csm, ucase_toFullLower,
270 dest+destIndex, destCapacity-destIndex,
271 src, &csc,
272 titleLimit, idx,
273 pErrorCode);
274 } else {
275 /* Optionally just copy the rest of the word unchanged. */
276 length=idx-titleLimit;
277 if((destIndex+length)<=destCapacity) {
278 uprv_memcpy(dest+destIndex, src+titleLimit, length*U_SIZEOF_UCHAR);
279 }
280 destIndex+=length;
281 }
282 }
283 }
284 }
285
286 prev=idx;
287 }
288
289 if(destIndex>destCapacity) {
290 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
291 }
292 return destIndex;
293 }
294
295 #endif // !UCONFIG_NO_BREAK_ITERATION
296
297 /* functions available in the common library (for unistr_case.cpp) */
298
299 U_CFUNC int32_t U_CALLCONV
300 ustrcase_internalToLower(const UCaseMap *csm,
301 UChar *dest, int32_t destCapacity,
302 const UChar *src, int32_t srcLength,
303 UErrorCode *pErrorCode) {
304 UCaseContext csc=UCASECONTEXT_INITIALIZER;
305 csc.p=(void *)src;
306 csc.limit=srcLength;
307 return _caseMap(
308 csm, ucase_toFullLower,
309 dest, destCapacity,
310 src, &csc, 0, srcLength,
311 pErrorCode);
312 }
313
314 U_CFUNC int32_t U_CALLCONV
315 ustrcase_internalToUpper(const UCaseMap *csm,
316 UChar *dest, int32_t destCapacity,
317 const UChar *src, int32_t srcLength,
318 UErrorCode *pErrorCode) {
319 UCaseContext csc=UCASECONTEXT_INITIALIZER;
320 csc.p=(void *)src;
321 csc.limit=srcLength;
322 return _caseMap(
323 csm, ucase_toFullUpper,
324 dest, destCapacity,
325 src, &csc, 0, srcLength,
326 pErrorCode);
327 }
328
329 static int32_t
330 ustr_foldCase(const UCaseProps *csp,
331 UChar *dest, int32_t destCapacity,
332 const UChar *src, int32_t srcLength,
333 uint32_t options,
334 UErrorCode *pErrorCode) {
335 int32_t srcIndex, destIndex;
336
337 const UChar *s;
338 UChar32 c, c2 = 0;
339
340 /* case mapping loop */
341 srcIndex=destIndex=0;
342 while(srcIndex<srcLength) {
343 U16_NEXT(src, srcIndex, srcLength, c);
344 c=ucase_toFullFolding(csp, c, &s, options);
345 if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0xffff : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0xffff)) {
346 /* fast path version of appendResult() for BMP results */
347 dest[destIndex++]=(UChar)c2;
348 } else {
349 destIndex=appendResult(dest, destIndex, destCapacity, c, s);
350 }
351 }
352
353 if(destIndex>destCapacity) {
354 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
355 }
356 return destIndex;
357 }
358
359 U_CFUNC int32_t U_CALLCONV
360 ustrcase_internalFold(const UCaseMap *csm,
361 UChar *dest, int32_t destCapacity,
362 const UChar *src, int32_t srcLength,
363 UErrorCode *pErrorCode) {
364 return ustr_foldCase(csm->csp, dest, destCapacity, src, srcLength, csm->options, pErrorCode);
365 }
366
367 U_CFUNC int32_t
368 ustrcase_map(const UCaseMap *csm,
369 UChar *dest, int32_t destCapacity,
370 const UChar *src, int32_t srcLength,
371 UStringCaseMapper *stringCaseMapper,
372 UErrorCode *pErrorCode) {
373 UChar buffer[300];
374 UChar *temp;
375
376 int32_t destLength;
377
378 /* check argument values */
379 if(U_FAILURE(*pErrorCode)) {
380 return 0;
381 }
382 if( destCapacity<0 ||
383 (dest==NULL && destCapacity>0) ||
384 src==NULL ||
385 srcLength<-1
386 ) {
387 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
388 return 0;
389 }
390
391 /* get the string length */
392 if(srcLength==-1) {
393 srcLength=u_strlen(src);
394 }
395
396 /* check for overlapping source and destination */
397 if( dest!=NULL &&
398 ((src>=dest && src<(dest+destCapacity)) ||
399 (dest>=src && dest<(src+srcLength)))
400 ) {
401 /* overlap: provide a temporary destination buffer and later copy the result */
402 if(destCapacity<=LENGTHOF(buffer)) {
403 /* the stack buffer is large enough */
404 temp=buffer;
405 } else {
406 /* allocate a buffer */
407 temp=(UChar *)uprv_malloc(destCapacity*U_SIZEOF_UCHAR);
408 if(temp==NULL) {
409 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
410 return 0;
411 }
412 }
413 } else {
414 temp=dest;
415 }
416
417 destLength=stringCaseMapper(csm, temp, destCapacity, src, srcLength, pErrorCode);
418 if(temp!=dest) {
419 /* copy the result string to the destination buffer */
420 if(destLength>0) {
421 int32_t copyLength= destLength<=destCapacity ? destLength : destCapacity;
422 if(copyLength>0) {
423 uprv_memmove(dest, temp, copyLength*U_SIZEOF_UCHAR);
424 }
425 }
426 if(temp!=buffer) {
427 uprv_free(temp);
428 }
429 }
430
431 return u_terminateUChars(dest, destCapacity, destLength, pErrorCode);
432 }
433
434 /* public API functions */
435
436 U_CAPI int32_t U_EXPORT2
437 u_strFoldCase(UChar *dest, int32_t destCapacity,
438 const UChar *src, int32_t srcLength,
439 uint32_t options,
440 UErrorCode *pErrorCode) {
441 UCaseMap csm=UCASEMAP_INITIALIZER;
442 csm.csp=ucase_getSingleton();
443 csm.options=options;
444 return ustrcase_map(
445 &csm,
446 dest, destCapacity,
447 src, srcLength,
448 ustrcase_internalFold, pErrorCode);
449 }
450
451 /* case-insensitive string comparisons -------------------------------------- */
452
453 /*
454 * This function is a copy of unorm_cmpEquivFold() minus the parts for
455 * canonical equivalence.
456 * Keep the functions in sync, and see there for how this works.
457 * The duplication is for modularization:
458 * It makes caseless (but not canonical caseless) matches independent of
459 * the normalization code.
460 */
461
462 /* stack element for previous-level source/decomposition pointers */
463 struct CmpEquivLevel {
464 const UChar *start, *s, *limit;
465 };
466 typedef struct CmpEquivLevel CmpEquivLevel;
467
468 /*
 * internal function
 *
 * Compares s1 and s2 code unit by code unit. When two code units differ,
 * the code point on a side that is still reading original text (level 0) is
 * passed to ucase_toFullFolding(); if that returns a non-negative value the
 * folding is stored in fold1[]/fold2[] and the comparison continues from
 * that buffer before returning to the original text.
 *
 * s1, length1 / s2, length2: the strings; a length of -1 means the string
 *     is NUL-terminated (represented below by a NULL limit pointer).
 * options: passed to ucase_toFullFolding(); additionally
 *     _STRNCMP_STYLE makes a NUL code unit end a string even when a length
 *     is given, and U_COMPARE_CODE_POINT_ORDER selects the surrogate
 *     fix-up before the final subtraction.
 * pErrorCode: only read here; if it already indicates a failure the
 *     function returns 0 without comparing.
 *
 * Returns 0 if both strings end at the same time, -1 if string 1 ends
 * first, 1 if string 2 ends first, otherwise the difference c1-c2 of the
 * first code units that differ and cannot be folded any further.
 */
469 U_CFUNC int32_t
470 u_strcmpFold(const UChar *s1, int32_t length1,
471 const UChar *s2, int32_t length2,
472 uint32_t options,
473 UErrorCode *pErrorCode) {
474 const UCaseProps *csp;
475
476 /* current-level start/limit - s1/s2 as current */
477 const UChar *start1, *start2, *limit1, *limit2;
478
479 /* case folding variables */
480 const UChar *p;
481 int32_t length;
482
483 /* stacks of previous-level start/current/limit */
484 CmpEquivLevel stack1[2], stack2[2];
/* note: only element [0] of each stack is written in this function */
485
486 /* case folding buffers, only use current-level start/limit */
487 UChar fold1[UCASE_MAX_STRING_LENGTH+1], fold2[UCASE_MAX_STRING_LENGTH+1];
488
489 /* track which is the current level per string */
490 int32_t level1, level2;
491
492 /* current code units, and code points for lookups */
493 UChar32 c1, c2, cp1, cp2;
494
495 /* no argument error checking because this itself is not an API */
496
497 /*
498 * assume that at least the option U_COMPARE_IGNORE_CASE is set
499 * otherwise this function would have to behave exactly as uprv_strCompare()
500 */
501 csp=ucase_getSingleton();
502 if(U_FAILURE(*pErrorCode)) {
503 return 0;
504 }
505
506 /* initialize */
/* a length of -1 selects NUL-terminated mode, marked by a NULL limit */
507 start1=s1;
508 if(length1==-1) {
509 limit1=NULL;
510 } else {
511 limit1=s1+length1;
512 }
513
514 start2=s2;
515 if(length2==-1) {
516 limit2=NULL;
517 } else {
518 limit2=s2+length2;
519 }
520
521 level1=level2=0;
522 c1=c2=-1;
523
524 /* comparison loop */
525 for(;;) {
526 /*
527 * here a code unit value of -1 means "get another code unit"
528 * below it will mean "this source is finished"
529 */
530
531 if(c1<0) {
532 /* get next code unit from string 1, post-increment */
533 for(;;) {
/* end of the current level: limit reached, or a NUL in NUL-terminated or strncmp-style mode */
534 if(s1==limit1 || ((c1=*s1)==0 && (limit1==NULL || (options&_STRNCMP_STYLE)))) {
535 if(level1==0) {
536 c1=-1;
537 break;
538 }
539 } else {
540 ++s1;
541 break;
542 }
543
544 /* reached end of level buffer, pop one level */
545 do {
546 --level1;
547 start1=stack1[level1].start; /*Not uninitialized*/
548 } while(start1==NULL);
549 s1=stack1[level1].s; /*Not uninitialized*/
550 limit1=stack1[level1].limit; /*Not uninitialized*/
551 }
552 }
553
554 if(c2<0) {
555 /* get next code unit from string 2, post-increment */
556 for(;;) {
/* same end-of-level test as for string 1 */
557 if(s2==limit2 || ((c2=*s2)==0 && (limit2==NULL || (options&_STRNCMP_STYLE)))) {
558 if(level2==0) {
559 c2=-1;
560 break;
561 }
562 } else {
563 ++s2;
564 break;
565 }
566
567 /* reached end of level buffer, pop one level */
568 do {
569 --level2;
570 start2=stack2[level2].start; /*Not uninitialized*/
571 } while(start2==NULL);
572 s2=stack2[level2].s; /*Not uninitialized*/
573 limit2=stack2[level2].limit; /*Not uninitialized*/
574 }
575 }
576
577 /*
578 * compare c1 and c2
579 * either variable c1, c2 is -1 only if the corresponding string is finished
580 */
581 if(c1==c2) {
582 if(c1<0) {
583 return 0; /* c1==c2==-1 indicating end of strings */
584 }
585 c1=c2=-1; /* make us fetch new code units */
586 continue;
587 } else if(c1<0) {
588 return -1; /* string 1 ends before string 2 */
589 } else if(c2<0) {
590 return 1; /* string 2 ends before string 1 */
591 }
592 /* c1!=c2 && c1>=0 && c2>=0 */
593
594 /* get complete code points for c1, c2 for lookups if either is a surrogate */
595 cp1=c1;
596 if(U_IS_SURROGATE(c1)) {
597 UChar c;
598
599 if(U_IS_SURROGATE_LEAD(c1)) {
600 if(s1!=limit1 && U16_IS_TRAIL(c=*s1)) {
601 /* advance ++s1; only below if cp1 decomposes/case-folds */
602 cp1=U16_GET_SUPPLEMENTARY(c1, c);
603 }
604 } else /* isTrail(c1) */ {
605 if(start1<=(s1-2) && U16_IS_LEAD(c=*(s1-2))) {
606 cp1=U16_GET_SUPPLEMENTARY(c, c1);
607 }
608 }
609 }
610
611 cp2=c2;
612 if(U_IS_SURROGATE(c2)) {
613 UChar c;
614
615 if(U_IS_SURROGATE_LEAD(c2)) {
616 if(s2!=limit2 && U16_IS_TRAIL(c=*s2)) {
617 /* advance ++s2; only below if cp2 decomposes/case-folds */
618 cp2=U16_GET_SUPPLEMENTARY(c2, c);
619 }
620 } else /* isTrail(c2) */ {
621 if(start2<=(s2-2) && U16_IS_LEAD(c=*(s2-2))) {
622 cp2=U16_GET_SUPPLEMENTARY(c, c2);
623 }
624 }
625 }
626
627 /*
628 * go down one level for each string
629 * continue with the main loop as soon as there is a real change
630 */
631
/* level1==0: only original text is folded; text read from fold1[] is never folded again */
632 if( level1==0 &&
633 (length=ucase_toFullFolding(csp, (UChar32)cp1, &p, options))>=0
634 ) {
635 /* cp1 case-folds to the code point "length" or to p[length] */
636 if(U_IS_SURROGATE(c1)) {
637 if(U_IS_SURROGATE_LEAD(c1)) {
638 /* advance beyond source surrogate pair if it case-folds */
639 ++s1;
640 } else /* isTrail(c1) */ {
641 /*
642 * we got a supplementary code point when hitting its trail surrogate,
643 * therefore the lead surrogate must have been the same as in the other string;
644 * compare this decomposition with the lead surrogate in the other string
645 * remember that this simulates bulk text replacement:
646 * the decomposition would replace the entire code point
647 */
648 --s2;
649 c2=*(s2-1);
650 }
651 }
652
653 /* push current level pointers */
654 stack1[0].start=start1;
655 stack1[0].s=s1;
656 stack1[0].limit=limit1;
657 ++level1;
658
659 /* copy the folding result to fold1[] */
/* values up to UCASE_MAX_STRING_LENGTH are a string length for p[]; larger values are one code point to encode */
660 if(length<=UCASE_MAX_STRING_LENGTH) {
661 u_memcpy(fold1, p, length);
662 } else {
663 int32_t i=0;
664 U16_APPEND_UNSAFE(fold1, i, length);
665 length=i;
666 }
667
668 /* set next level pointers to case folding */
669 start1=s1=fold1;
670 limit1=fold1+length;
671
672 /* get ready to read from decomposition, continue with loop */
673 c1=-1;
674 continue;
675 }
676
/* mirror image of the block above for string 2; reached only if string 1 was not folded in this pass */
677 if( level2==0 &&
678 (length=ucase_toFullFolding(csp, (UChar32)cp2, &p, options))>=0
679 ) {
680 /* cp2 case-folds to the code point "length" or to p[length] */
681 if(U_IS_SURROGATE(c2)) {
682 if(U_IS_SURROGATE_LEAD(c2)) {
683 /* advance beyond source surrogate pair if it case-folds */
684 ++s2;
685 } else /* isTrail(c2) */ {
686 /*
687 * we got a supplementary code point when hitting its trail surrogate,
688 * therefore the lead surrogate must have been the same as in the other string;
689 * compare this decomposition with the lead surrogate in the other string
690 * remember that this simulates bulk text replacement:
691 * the decomposition would replace the entire code point
692 */
693 --s1;
694 c1=*(s1-1);
695 }
696 }
697
698 /* push current level pointers */
699 stack2[0].start=start2;
700 stack2[0].s=s2;
701 stack2[0].limit=limit2;
702 ++level2;
703
704 /* copy the folding result to fold2[] */
705 if(length<=UCASE_MAX_STRING_LENGTH) {
706 u_memcpy(fold2, p, length);
707 } else {
708 int32_t i=0;
709 U16_APPEND_UNSAFE(fold2, i, length);
710 length=i;
711 }
712
713 /* set next level pointers to case folding */
714 start2=s2=fold2;
715 limit2=fold2+length;
716
717 /* get ready to read from decomposition, continue with loop */
718 c2=-1;
719 continue;
720 }
721
722 /*
723 * no decomposition/case folding, max level for both sides:
724 * return difference result
725 *
726 * code point order comparison must not just return cp1-cp2
727 * because when single surrogates are present then the surrogate pairs
728 * that formed cp1 and cp2 may be from different string indexes
729 *
730 * example: { d800 d800 dc01 } vs. { d800 dc00 }, compare at second code units
731 * c1=d800 cp1=10001 c2=dc00 cp2=10000
732 * cp1-cp2>0 but c1-c2<0 and in fact in UTF-32 it is { d800 10001 } < { 10000 }
733 *
734 * therefore, use same fix-up as in ustring.c/uprv_strCompare()
735 * except: uprv_strCompare() fetches c=*s while this functions fetches c=*s++
736 * so we have slightly different pointer/start/limit comparisons here
737 */
738
739 if(c1>=0xd800 && c2>=0xd800 && (options&U_COMPARE_CODE_POINT_ORDER)) {
740 /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
741 if(
742 (c1<=0xdbff && s1!=limit1 && U16_IS_TRAIL(*s1)) ||
743 (U16_IS_TRAIL(c1) && start1!=(s1-1) && U16_IS_LEAD(*(s1-2)))
744 ) {
745 /* part of a surrogate pair, leave >=d800 */
746 } else {
747 /* BMP code point - may be surrogate code point - make <d800 */
748 c1-=0x2800;
749 }
750
751 if(
752 (c2<=0xdbff && s2!=limit2 && U16_IS_TRAIL(*s2)) ||
753 (U16_IS_TRAIL(c2) && start2!=(s2-1) && U16_IS_LEAD(*(s2-2)))
754 ) {
755 /* part of a surrogate pair, leave >=d800 */
756 } else {
757 /* BMP code point - may be surrogate code point - make <d800 */
758 c2-=0x2800;
759 }
760 }
761
762 return c1-c2;
763 }
764 }
765
766 /* public API functions */
767
768 U_CAPI int32_t U_EXPORT2
769 u_strCaseCompare(const UChar *s1, int32_t length1,
770 const UChar *s2, int32_t length2,
771 uint32_t options,
772 UErrorCode *pErrorCode) {
773 /* argument checking */
774 if(pErrorCode==0 || U_FAILURE(*pErrorCode)) {
775 return 0;
776 }
777 if(s1==NULL || length1<-1 || s2==NULL || length2<-1) {
778 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
779 return 0;
780 }
781 return u_strcmpFold(s1, length1, s2, length2,
782 options|U_COMPARE_IGNORE_CASE,
783 pErrorCode);
784 }
785
786 U_CAPI int32_t U_EXPORT2
787 u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options) {
788 UErrorCode errorCode=U_ZERO_ERROR;
789 return u_strcmpFold(s1, -1, s2, -1,
790 options|U_COMPARE_IGNORE_CASE,
791 &errorCode);
792 }
793
794 U_CAPI int32_t U_EXPORT2
795 u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options) {
796 UErrorCode errorCode=U_ZERO_ERROR;
797 return u_strcmpFold(s1, length, s2, length,
798 options|U_COMPARE_IGNORE_CASE,
799 &errorCode);
800 }
801
802 U_CAPI int32_t U_EXPORT2
803 u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options) {
804 UErrorCode errorCode=U_ZERO_ERROR;
805 return u_strcmpFold(s1, n, s2, n,
806 options|(U_COMPARE_IGNORE_CASE|_STRNCMP_STYLE),
807 &errorCode);
808 }