]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/ustrcase.c
ICU-8.11.tar.gz
[apple/icu.git] / icuSources / common / ustrcase.c
1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 2001-2006, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: ustrcase.c
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2002feb20
14 * created by: Markus W. Scherer
15 *
16 * Implementation file for string casing C API functions.
17 * Uses functions from uchar.c for basic functionality that requires access
18 * to the Unicode Character Database (uprops.dat).
19 */
20
21 #include "unicode/utypes.h"
22 #include "unicode/uloc.h"
23 #include "unicode/ustring.h"
24 #include "unicode/ubrk.h"
25 #include "cmemory.h"
26 #include "ucase.h"
27 #include "unormimp.h"
28 #include "ustr_imp.h"
29
30 /* string casing ------------------------------------------------------------ */
31
32 /* append a full case mapping result, see UCASE_MAX_STRING_LENGTH */
33 static U_INLINE int32_t
34 appendResult(UChar *dest, int32_t destIndex, int32_t destCapacity,
35 int32_t result, const UChar *s) {
36 UChar32 c;
37 int32_t length;
38
39 /* decode the result */
40 if(result<0) {
41 /* (not) original code point */
42 c=~result;
43 length=-1;
44 } else if(result<=UCASE_MAX_STRING_LENGTH) {
45 c=U_SENTINEL;
46 length=result;
47 } else {
48 c=result;
49 length=-1;
50 }
51
52 if(destIndex<destCapacity) {
53 /* append the result */
54 if(length<0) {
55 /* code point */
56 UBool isError=FALSE;
57 U16_APPEND(dest, destIndex, destCapacity, c, isError);
58 if(isError) {
59 /* overflow, nothing written */
60 destIndex+=U16_LENGTH(c);
61 }
62 } else {
63 /* string */
64 if((destIndex+length)<=destCapacity) {
65 while(length>0) {
66 dest[destIndex++]=*s++;
67 --length;
68 }
69 } else {
70 /* overflow */
71 destIndex+=length;
72 }
73 }
74 } else {
75 /* preflight */
76 if(length<0) {
77 destIndex+=U16_LENGTH(c);
78 } else {
79 destIndex+=length;
80 }
81 }
82 return destIndex;
83 }
84
85 static UChar32 U_CALLCONV
86 utf16_caseContextIterator(void *context, int8_t dir) {
87 UCaseContext *csc=(UCaseContext *)context;
88 UChar32 c;
89
90 if(dir<0) {
91 /* reset for backward iteration */
92 csc->index=csc->cpStart;
93 csc->dir=dir;
94 } else if(dir>0) {
95 /* reset for forward iteration */
96 csc->index=csc->cpLimit;
97 csc->dir=dir;
98 } else {
99 /* continue current iteration direction */
100 dir=csc->dir;
101 }
102
103 if(dir<0) {
104 if(csc->start<csc->index) {
105 U16_PREV((const UChar *)csc->p, csc->start, csc->index, c);
106 return c;
107 }
108 } else {
109 if(csc->index<csc->limit) {
110 U16_NEXT((const UChar *)csc->p, csc->index, csc->limit, c);
111 return c;
112 }
113 }
114 return U_SENTINEL;
115 }
116
117 typedef int32_t U_CALLCONV
118 UCaseMapFull(const UCaseProps *csp, UChar32 c,
119 UCaseContextIterator *iter, void *context,
120 const UChar **pString,
121 const char *locale, int32_t *locCache);
122
123 /*
124 * Case-maps [srcStart..srcLimit[ but takes
125 * context [0..srcLength[ into account.
126 */
127 static int32_t
128 _caseMap(const UCaseProps *csp, UCaseMapFull *map,
129 UChar *dest, int32_t destCapacity,
130 const UChar *src, UCaseContext *csc,
131 int32_t srcStart, int32_t srcLimit,
132 const char *locale, int32_t *locCache,
133 UErrorCode *pErrorCode) {
134 const UChar *s;
135 UChar32 c, c2;
136 int32_t srcIndex, destIndex;
137
138 /* case mapping loop */
139 srcIndex=srcStart;
140 destIndex=0;
141 while(srcIndex<srcLimit) {
142 csc->cpStart=srcIndex;
143 U16_NEXT(src, srcIndex, srcLimit, c);
144 csc->cpLimit=srcIndex;
145 c=map(csp, c, utf16_caseContextIterator, csc, &s, locale, locCache);
146 if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0xffff : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0xffff)) {
147 /* fast path version of appendResult() for BMP results */
148 dest[destIndex++]=(UChar)c2;
149 } else {
150 destIndex=appendResult(dest, destIndex, destCapacity, c, s);
151 }
152 }
153
154 if(destIndex>destCapacity) {
155 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
156 }
157 return destIndex;
158 }
159
160 #if !UCONFIG_NO_BREAK_ITERATION
161
162 /*
163 * Internal titlecasing function.
164 *
165 * Must get titleIter!=NULL.
166 */
167 static int32_t
168 _toTitle(const UCaseProps *csp,
169 UChar *dest, int32_t destCapacity,
170 const UChar *src, UCaseContext *csc,
171 int32_t srcLength,
172 UBreakIterator *titleIter,
173 const char *locale, int32_t *locCache,
174 UErrorCode *pErrorCode) {
175 const UChar *s;
176 UChar32 c;
177 int32_t prev, titleStart, titleLimit, index, destIndex, length;
178 UBool isFirstIndex;
179
180 /* set up local variables */
181 destIndex=0;
182 prev=0;
183 isFirstIndex=TRUE;
184
185 /* titlecasing loop */
186 while(prev<srcLength) {
187 /* find next index where to titlecase */
188 if(isFirstIndex) {
189 isFirstIndex=FALSE;
190 index=ubrk_first(titleIter);
191 } else {
192 index=ubrk_next(titleIter);
193 }
194 if(index==UBRK_DONE || index>srcLength) {
195 index=srcLength;
196 }
197
198 /*
199 * Unicode 4 & 5 section 3.13 Default Case Operations:
200 *
201 * R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
202 * #29, "Text Boundaries." Between each pair of word boundaries, find the first
203 * cased character F. If F exists, map F to default_title(F); then map each
204 * subsequent character C to default_lower(C).
205 *
206 * In this implementation, segment [prev..index[ into 3 parts:
207 * a) uncased characters (copy as-is) [prev..titleStart[
208 * b) first case letter (titlecase) [titleStart..titleLimit[
209 * c) subsequent characters (lowercase) [titleLimit..index[
210 */
211 if(prev<index) {
212 /* find and copy uncased characters [prev..titleStart[ */
213 titleStart=titleLimit=prev;
214 for(;;) {
215 U16_NEXT(src, titleLimit, srcLength, c);
216 if(UCASE_NONE!=ucase_getType(csp, c)) {
217 break; /* cased letter at [titleStart..titleLimit[ */
218 }
219 titleStart=titleLimit;
220 if(titleLimit==index) {
221 /*
222 * only uncased characters in [prev..index[
223 * stop with titleStart==titleLimit==index
224 */
225 break;
226 }
227 }
228 length=titleStart-prev;
229 if(length>0) {
230 if((destIndex+length)<=destCapacity) {
231 uprv_memcpy(dest+destIndex, src+prev, length*U_SIZEOF_UCHAR);
232 }
233 destIndex+=length;
234 }
235
236 if(titleStart<titleLimit) {
237 /* titlecase c which is from [titleStart..titleLimit[ */
238 csc->cpStart=titleStart;
239 csc->cpLimit=titleLimit;
240 c=ucase_toFullTitle(csp, c, utf16_caseContextIterator, csc, &s, locale, locCache);
241 destIndex=appendResult(dest, destIndex, destCapacity, c, s);
242
243 /* lowercase [titleLimit..index[ */
244 if(titleLimit<index) {
245 destIndex+=
246 _caseMap(
247 csp, ucase_toFullLower,
248 dest+destIndex, destCapacity-destIndex,
249 src, csc,
250 titleLimit, index,
251 locale, locCache,
252 pErrorCode);
253 }
254 }
255 }
256
257 prev=index;
258 }
259
260 if(destIndex>destCapacity) {
261 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
262 }
263 return destIndex;
264 }
265
266 U_CFUNC int32_t
267 ustr_toTitle(const UCaseProps *csp,
268 UChar *dest, int32_t destCapacity,
269 const UChar *src, int32_t srcLength,
270 UBreakIterator *titleIter,
271 const char *locale,
272 UErrorCode *pErrorCode) {
273 UCaseContext csc={ NULL };
274 int32_t locCache;
275
276 csc.p=(void *)src;
277 csc.limit=srcLength;
278 locCache=0;
279
280 return _toTitle(csp,
281 dest, destCapacity,
282 src, &csc, srcLength,
283 titleIter, locale, &locCache, pErrorCode);
284 }
285
286 #endif
287
288 /* functions available in the common library (for unistr_case.cpp) */
289
290 U_CFUNC int32_t
291 ustr_toLower(const UCaseProps *csp,
292 UChar *dest, int32_t destCapacity,
293 const UChar *src, int32_t srcLength,
294 const char *locale,
295 UErrorCode *pErrorCode) {
296 UCaseContext csc={ NULL };
297 int32_t locCache;
298
299 csc.p=(void *)src;
300 csc.limit=srcLength;
301 locCache=0;
302
303 return _caseMap(csp, ucase_toFullLower,
304 dest, destCapacity,
305 src, &csc, 0, srcLength,
306 locale, &locCache, pErrorCode);
307 }
308
309 U_CFUNC int32_t
310 ustr_toUpper(const UCaseProps *csp,
311 UChar *dest, int32_t destCapacity,
312 const UChar *src, int32_t srcLength,
313 const char *locale,
314 UErrorCode *pErrorCode) {
315 UCaseContext csc={ NULL };
316 int32_t locCache;
317
318 csc.p=(void *)src;
319 csc.limit=srcLength;
320 locCache=0;
321
322 return _caseMap(csp, ucase_toFullUpper,
323 dest, destCapacity,
324 src, &csc, 0, srcLength,
325 locale, &locCache, pErrorCode);
326 }
327
328 U_CFUNC int32_t
329 ustr_foldCase(const UCaseProps *csp,
330 UChar *dest, int32_t destCapacity,
331 const UChar *src, int32_t srcLength,
332 uint32_t options,
333 UErrorCode *pErrorCode) {
334 int32_t srcIndex, destIndex;
335
336 const UChar *s;
337 UChar32 c, c2;
338
339 /* case mapping loop */
340 srcIndex=destIndex=0;
341 while(srcIndex<srcLength) {
342 U16_NEXT(src, srcIndex, srcLength, c);
343 c=ucase_toFullFolding(csp, c, &s, options);
344 if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0xffff : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0xffff)) {
345 /* fast path version of appendResult() for BMP results */
346 dest[destIndex++]=(UChar)c2;
347 } else {
348 destIndex=appendResult(dest, destIndex, destCapacity, c, s);
349 }
350 }
351
352 if(destIndex>destCapacity) {
353 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
354 }
355 return destIndex;
356 }
357
358 /*
359 * Implement argument checking and buffer handling
360 * for string case mapping as a common function.
361 */
/* selector values for the toWhichCase argument of caseMap() */
enum {
    TO_LOWER,   /* map with ucase_toFullLower */
    TO_UPPER,   /* map with ucase_toFullUpper */
    TO_TITLE,   /* titlecase with _toTitle() */
    FOLD_CASE   /* case-fold with ustr_foldCase() */
};
368
369 /* common internal function for public API functions */
370
371 static int32_t
372 caseMap(UChar *dest, int32_t destCapacity,
373 const UChar *src, int32_t srcLength,
374 UBreakIterator *titleIter,
375 const char *locale,
376 uint32_t options,
377 int32_t toWhichCase,
378 UErrorCode *pErrorCode) {
379 UChar buffer[300];
380 UChar *temp;
381
382 const UCaseProps *csp;
383
384 int32_t destLength;
385 UBool ownTitleIter;
386
387 /* check argument values */
388 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
389 return 0;
390 }
391 if( destCapacity<0 ||
392 (dest==NULL && destCapacity>0) ||
393 src==NULL ||
394 srcLength<-1
395 ) {
396 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
397 return 0;
398 }
399
400 csp=ucase_getSingleton(pErrorCode);
401 if(U_FAILURE(*pErrorCode)) {
402 return 0;
403 }
404
405 /* get the string length */
406 if(srcLength==-1) {
407 srcLength=u_strlen(src);
408 }
409
410 /* check for overlapping source and destination */
411 if( dest!=NULL &&
412 ((src>=dest && src<(dest+destCapacity)) ||
413 (dest>=src && dest<(src+srcLength)))
414 ) {
415 /* overlap: provide a temporary destination buffer and later copy the result */
416 if(destCapacity<=(sizeof(buffer)/U_SIZEOF_UCHAR)) {
417 /* the stack buffer is large enough */
418 temp=buffer;
419 } else {
420 /* allocate a buffer */
421 temp=(UChar *)uprv_malloc(destCapacity*U_SIZEOF_UCHAR);
422 if(temp==NULL) {
423 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
424 return 0;
425 }
426 }
427 } else {
428 temp=dest;
429 }
430
431 ownTitleIter=FALSE;
432 destLength=0;
433
434 if(toWhichCase==FOLD_CASE) {
435 destLength=ustr_foldCase(csp, temp, destCapacity, src, srcLength,
436 options, pErrorCode);
437 } else {
438 UCaseContext csc={ NULL };
439 int32_t locCache;
440
441 csc.p=(void *)src;
442 csc.limit=srcLength;
443 locCache=0;
444
445 /* the internal functions require locale!=NULL */
446 if(locale==NULL) {
447 locale=uloc_getDefault();
448 }
449
450 if(toWhichCase==TO_LOWER) {
451 destLength=_caseMap(csp, ucase_toFullLower,
452 temp, destCapacity,
453 src, &csc,
454 0, srcLength,
455 locale, &locCache, pErrorCode);
456 } else if(toWhichCase==TO_UPPER) {
457 destLength=_caseMap(csp, ucase_toFullUpper,
458 temp, destCapacity,
459 src, &csc,
460 0, srcLength,
461 locale, &locCache, pErrorCode);
462 } else /* if(toWhichCase==TO_TITLE) */ {
463 #if UCONFIG_NO_BREAK_ITERATION
464 *pErrorCode=U_UNSUPPORTED_ERROR;
465 #else
466 if(titleIter==NULL) {
467 titleIter=ubrk_open(UBRK_WORD, locale,
468 src, srcLength,
469 pErrorCode);
470 ownTitleIter=(UBool)U_SUCCESS(*pErrorCode);
471 }
472 if(U_SUCCESS(*pErrorCode)) {
473 destLength=_toTitle(csp, temp, destCapacity,
474 src, &csc, srcLength,
475 titleIter, locale, &locCache, pErrorCode);
476 }
477 #endif
478 }
479 }
480 if(temp!=dest) {
481 /* copy the result string to the destination buffer */
482 if(destLength>0) {
483 int32_t copyLength= destLength<=destCapacity ? destLength : destCapacity;
484 if(copyLength>0) {
485 uprv_memmove(dest, temp, copyLength*U_SIZEOF_UCHAR);
486 }
487 }
488 if(temp!=buffer) {
489 uprv_free(temp);
490 }
491 }
492
493 #if !UCONFIG_NO_BREAK_ITERATION
494 if(ownTitleIter) {
495 ubrk_close(titleIter);
496 }
497 #endif
498
499 return u_terminateUChars(dest, destCapacity, destLength, pErrorCode);
500 }
501
502 /* public API functions */
503
504 U_CAPI int32_t U_EXPORT2
505 u_strToLower(UChar *dest, int32_t destCapacity,
506 const UChar *src, int32_t srcLength,
507 const char *locale,
508 UErrorCode *pErrorCode) {
509 return caseMap(dest, destCapacity,
510 src, srcLength,
511 NULL, locale, 0,
512 TO_LOWER, pErrorCode);
513 }
514
515 U_CAPI int32_t U_EXPORT2
516 u_strToUpper(UChar *dest, int32_t destCapacity,
517 const UChar *src, int32_t srcLength,
518 const char *locale,
519 UErrorCode *pErrorCode) {
520 return caseMap(dest, destCapacity,
521 src, srcLength,
522 NULL, locale, 0,
523 TO_UPPER, pErrorCode);
524 }
525
526 #if !UCONFIG_NO_BREAK_ITERATION
527
528 U_CAPI int32_t U_EXPORT2
529 u_strToTitle(UChar *dest, int32_t destCapacity,
530 const UChar *src, int32_t srcLength,
531 UBreakIterator *titleIter,
532 const char *locale,
533 UErrorCode *pErrorCode) {
534 return caseMap(dest, destCapacity,
535 src, srcLength,
536 titleIter, locale, 0,
537 TO_TITLE, pErrorCode);
538 }
539
540 #endif
541
542 U_CAPI int32_t U_EXPORT2
543 u_strFoldCase(UChar *dest, int32_t destCapacity,
544 const UChar *src, int32_t srcLength,
545 uint32_t options,
546 UErrorCode *pErrorCode) {
547 return caseMap(dest, destCapacity,
548 src, srcLength,
549 NULL, NULL, options,
550 FOLD_CASE, pErrorCode);
551 }
552
553 /* case-insensitive string comparisons -------------------------------------- */
554
555 /*
556 * This function is a copy of unorm_cmpEquivFold() minus the parts for
557 * canonical equivalence.
558 * Keep the functions in sync, and see there for how this works.
559 * The duplication is for modularization:
560 * It makes caseless (but not canonical caseless) matches independent of
561 * the normalization code.
562 */
563
564 /* stack element for previous-level source/decomposition pointers */
565 struct CmpEquivLevel {
566 const UChar *start, *s, *limit;
567 };
568 typedef struct CmpEquivLevel CmpEquivLevel;
569
570 /* internal function */
/*
 * Compares two UTF-16 strings while applying full case folding
 * (ucase_toFullFolding()) on the fly, one code unit at a time.
 *
 * s1/length1, s2/length2: the strings; a length of -1 means NUL-terminated.
 * options: passed through to ucase_toFullFolding(); in addition
 *   _STRNCMP_STYLE makes a NUL code unit end a string even when a length
 *   was given, and U_COMPARE_CODE_POINT_ORDER enables the surrogate fix-up
 *   before the final difference is returned.
 * pErrorCode: dereferenced without a NULL check; if ucase_getSingleton()
 *   fails the function returns 0 with the failure left in *pErrorCode.
 *
 * Returns 0 if both strings end together without a difference, -1/1 if
 * string 1/string 2 ends first, otherwise the difference c1-c2 of the first
 * differing code units that cannot be folded any further.
 */
571 U_CFUNC int32_t
572 u_strcmpFold(const UChar *s1, int32_t length1,
573 const UChar *s2, int32_t length2,
574 uint32_t options,
575 UErrorCode *pErrorCode) {
576 const UCaseProps *csp;
577
578 /* current-level start/limit - s1/s2 as current */
579 const UChar *start1, *start2, *limit1, *limit2;
580
581 /* case folding variables */
582 const UChar *p;
583 int32_t length;
584
585 /* stacks of previous-level start/current/limit */
/* NOTE(review): this function only ever writes element [0] of each stack */
586 CmpEquivLevel stack1[2], stack2[2];
587
588 /* case folding buffers, only use current-level start/limit */
589 UChar fold1[UCASE_MAX_STRING_LENGTH+1], fold2[UCASE_MAX_STRING_LENGTH+1];
590
591 /* track which is the current level per string */
592 int32_t level1, level2;
593
594 /* current code units, and code points for lookups */
595 UChar32 c1, c2, cp1, cp2;
596
597 /* no argument error checking because this itself is not an API */
598
599 /*
600 * assume that at least the option U_COMPARE_IGNORE_CASE is set
601 * otherwise this function would have to behave exactly as uprv_strCompare()
602 */
603 csp=ucase_getSingleton(pErrorCode);
604 if(U_FAILURE(*pErrorCode)) {
605 return 0;
606 }
607
608 /* initialize */
/* a NULL limit marks a NUL-terminated string whose end is found while reading */
609 start1=s1;
610 if(length1==-1) {
611 limit1=NULL;
612 } else {
613 limit1=s1+length1;
614 }
615
616 start2=s2;
617 if(length2==-1) {
618 limit2=NULL;
619 } else {
620 limit2=s2+length2;
621 }
622
623 level1=level2=0;
624 c1=c2=-1;
625
626 /* comparison loop */
627 for(;;) {
628 /*
629 * here a code unit value of -1 means "get another code unit"
630 * below it will mean "this source is finished"
631 */
632
633 if(c1<0) {
634 /* get next code unit from string 1, post-increment */
635 for(;;) {
/* end of the current level: at the limit, or at a NUL that terminates the string */
636 if(s1==limit1 || ((c1=*s1)==0 && (limit1==NULL || (options&_STRNCMP_STYLE)))) {
637 if(level1==0) {
638 c1=-1;
639 break;
640 }
641 } else {
642 ++s1;
643 break;
644 }
645
646 /* reached end of level buffer, pop one level */
647 do {
648 --level1;
649 start1=stack1[level1].start;
650 } while(start1==NULL);
651 s1=stack1[level1].s;
652 limit1=stack1[level1].limit;
653 }
654 }
655
656 if(c2<0) {
657 /* get next code unit from string 2, post-increment */
658 for(;;) {
/* end of the current level: at the limit, or at a NUL that terminates the string */
659 if(s2==limit2 || ((c2=*s2)==0 && (limit2==NULL || (options&_STRNCMP_STYLE)))) {
660 if(level2==0) {
661 c2=-1;
662 break;
663 }
664 } else {
665 ++s2;
666 break;
667 }
668
669 /* reached end of level buffer, pop one level */
670 do {
671 --level2;
672 start2=stack2[level2].start;
673 } while(start2==NULL);
674 s2=stack2[level2].s;
675 limit2=stack2[level2].limit;
676 }
677 }
678
679 /*
680 * compare c1 and c2
681 * either variable c1, c2 is -1 only if the corresponding string is finished
682 */
683 if(c1==c2) {
684 if(c1<0) {
685 return 0; /* c1==c2==-1 indicating end of strings */
686 }
687 c1=c2=-1; /* make us fetch new code units */
688 continue;
689 } else if(c1<0) {
690 return -1; /* string 1 ends before string 2 */
691 } else if(c2<0) {
692 return 1; /* string 2 ends before string 1 */
693 }
694 /* c1!=c2 && c1>=0 && c2>=0 */
695
696 /* get complete code points for c1, c2 for lookups if either is a surrogate */
697 cp1=c1;
698 if(U_IS_SURROGATE(c1)) {
699 UChar c;
700
701 if(U_IS_SURROGATE_LEAD(c1)) {
702 if(s1!=limit1 && U16_IS_TRAIL(c=*s1)) {
703 /* advance ++s1; only below if cp1 decomposes/case-folds */
704 cp1=U16_GET_SUPPLEMENTARY(c1, c);
705 }
706 } else /* isTrail(c1) */ {
/* s1 was already post-incremented, so the unit before c1 is at s1-2 */
707 if(start1<=(s1-2) && U16_IS_LEAD(c=*(s1-2))) {
708 cp1=U16_GET_SUPPLEMENTARY(c, c1);
709 }
710 }
711 }
712
713 cp2=c2;
714 if(U_IS_SURROGATE(c2)) {
715 UChar c;
716
717 if(U_IS_SURROGATE_LEAD(c2)) {
718 if(s2!=limit2 && U16_IS_TRAIL(c=*s2)) {
719 /* advance ++s2; only below if cp2 decomposes/case-folds */
720 cp2=U16_GET_SUPPLEMENTARY(c2, c);
721 }
722 } else /* isTrail(c2) */ {
/* s2 was already post-incremented, so the unit before c2 is at s2-2 */
723 if(start2<=(s2-2) && U16_IS_LEAD(c=*(s2-2))) {
724 cp2=U16_GET_SUPPLEMENTARY(c, c2);
725 }
726 }
727 }
728
729 /*
730 * go down one level for each string
731 * continue with the main loop as soon as there is a real change
732 */
733
/* fold only at level 0: code units read from fold1[] are not folded again */
734 if( level1==0 &&
735 (length=ucase_toFullFolding(csp, (UChar32)cp1, &p, options))>=0
736 ) {
737 /* cp1 case-folds to the code point "length" or to p[length] */
738 if(U_IS_SURROGATE(c1)) {
739 if(U_IS_SURROGATE_LEAD(c1)) {
740 /* advance beyond source surrogate pair if it case-folds */
741 ++s1;
742 } else /* isTrail(c1) */ {
743 /*
744 * we got a supplementary code point when hitting its trail surrogate,
745 * therefore the lead surrogate must have been the same as in the other string;
746 * compare this decomposition with the lead surrogate in the other string
747 * remember that this simulates bulk text replacement:
748 * the decomposition would replace the entire code point
749 */
750 --s2;
751 c2=*(s2-1);
752 }
753 }
754
755 /* push current level pointers */
756 stack1[0].start=start1;
757 stack1[0].s=s1;
758 stack1[0].limit=limit1;
759 ++level1;
760
761 /* copy the folding result to fold1[] */
762 if(length<=UCASE_MAX_STRING_LENGTH) {
763 u_memcpy(fold1, p, length);
764 } else {
/* length holds a code point here: encode it, then reuse length for its UTF-16 length */
765 int32_t i=0;
766 U16_APPEND_UNSAFE(fold1, i, length);
767 length=i;
768 }
769
770 /* set next level pointers to case folding */
771 start1=s1=fold1;
772 limit1=fold1+length;
773
774 /* get ready to read from decomposition, continue with loop */
775 c1=-1;
776 continue;
777 }
778
/* fold only at level 0: code units read from fold2[] are not folded again */
779 if( level2==0 &&
780 (length=ucase_toFullFolding(csp, (UChar32)cp2, &p, options))>=0
781 ) {
782 /* cp2 case-folds to the code point "length" or to p[length] */
783 if(U_IS_SURROGATE(c2)) {
784 if(U_IS_SURROGATE_LEAD(c2)) {
785 /* advance beyond source surrogate pair if it case-folds */
786 ++s2;
787 } else /* isTrail(c2) */ {
788 /*
789 * we got a supplementary code point when hitting its trail surrogate,
790 * therefore the lead surrogate must have been the same as in the other string;
791 * compare this decomposition with the lead surrogate in the other string
792 * remember that this simulates bulk text replacement:
793 * the decomposition would replace the entire code point
794 */
795 --s1;
796 c1=*(s1-1);
797 }
798 }
799
800 /* push current level pointers */
801 stack2[0].start=start2;
802 stack2[0].s=s2;
803 stack2[0].limit=limit2;
804 ++level2;
805
806 /* copy the folding result to fold2[] */
807 if(length<=UCASE_MAX_STRING_LENGTH) {
808 u_memcpy(fold2, p, length);
809 } else {
/* length holds a code point here: encode it, then reuse length for its UTF-16 length */
810 int32_t i=0;
811 U16_APPEND_UNSAFE(fold2, i, length);
812 length=i;
813 }
814
815 /* set next level pointers to case folding */
816 start2=s2=fold2;
817 limit2=fold2+length;
818
819 /* get ready to read from decomposition, continue with loop */
820 c2=-1;
821 continue;
822 }
823
824 /*
825 * no decomposition/case folding, max level for both sides:
826 * return difference result
827 *
828 * code point order comparison must not just return cp1-cp2
829 * because when single surrogates are present then the surrogate pairs
830 * that formed cp1 and cp2 may be from different string indexes
831 *
832 * example: { d800 d800 dc01 } vs. { d800 dc00 }, compare at second code units
833 * c1=d800 cp1=10001 c2=dc00 cp2=10000
834 * cp1-cp2>0 but c1-c2<0 and in fact in UTF-32 it is { d800 10001 } < { 10000 }
835 *
836 * therefore, use same fix-up as in ustring.c/uprv_strCompare()
837 * except: uprv_strCompare() fetches c=*s while this functions fetches c=*s++
838 * so we have slightly different pointer/start/limit comparisons here
839 */
840
841 if(c1>=0xd800 && c2>=0xd800 && (options&U_COMPARE_CODE_POINT_ORDER)) {
842 /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
843 if(
844 (c1<=0xdbff && s1!=limit1 && U16_IS_TRAIL(*s1)) ||
845 (U16_IS_TRAIL(c1) && start1!=(s1-1) && U16_IS_LEAD(*(s1-2)))
846 ) {
847 /* part of a surrogate pair, leave >=d800 */
848 } else {
849 /* BMP code point - may be surrogate code point - make <d800 */
850 c1-=0x2800;
851 }
852
853 if(
854 (c2<=0xdbff && s2!=limit2 && U16_IS_TRAIL(*s2)) ||
855 (U16_IS_TRAIL(c2) && start2!=(s2-1) && U16_IS_LEAD(*(s2-2)))
856 ) {
857 /* part of a surrogate pair, leave >=d800 */
858 } else {
859 /* BMP code point - may be surrogate code point - make <d800 */
860 c2-=0x2800;
861 }
862 }
863
864 return c1-c2;
865 }
866 }
867
868 /* public API functions */
869
870 U_CAPI int32_t U_EXPORT2
871 u_strCaseCompare(const UChar *s1, int32_t length1,
872 const UChar *s2, int32_t length2,
873 uint32_t options,
874 UErrorCode *pErrorCode) {
875 /* argument checking */
876 if(pErrorCode==0 || U_FAILURE(*pErrorCode)) {
877 return 0;
878 }
879 if(s1==NULL || length1<-1 || s2==NULL || length2<-1) {
880 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
881 return 0;
882 }
883 return u_strcmpFold(s1, length1, s2, length2,
884 options|U_COMPARE_IGNORE_CASE,
885 pErrorCode);
886 }
887
888 U_CAPI int32_t U_EXPORT2
889 u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options) {
890 UErrorCode errorCode=U_ZERO_ERROR;
891 return u_strcmpFold(s1, -1, s2, -1,
892 options|U_COMPARE_IGNORE_CASE,
893 &errorCode);
894 }
895
896 U_CAPI int32_t U_EXPORT2
897 u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options) {
898 UErrorCode errorCode=U_ZERO_ERROR;
899 return u_strcmpFold(s1, length, s2, length,
900 options|U_COMPARE_IGNORE_CASE,
901 &errorCode);
902 }
903
904 U_CAPI int32_t U_EXPORT2
905 u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options) {
906 UErrorCode errorCode=U_ZERO_ERROR;
907 return u_strcmpFold(s1, n, s2, n,
908 options|(U_COMPARE_IGNORE_CASE|_STRNCMP_STYLE),
909 &errorCode);
910 }