icuSources/common/ustrcase.c

   1 /*
   2 *******************************************************************************
   3 *
   4 *   Copyright (C) 2001-2006, International Business Machines
   5 *   Corporation and others.  All Rights Reserved.
   6 *
   7 *******************************************************************************
   8 *   file name:  ustrcase.c
   9 *   encoding:   US-ASCII
  10 *   tab size:   8 (not used)
  11 *   indentation:4
  12 *
  13 *   created on: 2002feb20
  14 *   created by: Markus W. Scherer
  15 *
  16 *   Implementation file for string casing C API functions.
  17 *   Uses functions from uchar.c for basic functionality that requires access
  18 *   to the Unicode Character Database (uprops.dat).
  19 */
  20
  21 #include "unicode/utypes.h"
  22 #include "unicode/uloc.h"
  23 #include "unicode/ustring.h"
  24 #include "unicode/ubrk.h"
  25 #include "cmemory.h"
  26 #include "ucase.h"
  27 #include "unormimp.h"
  28 #include "ustr_imp.h"
  29
  30 /* string casing ------------------------------------------------------------ */
  31
  32 /* append a full case mapping result, see UCASE_MAX_STRING_LENGTH */
  33 static U_INLINE int32_t
  34 appendResult(UChar *dest, int32_t destIndex, int32_t destCapacity,
  35              int32_t result, const UChar *s) {
  36     UChar32 c;
  37     int32_t length;
  38
  39     /* decode the result */
  40     if(result<0) {
  41         /* (not) original code point */
  42         c=~result;
  43         length=-1;
  44     } else if(result<=UCASE_MAX_STRING_LENGTH) {
  45         c=U_SENTINEL;
  46         length=result;
  47     } else {
  48         c=result;
  49         length=-1;
  50     }
  51
  52     if(destIndex<destCapacity) {
  53         /* append the result */
  54         if(length<0) {
  55             /* code point */
  56             UBool isError=FALSE;
  57             U16_APPEND(dest, destIndex, destCapacity, c, isError);
  58             if(isError) {
  59                 /* overflow, nothing written */
  60                 destIndex+=U16_LENGTH(c);
  61             }
  62         } else {
  63             /* string */
  64             if((destIndex+length)<=destCapacity) {
  65                 while(length>0) {
  66                     dest[destIndex++]=*s++;
  67                     --length;
  68                 }
  69             } else {
  70                 /* overflow */
  71                 destIndex+=length;
  72             }
  73         }
  74     } else {
  75         /* preflight */
  76         if(length<0) {
  77             destIndex+=U16_LENGTH(c);
  78         } else {
  79             destIndex+=length;
  80         }
  81     }
  82     return destIndex;
  83 }
  84
  85 static UChar32 U_CALLCONV
  86 utf16_caseContextIterator(void *context, int8_t dir) {
  87     UCaseContext *csc=(UCaseContext *)context;
  88     UChar32 c;
  89
  90     if(dir<0) {
  91         /* reset for backward iteration */
  92         csc->index=csc->cpStart;
  93         csc->dir=dir;
  94     } else if(dir>0) {
  95         /* reset for forward iteration */
  96         csc->index=csc->cpLimit;
  97         csc->dir=dir;
  98     } else {
  99         /* continue current iteration direction */
 100         dir=csc->dir;
 101     }
 102
 103     if(dir<0) {
 104         if(csc->start<csc->index) {
 105             U16_PREV((const UChar *)csc->p, csc->start, csc->index, c);
 106             return c;
 107         }
 108     } else {
 109         if(csc->index<csc->limit) {
 110             U16_NEXT((const UChar *)csc->p, csc->index, csc->limit, c);
 111             return c;
 112         }
 113     }
 114     return U_SENTINEL;
 115 }
 116
 117 typedef int32_t U_CALLCONV
 118 UCaseMapFull(const UCaseProps *csp, UChar32 c,
 119              UCaseContextIterator *iter, void *context,
 120              const UChar **pString,
 121              const char *locale, int32_t *locCache);
 122
 123 /*
 124  * Case-maps [srcStart..srcLimit[ but takes
 125  * context [0..srcLength[ into account.
 126  */
 127 static int32_t
 128 _caseMap(const UCaseProps *csp, UCaseMapFull *map,
 129          UChar *dest, int32_t destCapacity,
 130          const UChar *src, UCaseContext *csc,
 131          int32_t srcStart, int32_t srcLimit,
 132          const char *locale, int32_t *locCache,
 133          UErrorCode *pErrorCode) {
 134     const UChar *s;
 135     UChar32 c, c2;
 136     int32_t srcIndex, destIndex;
 137
 138     /* case mapping loop */
 139     srcIndex=srcStart;
 140     destIndex=0;
 141     while(srcIndex<srcLimit) {
 142         csc->cpStart=srcIndex;
 143         U16_NEXT(src, srcIndex, srcLimit, c);
 144         csc->cpLimit=srcIndex;
 145         c=map(csp, c, utf16_caseContextIterator, csc, &s, locale, locCache);
 146         if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0xffff : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0xffff)) {
 147             /* fast path version of appendResult() for BMP results */
 148             dest[destIndex++]=(UChar)c2;
 149         } else {
 150             destIndex=appendResult(dest, destIndex, destCapacity, c, s);
 151         }
 152     }
 153
 154     if(destIndex>destCapacity) {
 155         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
 156     }
 157     return destIndex;
 158 }
 159
 160 #if !UCONFIG_NO_BREAK_ITERATION
 161
 162 /*
 163  * Internal titlecasing function.
 164  *
 165  * Must get titleIter!=NULL.
 166  */
 167 static int32_t
 168 _toTitle(const UCaseProps *csp,
 169          UChar *dest, int32_t destCapacity,
 170          const UChar *src, UCaseContext *csc,
 171          int32_t srcLength,
 172          UBreakIterator *titleIter,
 173          const char *locale, int32_t *locCache,
 174          UErrorCode *pErrorCode) {
 175     const UChar *s;
 176     UChar32 c;
 177     int32_t prev, titleStart, titleLimit, index, destIndex, length;
 178     UBool isFirstIndex;
 179
 180     /* set up local variables */
 181     destIndex=0;
 182     prev=0;
 183     isFirstIndex=TRUE;
 184
 185     /* titlecasing loop */
 186     while(prev<srcLength) {
 187         /* find next index where to titlecase */
 188         if(isFirstIndex) {
 189             isFirstIndex=FALSE;
 190             index=ubrk_first(titleIter);
 191         } else {
 192             index=ubrk_next(titleIter);
 193         }
 194         if(index==UBRK_DONE || index>srcLength) {
 195             index=srcLength;
 196         }
 197
 198         /*
 199          * Unicode 4 & 5 section 3.13 Default Case Operations:
 200          *
 201          * R3  toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
 202          * #29, "Text Boundaries." Between each pair of word boundaries, find the first
 203          * cased character F. If F exists, map F to default_title(F); then map each
 204          * subsequent character C to default_lower(C).
 205          *
 206          * In this implementation, segment [prev..index[ into 3 parts:
 207          * a) uncased characters (copy as-is) [prev..titleStart[
 208          * b) first case letter (titlecase)         [titleStart..titleLimit[
 209          * c) subsequent characters (lowercase)                 [titleLimit..index[
 210          */
 211         if(prev<index) {
 212             /* find and copy uncased characters [prev..titleStart[ */
 213             titleStart=titleLimit=prev;
 214             for(;;) {
 215                 U16_NEXT(src, titleLimit, srcLength, c);
 216                 if(UCASE_NONE!=ucase_getType(csp, c)) {
 217                     break; /* cased letter at [titleStart..titleLimit[ */
 218                 }
 219                 titleStart=titleLimit;
 220                 if(titleLimit==index) {
 221                     /*
 222                      * only uncased characters in [prev..index[
 223                      * stop with titleStart==titleLimit==index
 224                      */
 225                     break;
 226                 }
 227             }
 228             length=titleStart-prev;
 229             if(length>0) {
 230                 if((destIndex+length)<=destCapacity) {
 231                     uprv_memcpy(dest+destIndex, src+prev, length*U_SIZEOF_UCHAR);
 232                 }
 233                 destIndex+=length;
 234             }
 235
 236             if(titleStart<titleLimit) {
 237                 /* titlecase c which is from [titleStart..titleLimit[ */
 238                 csc->cpStart=titleStart;
 239                 csc->cpLimit=titleLimit;
 240                 c=ucase_toFullTitle(csp, c, utf16_caseContextIterator, csc, &s, locale, locCache);
 241                 destIndex=appendResult(dest, destIndex, destCapacity, c, s);
 242
 243                 /* lowercase [titleLimit..index[ */
 244                 if(titleLimit<index) {
 245                     destIndex+=
 246                         _caseMap(
 247                             csp, ucase_toFullLower,
 248                             dest+destIndex, destCapacity-destIndex,
 249                             src, csc,
 250                             titleLimit, index,
 251                             locale, locCache,
 252                             pErrorCode);
 253                 }
 254             }
 255         }
 256
 257         prev=index;
 258     }
 259
 260     if(destIndex>destCapacity) {
 261         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
 262     }
 263     return destIndex;
 264 }
 265
 266 U_CFUNC int32_t
 267 ustr_toTitle(const UCaseProps *csp,
 268              UChar *dest, int32_t destCapacity,
 269              const UChar *src, int32_t srcLength,
 270              UBreakIterator *titleIter,
 271              const char *locale,
 272              UErrorCode *pErrorCode) {
 273     UCaseContext csc={ NULL };
 274     int32_t locCache;
 275
 276     csc.p=(void *)src;
 277     csc.limit=srcLength;
 278     locCache=0;
 279
 280     return _toTitle(csp,
 281                     dest, destCapacity,
 282                     src, &csc, srcLength,
 283                     titleIter, locale, &locCache, pErrorCode);
 284 }
 285
 286 #endif
 287
 288 /* functions available in the common library (for unistr_case.cpp) */
 289
 290 U_CFUNC int32_t
 291 ustr_toLower(const UCaseProps *csp,
 292              UChar *dest, int32_t destCapacity,
 293              const UChar *src, int32_t srcLength,
 294              const char *locale,
 295              UErrorCode *pErrorCode) {
 296     UCaseContext csc={ NULL };
 297     int32_t locCache;
 298
 299     csc.p=(void *)src;
 300     csc.limit=srcLength;
 301     locCache=0;
 302
 303     return _caseMap(csp, ucase_toFullLower,
 304                     dest, destCapacity,
 305                     src, &csc, 0, srcLength,
 306                     locale, &locCache, pErrorCode);
 307 }
 308
 309 U_CFUNC int32_t
 310 ustr_toUpper(const UCaseProps *csp,
 311              UChar *dest, int32_t destCapacity,
 312              const UChar *src, int32_t srcLength,
 313              const char *locale,
 314              UErrorCode *pErrorCode) {
 315     UCaseContext csc={ NULL };
 316     int32_t locCache;
 317
 318     csc.p=(void *)src;
 319     csc.limit=srcLength;
 320     locCache=0;
 321
 322     return _caseMap(csp, ucase_toFullUpper,
 323                     dest, destCapacity,
 324                     src, &csc, 0, srcLength,
 325                     locale, &locCache, pErrorCode);
 326 }
 327
 328 U_CFUNC int32_t
 329 ustr_foldCase(const UCaseProps *csp,
 330               UChar *dest, int32_t destCapacity,
 331               const UChar *src, int32_t srcLength,
 332               uint32_t options,
 333               UErrorCode *pErrorCode) {
 334     int32_t srcIndex, destIndex;
 335
 336     const UChar *s;
 337     UChar32 c, c2;
 338
 339     /* case mapping loop */
 340     srcIndex=destIndex=0;
 341     while(srcIndex<srcLength) {
 342         U16_NEXT(src, srcIndex, srcLength, c);
 343         c=ucase_toFullFolding(csp, c, &s, options);
 344         if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0xffff : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0xffff)) {
 345             /* fast path version of appendResult() for BMP results */
 346             dest[destIndex++]=(UChar)c2;
 347         } else {
 348             destIndex=appendResult(dest, destIndex, destCapacity, c, s);
 349         }
 350     }
 351
 352     if(destIndex>destCapacity) {
 353         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
 354     }
 355     return destIndex;
 356 }
 357
 /*
  * Argument checking and buffer handling for string case mapping
  * are implemented in one common function.
  * These constants select which case mapping that function performs.
  */
 enum {
     TO_LOWER=0,
     TO_UPPER=1,
     TO_TITLE=2,
     FOLD_CASE=3
 };
 368
 369 /* common internal function for public API functions */
 370
 371 static int32_t
 372 caseMap(UChar *dest, int32_t destCapacity,
 373         const UChar *src, int32_t srcLength,
 374         UBreakIterator *titleIter,
 375         const char *locale,
 376         uint32_t options,
 377         int32_t toWhichCase,
 378         UErrorCode *pErrorCode) {
 379     UChar buffer[300];
 380     UChar *temp;
 381
 382     const UCaseProps *csp;
 383
 384     int32_t destLength;
 385     UBool ownTitleIter;
 386
 387     /* check argument values */
 388     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
 389         return 0;
 390     }
 391     if( destCapacity<0 ||
 392         (dest==NULL && destCapacity>0) ||
 393         src==NULL ||
 394         srcLength<-1
 395     ) {
 396         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
 397         return 0;
 398     }
 399
 400     csp=ucase_getSingleton(pErrorCode);
 401     if(U_FAILURE(*pErrorCode)) {
 402         return 0;
 403     }
 404
 405     /* get the string length */
 406     if(srcLength==-1) {
 407         srcLength=u_strlen(src);
 408     }
 409
 410     /* check for overlapping source and destination */
 411     if( dest!=NULL &&
 412         ((src>=dest && src<(dest+destCapacity)) ||
 413          (dest>=src && dest<(src+srcLength)))
 414     ) {
 415         /* overlap: provide a temporary destination buffer and later copy the result */
 416         if(destCapacity<=(sizeof(buffer)/U_SIZEOF_UCHAR)) {
 417             /* the stack buffer is large enough */
 418             temp=buffer;
 419         } else {
 420             /* allocate a buffer */
 421             temp=(UChar *)uprv_malloc(destCapacity*U_SIZEOF_UCHAR);
 422             if(temp==NULL) {
 423                 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
 424                 return 0;
 425             }
 426         }
 427     } else {
 428         temp=dest;
 429     }
 430
 431     ownTitleIter=FALSE;
 432     destLength=0;
 433
 434     if(toWhichCase==FOLD_CASE) {
 435         destLength=ustr_foldCase(csp, temp, destCapacity, src, srcLength,
 436                                  options, pErrorCode);
 437     } else {
 438         UCaseContext csc={ NULL };
 439         int32_t locCache;
 440
 441         csc.p=(void *)src;
 442         csc.limit=srcLength;
 443         locCache=0;
 444
 445         /* the internal functions require locale!=NULL */
 446         if(locale==NULL) {
 447             locale=uloc_getDefault();
 448         }
 449
 450         if(toWhichCase==TO_LOWER) {
 451             destLength=_caseMap(csp, ucase_toFullLower,
 452                                 temp, destCapacity,
 453                                 src, &csc,
 454                                 0, srcLength,
 455                                 locale, &locCache, pErrorCode);
 456         } else if(toWhichCase==TO_UPPER) {
 457             destLength=_caseMap(csp, ucase_toFullUpper,
 458                                 temp, destCapacity,
 459                                 src, &csc,
 460                                 0, srcLength,
 461                                 locale, &locCache, pErrorCode);
 462         } else /* if(toWhichCase==TO_TITLE) */ {
 463     #if UCONFIG_NO_BREAK_ITERATION
 464             *pErrorCode=U_UNSUPPORTED_ERROR;
 465     #else
 466             if(titleIter==NULL) {
 467                 titleIter=ubrk_open(UBRK_WORD, locale,
 468                                     src, srcLength,
 469                                     pErrorCode);
 470                 ownTitleIter=(UBool)U_SUCCESS(*pErrorCode);
 471             }
 472             if(U_SUCCESS(*pErrorCode)) {
 473                 destLength=_toTitle(csp, temp, destCapacity,
 474                                     src, &csc, srcLength,
 475                                     titleIter, locale, &locCache, pErrorCode);
 476             }
 477     #endif
 478         }
 479     }
 480     if(temp!=dest) {
 481         /* copy the result string to the destination buffer */
 482         if(destLength>0) {
 483             int32_t copyLength= destLength<=destCapacity ? destLength : destCapacity;
 484             if(copyLength>0) {
 485                 uprv_memmove(dest, temp, copyLength*U_SIZEOF_UCHAR);
 486             }
 487         }
 488         if(temp!=buffer) {
 489             uprv_free(temp);
 490         }
 491     }
 492
 493 #if !UCONFIG_NO_BREAK_ITERATION
 494     if(ownTitleIter) {
 495         ubrk_close(titleIter);
 496     }
 497 #endif
 498
 499     return u_terminateUChars(dest, destCapacity, destLength, pErrorCode);
 500 }
 501
 502 /* public API functions */
 503
 504 U_CAPI int32_t U_EXPORT2
 505 u_strToLower(UChar *dest, int32_t destCapacity,
 506              const UChar *src, int32_t srcLength,
 507              const char *locale,
 508              UErrorCode *pErrorCode) {
 509     return caseMap(dest, destCapacity,
 510                    src, srcLength,
 511                    NULL, locale, 0,
 512                    TO_LOWER, pErrorCode);
 513 }
 514
 515 U_CAPI int32_t U_EXPORT2
 516 u_strToUpper(UChar *dest, int32_t destCapacity,
 517              const UChar *src, int32_t srcLength,
 518              const char *locale,
 519              UErrorCode *pErrorCode) {
 520     return caseMap(dest, destCapacity,
 521                    src, srcLength,
 522                    NULL, locale, 0,
 523                    TO_UPPER, pErrorCode);
 524 }
 525
 526 #if !UCONFIG_NO_BREAK_ITERATION
 527
 528 U_CAPI int32_t U_EXPORT2
 529 u_strToTitle(UChar *dest, int32_t destCapacity,
 530              const UChar *src, int32_t srcLength,
 531              UBreakIterator *titleIter,
 532              const char *locale,
 533              UErrorCode *pErrorCode) {
 534     return caseMap(dest, destCapacity,
 535                    src, srcLength,
 536                    titleIter, locale, 0,
 537                    TO_TITLE, pErrorCode);
 538 }
 539
 540 #endif
 541
 542 U_CAPI int32_t U_EXPORT2
 543 u_strFoldCase(UChar *dest, int32_t destCapacity,
 544               const UChar *src, int32_t srcLength,
 545               uint32_t options,
 546               UErrorCode *pErrorCode) {
 547     return caseMap(dest, destCapacity,
 548                    src, srcLength,
 549                    NULL, NULL, options,
 550                    FOLD_CASE, pErrorCode);
 551 }
 552
 553 /* case-insensitive string comparisons -------------------------------------- */
 554
 555 /*
 556  * This function is a copy of unorm_cmpEquivFold() minus the parts for
 557  * canonical equivalence.
 558  * Keep the functions in sync, and see there for how this works.
 559  * The duplication is for modularization:
 560  * It makes caseless (but not canonical caseless) matches independent of
 561  * the normalization code.
 562  */
 563
 564 /* stack element for previous-level source/decomposition pointers */
 565 struct CmpEquivLevel {
 566     const UChar *start, *s, *limit;
 567 };
 568 typedef struct CmpEquivLevel CmpEquivLevel;
 569
 570 /* internal function */
 /*
  * Compares s1 and s2 code unit by code unit and, where they differ,
  * case-folds the differing code points with ucase_toFullFolding() and
  * continues the comparison inside the folding results.
  *
  * length1/length2==-1 means that the string is NUL-terminated.
  * With _STRNCMP_STYLE in options, a NUL also ends a string that has
  * an explicit length.
  * With U_COMPARE_CODE_POINT_ORDER in options, differing code units are
  * fixed up so that the result reflects code point order.
  *
  * Returns 0 if both strings end at the same time (or if the case
  * properties cannot be loaded, in which case *pErrorCode is a failure),
  * -1 if string 1 ends first, 1 if string 2 ends first,
  * and otherwise the difference of the first differing code units.
  *
  * The arguments are not checked; pErrorCode must not be NULL.
  */
  571 U_CFUNC int32_t
  572 u_strcmpFold(const UChar *s1, int32_t length1,
  573              const UChar *s2, int32_t length2,
  574              uint32_t options,
  575              UErrorCode *pErrorCode) {
  576     const UCaseProps *csp;
  577
  578     /* current-level start/limit - s1/s2 as current */
  579     const UChar *start1, *start2, *limit1, *limit2;
  580
  581     /* case folding variables */
  582     const UChar *p;
  583     int32_t length;
  584
  585     /* stacks of previous-level start/current/limit */
  586     CmpEquivLevel stack1[2], stack2[2];
  587
  588     /* case folding buffers, only use current-level start/limit */
  589     UChar fold1[UCASE_MAX_STRING_LENGTH+1], fold2[UCASE_MAX_STRING_LENGTH+1];
  590
  591     /* track which is the current level per string */
  592     int32_t level1, level2;
  593
  594     /* current code units, and code points for lookups */
  595     UChar32 c1, c2, cp1, cp2;
  596
  597     /* no argument error checking because this itself is not an API */
  598
  599     /*
  600      * assume that at least the option U_COMPARE_IGNORE_CASE is set
  601      * otherwise this function would have to behave exactly as uprv_strCompare()
  602      */
  603     csp=ucase_getSingleton(pErrorCode);
  604     if(U_FAILURE(*pErrorCode)) {
  605         return 0;
  606     }
  607
  608     /* initialize */
          /* a NULL limit marks a NUL-terminated string */
  609     start1=s1;
  610     if(length1==-1) {
  611         limit1=NULL;
  612     } else {
  613         limit1=s1+length1;
  614     }
  615
  616     start2=s2;
  617     if(length2==-1) {
  618         limit2=NULL;
  619     } else {
  620         limit2=s2+length2;
  621     }
  622
  623     level1=level2=0;
  624     c1=c2=-1;
  625
  626     /* comparison loop */
  627     for(;;) {
  628         /*
  629          * here a code unit value of -1 means "get another code unit"
  630          * below it will mean "this source is finished"
  631          */
  632
  633         if(c1<0) {
  634             /* get next code unit from string 1, post-increment */
  635             for(;;) {
  636                 if(s1==limit1 || ((c1=*s1)==0 && (limit1==NULL || (options&_STRNCMP_STYLE)))) {
  637                     if(level1==0) {
  638                         c1=-1;
  639                         break;
  640                     }
  641                 } else {
  642                     ++s1;
  643                     break;
  644                 }
  645
  646                 /* reached end of level buffer, pop one level */
                      /*
                       * NOTE(review): this function only ever pushes to stack1[0];
                       * the start1==NULL test presumably mirrors
                       * unorm_cmpEquivFold() - confirm.
                       */
  647                 do {
  648                     --level1;
  649                     start1=stack1[level1].start;
  650                 } while(start1==NULL);
  651                 s1=stack1[level1].s;
  652                 limit1=stack1[level1].limit;
  653             }
  654         }
  655
  656         if(c2<0) {
  657             /* get next code unit from string 2, post-increment */
  658             for(;;) {
  659                 if(s2==limit2 || ((c2=*s2)==0 && (limit2==NULL || (options&_STRNCMP_STYLE)))) {
  660                     if(level2==0) {
  661                         c2=-1;
  662                         break;
  663                     }
  664                 } else {
  665                     ++s2;
  666                     break;
  667                 }
  668
  669                 /* reached end of level buffer, pop one level */
  670                 do {
  671                     --level2;
  672                     start2=stack2[level2].start;
  673                 } while(start2==NULL);
  674                 s2=stack2[level2].s;
  675                 limit2=stack2[level2].limit;
  676             }
  677         }
  678
  679         /*
  680          * compare c1 and c2
  681          * either variable c1, c2 is -1 only if the corresponding string is finished
  682          */
  683         if(c1==c2) {
  684             if(c1<0) {
  685                 return 0;   /* c1==c2==-1 indicating end of strings */
  686             }
  687             c1=c2=-1;       /* make us fetch new code units */
  688             continue;
  689         } else if(c1<0) {
  690             return -1;      /* string 1 ends before string 2 */
  691         } else if(c2<0) {
  692             return 1;       /* string 2 ends before string 1 */
  693         }
  694         /* c1!=c2 && c1>=0 && c2>=0 */
  695
  696         /* get complete code points for c1, c2 for lookups if either is a surrogate */
  697         cp1=c1;
  698         if(U_IS_SURROGATE(c1)) {
  699             UChar c;
  700
  701             if(U_IS_SURROGATE_LEAD(c1)) {
  702                 if(s1!=limit1 && U16_IS_TRAIL(c=*s1)) {
  703                     /* advance ++s1; only below if cp1 decomposes/case-folds */
  704                     cp1=U16_GET_SUPPLEMENTARY(c1, c);
  705                 }
  706             } else /* isTrail(c1) */ {
  707                 if(start1<=(s1-2) && U16_IS_LEAD(c=*(s1-2))) {
  708                     cp1=U16_GET_SUPPLEMENTARY(c, c1);
  709                 }
  710             }
  711         }
  712
  713         cp2=c2;
  714         if(U_IS_SURROGATE(c2)) {
  715             UChar c;
  716
  717             if(U_IS_SURROGATE_LEAD(c2)) {
  718                 if(s2!=limit2 && U16_IS_TRAIL(c=*s2)) {
  719                     /* advance ++s2; only below if cp2 decomposes/case-folds */
  720                     cp2=U16_GET_SUPPLEMENTARY(c2, c);
  721                 }
  722             } else /* isTrail(c2) */ {
  723                 if(start2<=(s2-2) && U16_IS_LEAD(c=*(s2-2))) {
  724                     cp2=U16_GET_SUPPLEMENTARY(c, c2);
  725                 }
  726             }
  727         }
  728
  729         /*
  730          * go down one level for each string
  731          * continue with the main loop as soon as there is a real change
  732          */
  733
              /* only fold at level 0: text that is already a folding result is not folded again */
  734         if( level1==0 &&
  735             (length=ucase_toFullFolding(csp, (UChar32)cp1, &p, options))>=0
  736         ) {
  737             /* cp1 case-folds to the code point "length" or to p[length] */
  738             if(U_IS_SURROGATE(c1)) {
  739                 if(U_IS_SURROGATE_LEAD(c1)) {
  740                     /* advance beyond source surrogate pair if it case-folds */
  741                     ++s1;
  742                 } else /* isTrail(c1) */ {
  743                     /*
  744                      * we got a supplementary code point when hitting its trail surrogate,
  745                      * therefore the lead surrogate must have been the same as in the other string;
  746                      * compare this decomposition with the lead surrogate in the other string
  747                      * remember that this simulates bulk text replacement:
  748                      * the decomposition would replace the entire code point
  749                      */
  750                     --s2;
  751                     c2=*(s2-1);
  752                 }
  753             }
  754
  755             /* push current level pointers */
  756             stack1[0].start=start1;
  757             stack1[0].s=s1;
  758             stack1[0].limit=limit1;
  759             ++level1;
  760
  761             /* copy the folding result to fold1[] */
                  /* a string of length UChars at p, or else a single code point to be encoded in UTF-16 */
  762             if(length<=UCASE_MAX_STRING_LENGTH) {
  763                 u_memcpy(fold1, p, length);
  764             } else {
  765                 int32_t i=0;
  766                 U16_APPEND_UNSAFE(fold1, i, length);
  767                 length=i;
  768             }
  769
  770             /* set next level pointers to case folding */
  771             start1=s1=fold1;
  772             limit1=fold1+length;
  773
  774             /* get ready to read from decomposition, continue with loop */
  775             c1=-1;
  776             continue;
  777         }
  778
  779         if( level2==0 &&
  780             (length=ucase_toFullFolding(csp, (UChar32)cp2, &p, options))>=0
  781         ) {
  782             /* cp2 case-folds to the code point "length" or to p[length] */
  783             if(U_IS_SURROGATE(c2)) {
  784                 if(U_IS_SURROGATE_LEAD(c2)) {
  785                     /* advance beyond source surrogate pair if it case-folds */
  786                     ++s2;
  787                 } else /* isTrail(c2) */ {
  788                     /*
  789                      * we got a supplementary code point when hitting its trail surrogate,
  790                      * therefore the lead surrogate must have been the same as in the other string;
  791                      * compare this decomposition with the lead surrogate in the other string
  792                      * remember that this simulates bulk text replacement:
  793                      * the decomposition would replace the entire code point
  794                      */
  795                     --s1;
  796                     c1=*(s1-1);
  797                 }
  798             }
  799
  800             /* push current level pointers */
  801             stack2[0].start=start2;
  802             stack2[0].s=s2;
  803             stack2[0].limit=limit2;
  804             ++level2;
  805
  806             /* copy the folding result to fold2[] */
  807             if(length<=UCASE_MAX_STRING_LENGTH) {
  808                 u_memcpy(fold2, p, length);
  809             } else {
  810                 int32_t i=0;
  811                 U16_APPEND_UNSAFE(fold2, i, length);
  812                 length=i;
  813             }
  814
  815             /* set next level pointers to case folding */
  816             start2=s2=fold2;
  817             limit2=fold2+length;
  818
  819             /* get ready to read from decomposition, continue with loop */
  820             c2=-1;
  821             continue;
  822         }
  823
  824         /*
  825          * no decomposition/case folding, max level for both sides:
  826          * return difference result
  827          *
  828          * code point order comparison must not just return cp1-cp2
  829          * because when single surrogates are present then the surrogate pairs
  830          * that formed cp1 and cp2 may be from different string indexes
  831          *
  832          * example: { d800 d800 dc01 } vs. { d800 dc00 }, compare at second code units
  833          * c1=d800 cp1=10001 c2=dc00 cp2=10000
  834          * cp1-cp2>0 but c1-c2<0 and in fact in UTF-32 it is { d800 10001 } < { 10000 }
  835          *
  836          * therefore, use same fix-up as in ustring.c/uprv_strCompare()
  837          * except: uprv_strCompare() fetches c=*s while this function fetches c=*s++
  838          * so we have slightly different pointer/start/limit comparisons here
  839          */
  840
  841         if(c1>=0xd800 && c2>=0xd800 && (options&U_COMPARE_CODE_POINT_ORDER)) {
  842             /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
  843             if(
  844                 (c1<=0xdbff && s1!=limit1 && U16_IS_TRAIL(*s1)) ||
  845                 (U16_IS_TRAIL(c1) && start1!=(s1-1) && U16_IS_LEAD(*(s1-2)))
  846             ) {
  847                 /* part of a surrogate pair, leave >=d800 */
  848             } else {
  849                 /* BMP code point - may be surrogate code point - make <d800 */
  850                 c1-=0x2800;
  851             }
  852
  853             if(
  854                 (c2<=0xdbff && s2!=limit2 && U16_IS_TRAIL(*s2)) ||
  855                 (U16_IS_TRAIL(c2) && start2!=(s2-1) && U16_IS_LEAD(*(s2-2)))
  856             ) {
  857                 /* part of a surrogate pair, leave >=d800 */
  858             } else {
  859                 /* BMP code point - may be surrogate code point - make <d800 */
  860                 c2-=0x2800;
  861             }
  862         }
  863
  864         return c1-c2;
  865     }
  866 }
 867
 868 /* public API functions */
 869
 870 U_CAPI int32_t U_EXPORT2
 871 u_strCaseCompare(const UChar *s1, int32_t length1,
 872                  const UChar *s2, int32_t length2,
 873                  uint32_t options,
 874                  UErrorCode *pErrorCode) {
 875     /* argument checking */
 876     if(pErrorCode==0 || U_FAILURE(*pErrorCode)) {
 877         return 0;
 878     }
 879     if(s1==NULL || length1<-1 || s2==NULL || length2<-1) {
 880         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
 881         return 0;
 882     }
 883     return u_strcmpFold(s1, length1, s2, length2,
 884                         options|U_COMPARE_IGNORE_CASE,
 885                         pErrorCode);
 886 }
 887
 888 U_CAPI int32_t U_EXPORT2
 889 u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options) {
 890     UErrorCode errorCode=U_ZERO_ERROR;
 891     return u_strcmpFold(s1, -1, s2, -1,
 892                         options|U_COMPARE_IGNORE_CASE,
 893                         &errorCode);
 894 }
 895
 896 U_CAPI int32_t U_EXPORT2
 897 u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options) {
 898     UErrorCode errorCode=U_ZERO_ERROR;
 899     return u_strcmpFold(s1, length, s2, length,
 900                         options|U_COMPARE_IGNORE_CASE,
 901                         &errorCode);
 902 }
 903
 904 U_CAPI int32_t U_EXPORT2
 905 u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options) {
 906     UErrorCode errorCode=U_ZERO_ERROR;
 907     return u_strcmpFold(s1, n, s2, n,
 908                         options|(U_COMPARE_IGNORE_CASE|_STRNCMP_STYLE),
 909                         &errorCode);
 910 }