icuSources/common/ustrcase.c

   1 /*
   2 *******************************************************************************
   3 *
   4 *   Copyright (C) 2001-2004, International Business Machines
   5 *   Corporation and others.  All Rights Reserved.
   6 *
   7 *******************************************************************************
   8 *   file name:  ustrcase.c
   9 *   encoding:   US-ASCII
  10 *   tab size:   8 (not used)
  11 *   indentation:4
  12 *
  13 *   created on: 2002feb20
  14 *   created by: Markus W. Scherer
  15 *
  16 *   Implementation file for string casing C API functions.
  17 *   Uses functions from uchar.c for basic functionality that requires access
  18 *   to the Unicode Character Database (uprops.dat).
  19 */
  20
  21 #include "unicode/utypes.h"
  22 #include "unicode/uloc.h"
  23 #include "unicode/ustring.h"
  24 #include "unicode/ubrk.h"
  25 #include "cmemory.h"
  26 #include "ucase.h"
  27 #include "unormimp.h"
  28 #include "ustr_imp.h"
  29
  30 /* string casing ------------------------------------------------------------ */
  31
  32 /* append a full case mapping result, see UCASE_MAX_STRING_LENGTH */
  33 static U_INLINE int32_t
  34 appendResult(UChar *dest, int32_t destIndex, int32_t destCapacity,
  35              int32_t result, const UChar *s) {
  36     UChar32 c;
  37     int32_t length;
  38
  39     /* decode the result */
  40     if(result<0) {
  41         /* (not) original code point */
  42         c=~result;
  43         length=-1;
  44     } else if(result<=UCASE_MAX_STRING_LENGTH) {
  45         c=U_SENTINEL;
  46         length=result;
  47     } else {
  48         c=result;
  49         length=-1;
  50     }
  51
  52     if(destIndex<destCapacity) {
  53         /* append the result */
  54         if(length<0) {
  55             /* code point */
  56             UBool isError=FALSE;
  57             U16_APPEND(dest, destIndex, destCapacity, c, isError);
  58             if(isError) {
  59                 /* overflow, nothing written */
  60                 destIndex+=U16_LENGTH(c);
  61             }
  62         } else {
  63             /* string */
  64             if((destIndex+length)<=destCapacity) {
  65                 while(length>0) {
  66                     dest[destIndex++]=*s++;
  67                     --length;
  68                 }
  69             } else {
  70                 /* overflow */
  71                 destIndex+=length;
  72             }
  73         }
  74     } else {
  75         /* preflight */
  76         if(length<0) {
  77             destIndex+=U16_LENGTH(c);
  78         } else {
  79             destIndex+=length;
  80         }
  81     }
  82     return destIndex;
  83 }
  84
  85 static UChar32 U_CALLCONV
  86 utf16_caseContextIterator(void *context, int8_t dir) {
  87     UCaseContext *csc=(UCaseContext *)context;
  88     UChar32 c;
  89
  90     if(dir<0) {
  91         /* reset for backward iteration */
  92         csc->index=csc->cpStart;
  93         csc->dir=dir;
  94     } else if(dir>0) {
  95         /* reset for forward iteration */
  96         csc->index=csc->cpLimit;
  97         csc->dir=dir;
  98     } else {
  99         /* continue current iteration direction */
 100         dir=csc->dir;
 101     }
 102
 103     if(dir<0) {
 104         if(csc->start<csc->index) {
 105             U16_PREV((const UChar *)csc->p, csc->start, csc->index, c);
 106             return c;
 107         }
 108     } else {
 109         if(csc->index<csc->limit) {
 110             U16_NEXT((const UChar *)csc->p, csc->index, csc->limit, c);
 111             return c;
 112         }
 113     }
 114     return U_SENTINEL;
 115 }
 116
 117 typedef int32_t U_CALLCONV
 118 UCaseMapFull(const UCaseProps *csp, UChar32 c,
 119              UCaseContextIterator *iter, void *context,
 120              const UChar **pString,
 121              const char *locale, int32_t *locCache);
 122
 123 /*
 124  * Lowercases [srcStart..srcLimit[ but takes
 125  * context [0..srcLength[ into account.
 126  */
 127 static int32_t
 128 _caseMap(UCaseProps *csp, UCaseMapFull *map,
 129          UChar *dest, int32_t destCapacity,
 130          const UChar *src, UCaseContext *csc,
 131          int32_t srcStart, int32_t srcLimit,
 132          const char *locale, int32_t *locCache,
 133          UErrorCode *pErrorCode) {
 134     const UChar *s;
 135     UChar32 c;
 136     int32_t srcIndex, destIndex;
 137
 138     /* case mapping loop */
 139     srcIndex=srcStart;
 140     destIndex=0;
 141     while(srcIndex<srcLimit) {
 142         csc->cpStart=srcIndex;
 143         U16_NEXT(src, srcIndex, srcLimit, c);
 144         csc->cpLimit=srcIndex;
 145         c=map(csp, c, utf16_caseContextIterator, csc, &s, locale, locCache);
 146         destIndex=appendResult(dest, destIndex, destCapacity, c, s);
 147     }
 148
 149     if(destIndex>destCapacity) {
 150         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
 151     }
 152     return destIndex;
 153 }
 154
 155 #if !UCONFIG_NO_BREAK_ITERATION
 156
 157 /*
 158  * Internal titlecasing function.
 159  *
 160  * Must get titleIter!=NULL.
 161  */
 162 static int32_t
 163 _toTitle(UCaseProps *csp,
 164          UChar *dest, int32_t destCapacity,
 165          const UChar *src, UCaseContext *csc,
 166          int32_t srcLength,
 167          UBreakIterator *titleIter,
 168          const char *locale, int32_t *locCache,
 169          UErrorCode *pErrorCode) {
 170     const UChar *s;
 171     UChar32 c;
 172     int32_t prev, index, destIndex;
 173     UBool isFirstIndex;
 174
 175     /* set up local variables */
 176     destIndex=0;
 177     prev=0;
 178     isFirstIndex=TRUE;
 179
 180     /* titlecasing loop */
 181     while(prev<srcLength) {
 182         /* find next index where to titlecase */
 183         if(isFirstIndex) {
 184             isFirstIndex=FALSE;
 185             index=ubrk_first(titleIter);
 186         } else {
 187             index=ubrk_next(titleIter);
 188         }
 189         if(index==UBRK_DONE || index>srcLength) {
 190             index=srcLength;
 191         }
 192
 193         /* lowercase [prev..index[ */
 194         if(prev<index) {
 195             destIndex+=
 196                 _caseMap(
 197                     csp, ucase_toFullLower,
 198                     dest+destIndex, destCapacity-destIndex,
 199                     src, csc,
 200                     prev, index,
 201                     locale, locCache,
 202                     pErrorCode);
 203         }
 204
 205         if(index>=srcLength) {
 206             break;
 207         }
 208
 209         /* titlecase the character at the found index */
 210         csc->cpStart=index;
 211         U16_NEXT(src, index, srcLength, c);
 212         csc->cpLimit=index;
 213         c=ucase_toFullTitle(csp, c, utf16_caseContextIterator, csc, &s, locale, locCache);
 214         destIndex=appendResult(dest, destIndex, destCapacity, c, s);
 215
 216         prev=index;
 217     }
 218
 219     if(destIndex>destCapacity) {
 220         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
 221     }
 222     return destIndex;
 223 }
 224
 225 U_CFUNC int32_t
 226 ustr_toTitle(UCaseProps *csp,
 227              UChar *dest, int32_t destCapacity,
 228              const UChar *src, int32_t srcLength,
 229              UBreakIterator *titleIter,
 230              const char *locale,
 231              UErrorCode *pErrorCode) {
 232     UCaseContext csc={ NULL };
 233     int32_t locCache;
 234
 235     csc.p=(void *)src;
 236     csc.limit=srcLength;
 237     locCache=0;
 238
 239     return _toTitle(csp,
 240                     dest, destCapacity,
 241                     src, &csc, srcLength,
 242                     titleIter, locale, &locCache, pErrorCode);
 243 }
 244
 245 #endif
 246
 247 /* functions available in the common library (for unistr_case.cpp) */
 248
 249 U_CFUNC int32_t
 250 ustr_toLower(UCaseProps *csp,
 251              UChar *dest, int32_t destCapacity,
 252              const UChar *src, int32_t srcLength,
 253              const char *locale,
 254              UErrorCode *pErrorCode) {
 255     UCaseContext csc={ NULL };
 256     int32_t locCache;
 257
 258     csc.p=(void *)src;
 259     csc.limit=srcLength;
 260     locCache=0;
 261
 262     return _caseMap(csp, ucase_toFullLower,
 263                     dest, destCapacity,
 264                     src, &csc, 0, srcLength,
 265                     locale, &locCache, pErrorCode);
 266 }
 267
 268 U_CFUNC int32_t
 269 ustr_toUpper(UCaseProps *csp,
 270              UChar *dest, int32_t destCapacity,
 271              const UChar *src, int32_t srcLength,
 272              const char *locale,
 273              UErrorCode *pErrorCode) {
 274     UCaseContext csc={ NULL };
 275     int32_t locCache;
 276
 277     csc.p=(void *)src;
 278     csc.limit=srcLength;
 279     locCache=0;
 280
 281     return _caseMap(csp, ucase_toFullUpper,
 282                     dest, destCapacity,
 283                     src, &csc, 0, srcLength,
 284                     locale, &locCache, pErrorCode);
 285 }
 286
 287 U_CFUNC int32_t
 288 ustr_foldCase(UCaseProps *csp,
 289               UChar *dest, int32_t destCapacity,
 290               const UChar *src, int32_t srcLength,
 291               uint32_t options,
 292               UErrorCode *pErrorCode) {
 293     int32_t srcIndex, destIndex;
 294
 295     const UChar *s;
 296     UChar32 c;
 297
 298     /* case mapping loop */
 299     srcIndex=destIndex=0;
 300     while(srcIndex<srcLength) {
 301         U16_NEXT(src, srcIndex, srcLength, c);
 302         c=ucase_toFullFolding(csp, c, &s, options);
 303         destIndex=appendResult(dest, destIndex, destCapacity, c, s);
 304     }
 305
 306     if(destIndex>destCapacity) {
 307         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
 308     }
 309     return destIndex;
 310 }
 311
 312 /*
 313  * Implement argument checking and buffer handling
 314  * for string case mapping as a common function.
 315  */
 /* selector values for the toWhichCase parameter of caseMap() */
 enum {
     TO_LOWER=0,     /* lowercase */
     TO_UPPER,       /* uppercase */
     TO_TITLE,       /* titlecase */
     FOLD_CASE       /* case folding */
 };
 322
 323 /* common internal function for public API functions */
 324
 325 static int32_t
 326 caseMap(UChar *dest, int32_t destCapacity,
 327         const UChar *src, int32_t srcLength,
 328         UBreakIterator *titleIter,
 329         const char *locale,
 330         uint32_t options,
 331         int32_t toWhichCase,
 332         UErrorCode *pErrorCode) {
 333     UChar buffer[300];
 334     UChar *temp;
 335
 336     UCaseProps *csp;
 337
 338     int32_t destLength;
 339     UBool ownTitleIter;
 340
 341     /* check argument values */
 342     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
 343         return 0;
 344     }
 345     if( destCapacity<0 ||
 346         (dest==NULL && destCapacity>0) ||
 347         src==NULL ||
 348         srcLength<-1
 349     ) {
 350         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
 351         return 0;
 352     }
 353
 354     csp=ucase_getSingleton(pErrorCode);
 355     if(U_FAILURE(*pErrorCode)) {
 356         return 0;
 357     }
 358
 359     /* get the string length */
 360     if(srcLength==-1) {
 361         srcLength=u_strlen(src);
 362     }
 363
 364     /* check for overlapping source and destination */
 365     if( dest!=NULL &&
 366         ((src>=dest && src<(dest+destCapacity)) ||
 367          (dest>=src && dest<(src+srcLength)))
 368     ) {
 369         /* overlap: provide a temporary destination buffer and later copy the result */
 370         if(destCapacity<=(sizeof(buffer)/U_SIZEOF_UCHAR)) {
 371             /* the stack buffer is large enough */
 372             temp=buffer;
 373         } else {
 374             /* allocate a buffer */
 375             temp=(UChar *)uprv_malloc(destCapacity*U_SIZEOF_UCHAR);
 376             if(temp==NULL) {
 377                 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
 378                 return 0;
 379             }
 380         }
 381     } else {
 382         temp=dest;
 383     }
 384
 385     ownTitleIter=FALSE;
 386     destLength=0;
 387
 388     if(toWhichCase==FOLD_CASE) {
 389         destLength=ustr_foldCase(csp, temp, destCapacity, src, srcLength,
 390                                  options, pErrorCode);
 391     } else {
 392         UCaseContext csc={ NULL };
 393         int32_t locCache;
 394
 395         csc.p=(void *)src;
 396         csc.limit=srcLength;
 397         locCache=0;
 398
 399         /* the internal functions require locale!=NULL */
 400         if(locale==NULL) {
 401             locale=uloc_getDefault();
 402         }
 403
 404         if(toWhichCase==TO_LOWER) {
 405             destLength=_caseMap(csp, ucase_toFullLower,
 406                                 temp, destCapacity,
 407                                 src, &csc,
 408                                 0, srcLength,
 409                                 locale, &locCache, pErrorCode);
 410         } else if(toWhichCase==TO_UPPER) {
 411             destLength=_caseMap(csp, ucase_toFullUpper,
 412                                 temp, destCapacity,
 413                                 src, &csc,
 414                                 0, srcLength,
 415                                 locale, &locCache, pErrorCode);
 416         } else /* if(toWhichCase==TO_TITLE) */ {
 417     #if UCONFIG_NO_BREAK_ITERATION
 418             *pErrorCode=U_UNSUPPORTED_ERROR;
 419     #else
 420             if(titleIter==NULL) {
 421                 titleIter=ubrk_open(UBRK_WORD, locale,
 422                                     src, srcLength,
 423                                     pErrorCode);
 424                 ownTitleIter=(UBool)U_SUCCESS(*pErrorCode);
 425             }
 426             if(U_SUCCESS(*pErrorCode)) {
 427                 destLength=_toTitle(csp, temp, destCapacity,
 428                                     src, &csc, srcLength,
 429                                     titleIter, locale, &locCache, pErrorCode);
 430             }
 431     #endif
 432         }
 433     }
 434     if(temp!=dest) {
 435         /* copy the result string to the destination buffer */
 436         if(destLength>0) {
 437             int32_t copyLength= destLength<=destCapacity ? destLength : destCapacity;
 438             if(copyLength>0) {
 439                 uprv_memmove(dest, temp, copyLength*U_SIZEOF_UCHAR);
 440             }
 441         }
 442         if(temp!=buffer) {
 443             uprv_free(temp);
 444         }
 445     }
 446
 447 #if !UCONFIG_NO_BREAK_ITERATION
 448     if(ownTitleIter) {
 449         ubrk_close(titleIter);
 450     }
 451 #endif
 452
 453     return u_terminateUChars(dest, destCapacity, destLength, pErrorCode);
 454 }
 455
 456 /* public API functions */
 457
 458 U_CAPI int32_t U_EXPORT2
 459 u_strToLower(UChar *dest, int32_t destCapacity,
 460              const UChar *src, int32_t srcLength,
 461              const char *locale,
 462              UErrorCode *pErrorCode) {
 463     return caseMap(dest, destCapacity,
 464                    src, srcLength,
 465                    NULL, locale, 0,
 466                    TO_LOWER, pErrorCode);
 467 }
 468
 469 U_CAPI int32_t U_EXPORT2
 470 u_strToUpper(UChar *dest, int32_t destCapacity,
 471              const UChar *src, int32_t srcLength,
 472              const char *locale,
 473              UErrorCode *pErrorCode) {
 474     return caseMap(dest, destCapacity,
 475                    src, srcLength,
 476                    NULL, locale, 0,
 477                    TO_UPPER, pErrorCode);
 478 }
 479
 480 #if !UCONFIG_NO_BREAK_ITERATION
 481
 482 U_CAPI int32_t U_EXPORT2
 483 u_strToTitle(UChar *dest, int32_t destCapacity,
 484              const UChar *src, int32_t srcLength,
 485              UBreakIterator *titleIter,
 486              const char *locale,
 487              UErrorCode *pErrorCode) {
 488     return caseMap(dest, destCapacity,
 489                    src, srcLength,
 490                    titleIter, locale, 0,
 491                    TO_TITLE, pErrorCode);
 492 }
 493
 494 #endif
 495
 496 U_CAPI int32_t U_EXPORT2
 497 u_strFoldCase(UChar *dest, int32_t destCapacity,
 498               const UChar *src, int32_t srcLength,
 499               uint32_t options,
 500               UErrorCode *pErrorCode) {
 501     return caseMap(dest, destCapacity,
 502                    src, srcLength,
 503                    NULL, NULL, options,
 504                    FOLD_CASE, pErrorCode);
 505 }
 506
 507 /* case-insensitive string comparisons -------------------------------------- */
 508
 509 /*
 510  * This function is a copy of unorm_cmpEquivFold() minus the parts for
 511  * canonical equivalence.
 512  * Keep the functions in sync, and see there for how this works.
 513  * The duplication is for modularization:
 514  * It makes caseless (but not canonical caseless) matches independent of
 515  * the normalization code.
 516  */
 517
 518 /* stack element for previous-level source/decomposition pointers */
 519 struct CmpEquivLevel {
 520     const UChar *start, *s, *limit;
 521 };
 522 typedef struct CmpEquivLevel CmpEquivLevel;
 523
 524 /*
      * Internal function: compares s1 and s2 code unit by code unit; where the
      * code units differ, each side's code point is replaced by its full case
      * folding (ucase_toFullFolding() with the given options) and the
      * comparison continues on the folded text.
      *
      * length1/length2: number of UChars, or -1 for a NUL-terminated string.
      *     With _STRNCMP_STYLE set in options, a NUL also ends a string that
      *     was given with an explicit length.
      * options: passed to ucase_toFullFolding(); U_COMPARE_CODE_POINT_ORDER and
      *     _STRNCMP_STYLE are also tested here.
      * pErrorCode: must not be NULL (it is dereferenced without a check);
      *     if it indicates a failure after ucase_getSingleton(), 0 is returned.
      *
      * Returns 0 if both strings end at the same time, -1 if string 1 ends
      * first, 1 if string 2 ends first, otherwise the difference c1-c2 of the
      * first code units that still differ after case folding (with the
      * code point order fix-up applied if U_COMPARE_CODE_POINT_ORDER is set).
      */
 525 U_CFUNC int32_t
 526 u_strcmpFold(const UChar *s1, int32_t length1,
 527              const UChar *s2, int32_t length2,
 528              uint32_t options,
 529              UErrorCode *pErrorCode) {
 530     UCaseProps *csp;
 531
 532     /* current-level start/limit - s1/s2 as current */
 533     const UChar *start1, *start2, *limit1, *limit2;
 534
 535     /* case folding variables */
 536     const UChar *p;
 537     int32_t length;
 538
 539     /* stacks of previous-level start/current/limit; only element [0] is written in this function */
 540     CmpEquivLevel stack1[2], stack2[2];
 541
 542     /* case folding buffers, only use current-level start/limit */
 543     UChar fold1[UCASE_MAX_STRING_LENGTH+1], fold2[UCASE_MAX_STRING_LENGTH+1];
 544
 545     /* track which is the current level per string */
 546     int32_t level1, level2;
 547
 548     /* current code units, and code points for lookups */
 549     UChar32 c1, c2, cp1, cp2;
 550
 551     /* no argument error checking because this itself is not an API */
 552
 553     /*
 554      * assume that at least the option U_COMPARE_IGNORE_CASE is set
 555      * otherwise this function would have to behave exactly as uprv_strCompare()
 556      */
 557     csp=ucase_getSingleton(pErrorCode);
 558     if(U_FAILURE(*pErrorCode)) {
 559         return 0;
 560     }
 561
 562     /* initialize: a limit of NULL marks a NUL-terminated string */
 563     start1=s1;
 564     if(length1==-1) {
 565         limit1=NULL;
 566     } else {
 567         limit1=s1+length1;
 568     }
 569
 570     start2=s2;
 571     if(length2==-1) {
 572         limit2=NULL;
 573     } else {
 574         limit2=s2+length2;
 575     }
 576
 577     level1=level2=0;
 578     c1=c2=-1;
 579
 580     /* comparison loop */
 581     for(;;) {
 582         /*
 583          * here a code unit value of -1 means "get another code unit"
 584          * below it will mean "this source is finished"
 585          */
 586
 587         if(c1<0) {
 588             /* get next code unit from string 1, post-increment */
 589             for(;;) {
 590                 if(s1==limit1 || ((c1=*s1)==0 && (limit1==NULL || (options&_STRNCMP_STYLE)))) {
 591                     if(level1==0) {
 592                         c1=-1;
 593                         break;
 594                     }
 595                 } else {
 596                     ++s1;
 597                     break;
 598                 }
 599
 600                 /* reached end of level buffer, pop one level */
 601                 do {
 602                     --level1;
 603                     start1=stack1[level1].start;
 604                 } while(start1==NULL);
 605                 s1=stack1[level1].s;
 606                 limit1=stack1[level1].limit;
 607             }
 608         }
 609
 610         if(c2<0) {
 611             /* get next code unit from string 2, post-increment */
 612             for(;;) {
 613                 if(s2==limit2 || ((c2=*s2)==0 && (limit2==NULL || (options&_STRNCMP_STYLE)))) {
 614                     if(level2==0) {
 615                         c2=-1;
 616                         break;
 617                     }
 618                 } else {
 619                     ++s2;
 620                     break;
 621                 }
 622
 623                 /* reached end of level buffer, pop one level */
 624                 do {
 625                     --level2;
 626                     start2=stack2[level2].start;
 627                 } while(start2==NULL);
 628                 s2=stack2[level2].s;
 629                 limit2=stack2[level2].limit;
 630             }
 631         }
 632
 633         /*
 634          * compare c1 and c2
 635          * either variable c1, c2 is -1 only if the corresponding string is finished
 636          */
 637         if(c1==c2) {
 638             if(c1<0) {
 639                 return 0;   /* c1==c2==-1 indicating end of strings */
 640             }
 641             c1=c2=-1;       /* make us fetch new code units */
 642             continue;
 643         } else if(c1<0) {
 644             return -1;      /* string 1 ends before string 2 */
 645         } else if(c2<0) {
 646             return 1;       /* string 2 ends before string 1 */
 647         }
 648         /* c1!=c2 && c1>=0 && c2>=0 */
 649
 650         /* get complete code points for c1, c2 for lookups if either is a surrogate */
 651         cp1=c1;
 652         if(U_IS_SURROGATE(c1)) {
 653             UChar c;
 654
 655             if(U_IS_SURROGATE_LEAD(c1)) {
 656                 if(s1!=limit1 && U16_IS_TRAIL(c=*s1)) {
 657                     /* advance ++s1; only below if cp1 decomposes/case-folds */
 658                     cp1=U16_GET_SUPPLEMENTARY(c1, c);
 659                 }
 660             } else /* isTrail(c1) */ {
 661                 if(start1<=(s1-2) && U16_IS_LEAD(c=*(s1-2))) {
 662                     cp1=U16_GET_SUPPLEMENTARY(c, c1);
 663                 }
 664             }
 665         }
 666
 667         cp2=c2;
 668         if(U_IS_SURROGATE(c2)) {
 669             UChar c;
 670
 671             if(U_IS_SURROGATE_LEAD(c2)) {
 672                 if(s2!=limit2 && U16_IS_TRAIL(c=*s2)) {
 673                     /* advance ++s2; only below if cp2 decomposes/case-folds */
 674                     cp2=U16_GET_SUPPLEMENTARY(c2, c);
 675                 }
 676             } else /* isTrail(c2) */ {
 677                 if(start2<=(s2-2) && U16_IS_LEAD(c=*(s2-2))) {
 678                     cp2=U16_GET_SUPPLEMENTARY(c, c2);
 679                 }
 680             }
 681         }
 682
 683         /*
 684          * go down one level for each string
 685          * continue with the main loop as soon as there is a real change
              *
              * folding is attempted only while a string is at level 0, so code
              * units that are read from fold1[]/fold2[] are not folded again
 686          */
 687
 688         if( level1==0 &&
 689             (length=ucase_toFullFolding(csp, (UChar32)cp1, &p, options))>=0
 690         ) {
 691             /* cp1 case-folds to the code point "length" or to p[length] */
 692             if(U_IS_SURROGATE(c1)) {
 693                 if(U_IS_SURROGATE_LEAD(c1)) {
 694                     /* advance beyond source surrogate pair if it case-folds */
 695                     ++s1;
 696                 } else /* isTrail(c1) */ {
 697                     /*
 698                      * we got a supplementary code point when hitting its trail surrogate,
 699                      * therefore the lead surrogate must have been the same as in the other string;
 700                      * compare this decomposition with the lead surrogate in the other string
 701                      * remember that this simulates bulk text replacement:
 702                      * the decomposition would replace the entire code point
 703                      */
 704                     --s2;
 705                     c2=*(s2-1);
 706                 }
 707             }
 708
 709             /* push current level pointers */
 710             stack1[0].start=start1;
 711             stack1[0].s=s1;
 712             stack1[0].limit=limit1;
 713             ++level1;
 714
 715             /* copy the folding result to fold1[]: either a string at p, or a single code point */
 716             if(length<=UCASE_MAX_STRING_LENGTH) {
 717                 u_memcpy(fold1, p, length);
 718             } else {
 719                 int32_t i=0;
 720                 U16_APPEND_UNSAFE(fold1, i, length);
 721                 length=i;
 722             }
 723
 724             /* set next level pointers to case folding */
 725             start1=s1=fold1;
 726             limit1=fold1+length;
 727
 728             /* get ready to read from decomposition, continue with loop */
 729             c1=-1;
 730             continue;
 731         }
 732
 733         if( level2==0 &&
 734             (length=ucase_toFullFolding(csp, (UChar32)cp2, &p, options))>=0
 735         ) {
 736             /* cp2 case-folds to the code point "length" or to p[length] */
 737             if(U_IS_SURROGATE(c2)) {
 738                 if(U_IS_SURROGATE_LEAD(c2)) {
 739                     /* advance beyond source surrogate pair if it case-folds */
 740                     ++s2;
 741                 } else /* isTrail(c2) */ {
 742                     /*
 743                      * we got a supplementary code point when hitting its trail surrogate,
 744                      * therefore the lead surrogate must have been the same as in the other string;
 745                      * compare this decomposition with the lead surrogate in the other string
 746                      * remember that this simulates bulk text replacement:
 747                      * the decomposition would replace the entire code point
 748                      */
 749                     --s1;
 750                     c1=*(s1-1);
 751                 }
 752             }
 753
 754             /* push current level pointers */
 755             stack2[0].start=start2;
 756             stack2[0].s=s2;
 757             stack2[0].limit=limit2;
 758             ++level2;
 759
 760             /* copy the folding result to fold2[]: either a string at p, or a single code point */
 761             if(length<=UCASE_MAX_STRING_LENGTH) {
 762                 u_memcpy(fold2, p, length);
 763             } else {
 764                 int32_t i=0;
 765                 U16_APPEND_UNSAFE(fold2, i, length);
 766                 length=i;
 767             }
 768
 769             /* set next level pointers to case folding */
 770             start2=s2=fold2;
 771             limit2=fold2+length;
 772
 773             /* get ready to read from decomposition, continue with loop */
 774             c2=-1;
 775             continue;
 776         }
 777
 778         /*
 779          * no decomposition/case folding, max level for both sides:
 780          * return difference result
 781          *
 782          * code point order comparison must not just return cp1-cp2
 783          * because when single surrogates are present then the surrogate pairs
 784          * that formed cp1 and cp2 may be from different string indexes
 785          *
 786          * example: { d800 d800 dc01 } vs. { d800 dc00 }, compare at second code units
 787          * c1=d800 cp1=10001 c2=dc00 cp2=10000
 788          * cp1-cp2>0 but c1-c2<0 and in fact in UTF-32 it is { d800 10001 } < { 10000 }
 789          *
 790          * therefore, use same fix-up as in ustring.c/uprv_strCompare()
 791          * except: uprv_strCompare() fetches c=*s while this functions fetches c=*s++
 792          * so we have slightly different pointer/start/limit comparisons here
 793          */
 794
 795         if(c1>=0xd800 && c2>=0xd800 && (options&U_COMPARE_CODE_POINT_ORDER)) {
 796             /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
 797             if(
 798                 (c1<=0xdbff && s1!=limit1 && U16_IS_TRAIL(*s1)) ||
 799                 (U16_IS_TRAIL(c1) && start1!=(s1-1) && U16_IS_LEAD(*(s1-2)))
 800             ) {
 801                 /* part of a surrogate pair, leave >=d800 */
 802             } else {
 803                 /* BMP code point - may be surrogate code point - make <d800 */
 804                 c1-=0x2800;
 805             }
 806
 807             if(
 808                 (c2<=0xdbff && s2!=limit2 && U16_IS_TRAIL(*s2)) ||
 809                 (U16_IS_TRAIL(c2) && start2!=(s2-1) && U16_IS_LEAD(*(s2-2)))
 810             ) {
 811                 /* part of a surrogate pair, leave >=d800 */
 812             } else {
 813                 /* BMP code point - may be surrogate code point - make <d800 */
 814                 c2-=0x2800;
 815             }
 816         }
 817
 818         return c1-c2;
 819     }
 820 }
 821
 822 /* public API functions */
 823
 824 U_CAPI int32_t U_EXPORT2
 825 u_strCaseCompare(const UChar *s1, int32_t length1,
 826                  const UChar *s2, int32_t length2,
 827                  uint32_t options,
 828                  UErrorCode *pErrorCode) {
 829     /* argument checking */
 830     if(pErrorCode==0 || U_FAILURE(*pErrorCode)) {
 831         return 0;
 832     }
 833     if(s1==NULL || length1<-1 || s2==NULL || length2<-1) {
 834         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
 835         return 0;
 836     }
 837     return u_strcmpFold(s1, length1, s2, length2,
 838                         options|U_COMPARE_IGNORE_CASE,
 839                         pErrorCode);
 840 }
 841
 842 U_CAPI int32_t U_EXPORT2
 843 u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options) {
 844     UErrorCode errorCode=U_ZERO_ERROR;
 845     return u_strcmpFold(s1, -1, s2, -1,
 846                         options|U_COMPARE_IGNORE_CASE,
 847                         &errorCode);
 848 }
 849
 850 U_CAPI int32_t U_EXPORT2
 851 u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options) {
 852     UErrorCode errorCode=U_ZERO_ERROR;
 853     return u_strcmpFold(s1, length, s2, length,
 854                         options|U_COMPARE_IGNORE_CASE,
 855                         &errorCode);
 856 }
 857
 858 U_CAPI int32_t U_EXPORT2
 859 u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options) {
 860     UErrorCode errorCode=U_ZERO_ERROR;
 861     return u_strcmpFold(s1, n, s2, n,
 862                         options|(U_COMPARE_IGNORE_CASE|_STRNCMP_STYLE),
 863                         &errorCode);
 864 }