icuSources/common/ustring.cpp

   1 /*
   2 ******************************************************************************
   3 *
   4 *   Copyright (C) 1998-2016, International Business Machines
   5 *   Corporation and others.  All Rights Reserved.
   6 *
   7 ******************************************************************************
   8 *
   9 * File ustring.cpp
  10 *
  11 * Modification History:
  12 *
  13 *   Date        Name        Description
  14 *   12/07/98    bertrand    Creation.
  15 ******************************************************************************
  16 */
  17
  18 #include "unicode/utypes.h"
  19 #include "unicode/putil.h"
  20 #include "unicode/ustring.h"
  21 #include "unicode/utf16.h"
  22 #include "cstring.h"
  23 #include "cwchar.h"
  24 #include "cmemory.h"
  25 #include "ustr_imp.h"
  26
  27 /* ANSI string.h - style functions ------------------------------------------ */
  28
  29 /* U+ffff is the highest BMP code point, the highest one that fits into a 16-bit UChar */
  30 #define U_BMP_MAX 0xffff
  31
  32 /* Forward binary string search functions ----------------------------------- */
  33
  34 /*
  35  * Test if a substring match inside a string is at code point boundaries.
  36  * All pointers refer to the same buffer.
  37  * The limit pointer may be NULL, all others must be real pointers.
  38  */
  39 static inline UBool
  40 isMatchAtCPBoundary(const UChar *start, const UChar *match, const UChar *matchLimit, const UChar *limit) {
  41     if(U16_IS_TRAIL(*match) && start!=match && U16_IS_LEAD(*(match-1))) {
  42         /* the leading edge of the match is in the middle of a surrogate pair */
  43         return FALSE;
  44     }
  45     if(U16_IS_LEAD(*(matchLimit-1)) && match!=limit && U16_IS_TRAIL(*matchLimit)) {
  46         /* the trailing edge of the match is in the middle of a surrogate pair */
  47         return FALSE;
  48     }
  49     return TRUE;
  50 }
  51
  52 U_CAPI UChar * U_EXPORT2
  53 u_strFindFirst(const UChar *s, int32_t length,
  54                const UChar *sub, int32_t subLength) {
  55     const UChar *start, *p, *q, *subLimit;
  56     UChar c, cs, cq;
  57
  58     if(sub==NULL || subLength<-1) {
  59         return (UChar *)s;
  60     }
  61     if(s==NULL || length<-1) {
  62         return NULL;
  63     }
  64
  65     start=s;
  66
  67     if(length<0 && subLength<0) {
  68         /* both strings are NUL-terminated */
  69         if((cs=*sub++)==0) {
  70             return (UChar *)s;
  71         }
  72         if(*sub==0 && !U16_IS_SURROGATE(cs)) {
  73             /* the substring consists of a single, non-surrogate BMP code point */
  74             return u_strchr(s, cs);
  75         }
  76
  77         while((c=*s++)!=0) {
  78             if(c==cs) {
  79                 /* found first substring UChar, compare rest */
  80                 p=s;
  81                 q=sub;
  82                 for(;;) {
  83                     if((cq=*q)==0) {
  84                         if(isMatchAtCPBoundary(start, s-1, p, NULL)) {
  85                             return (UChar *)(s-1); /* well-formed match */
  86                         } else {
  87                             break; /* no match because surrogate pair is split */
  88                         }
  89                     }
  90                     if((c=*p)==0) {
  91                         return NULL; /* no match, and none possible after s */
  92                     }
  93                     if(c!=cq) {
  94                         break; /* no match */
  95                     }
  96                     ++p;
  97                     ++q;
  98                 }
  99             }
 100         }
 101
 102         /* not found */
 103         return NULL;
 104     }
 105
 106     if(subLength<0) {
 107         subLength=u_strlen(sub);
 108     }
 109     if(subLength==0) {
 110         return (UChar *)s;
 111     }
 112
 113     /* get sub[0] to search for it fast */
 114     cs=*sub++;
 115     --subLength;
 116     subLimit=sub+subLength;
 117
 118     if(subLength==0 && !U16_IS_SURROGATE(cs)) {
 119         /* the substring consists of a single, non-surrogate BMP code point */
 120         return length<0 ? u_strchr(s, cs) : u_memchr(s, cs, length);
 121     }
 122
 123     if(length<0) {
 124         /* s is NUL-terminated */
 125         while((c=*s++)!=0) {
 126             if(c==cs) {
 127                 /* found first substring UChar, compare rest */
 128                 p=s;
 129                 q=sub;
 130                 for(;;) {
 131                     if(q==subLimit) {
 132                         if(isMatchAtCPBoundary(start, s-1, p, NULL)) {
 133                             return (UChar *)(s-1); /* well-formed match */
 134                         } else {
 135                             break; /* no match because surrogate pair is split */
 136                         }
 137                     }
 138                     if((c=*p)==0) {
 139                         return NULL; /* no match, and none possible after s */
 140                     }
 141                     if(c!=*q) {
 142                         break; /* no match */
 143                     }
 144                     ++p;
 145                     ++q;
 146                 }
 147             }
 148         }
 149     } else {
 150         const UChar *limit, *preLimit;
 151
 152         /* subLength was decremented above */
 153         if(length<=subLength) {
 154             return NULL; /* s is shorter than sub */
 155         }
 156
 157         limit=s+length;
 158
 159         /* the substring must start before preLimit */
 160         preLimit=limit-subLength;
 161
 162         while(s!=preLimit) {
 163             c=*s++;
 164             if(c==cs) {
 165                 /* found first substring UChar, compare rest */
 166                 p=s;
 167                 q=sub;
 168                 for(;;) {
 169                     if(q==subLimit) {
 170                         if(isMatchAtCPBoundary(start, s-1, p, limit)) {
 171                             return (UChar *)(s-1); /* well-formed match */
 172                         } else {
 173                             break; /* no match because surrogate pair is split */
 174                         }
 175                     }
 176                     if(*p!=*q) {
 177                         break; /* no match */
 178                     }
 179                     ++p;
 180                     ++q;
 181                 }
 182             }
 183         }
 184     }
 185
 186     /* not found */
 187     return NULL;
 188 }
 189
 190 U_CAPI UChar * U_EXPORT2
 191 u_strstr(const UChar *s, const UChar *substring) {
 192     return u_strFindFirst(s, -1, substring, -1);
 193 }
 194
 195 U_CAPI UChar * U_EXPORT2
 196 u_strchr(const UChar *s, UChar c) {
 197     if(U16_IS_SURROGATE(c)) {
 198         /* make sure to not find half of a surrogate pair */
 199         return u_strFindFirst(s, -1, &c, 1);
 200     } else {
 201         UChar cs;
 202
 203         /* trivial search for a BMP code point */
 204         for(;;) {
 205             if((cs=*s)==c) {
 206                 return (UChar *)s;
 207             }
 208             if(cs==0) {
 209                 return NULL;
 210             }
 211             ++s;
 212         }
 213     }
 214 }
 215
 216 U_CAPI UChar * U_EXPORT2
 217 u_strchr32(const UChar *s, UChar32 c) {
 218     if((uint32_t)c<=U_BMP_MAX) {
 219         /* find BMP code point */
 220         return u_strchr(s, (UChar)c);
 221     } else if((uint32_t)c<=UCHAR_MAX_VALUE) {
 222         /* find supplementary code point as surrogate pair */
 223         UChar cs, lead=U16_LEAD(c), trail=U16_TRAIL(c);
 224
 225         while((cs=*s++)!=0) {
 226             if(cs==lead && *s==trail) {
 227                 return (UChar *)(s-1);
 228             }
 229         }
 230         return NULL;
 231     } else {
 232         /* not a Unicode code point, not findable */
 233         return NULL;
 234     }
 235 }
 236
 237 U_CAPI UChar * U_EXPORT2
 238 u_memchr(const UChar *s, UChar c, int32_t count) {
 239     if(count<=0) {
 240         return NULL; /* no string */
 241     } else if(U16_IS_SURROGATE(c)) {
 242         /* make sure to not find half of a surrogate pair */
 243         return u_strFindFirst(s, count, &c, 1);
 244     } else {
 245         /* trivial search for a BMP code point */
 246         const UChar *limit=s+count;
 247         do {
 248             if(*s==c) {
 249                 return (UChar *)s;
 250             }
 251         } while(++s!=limit);
 252         return NULL;
 253     }
 254 }
 255
 256 U_CAPI UChar * U_EXPORT2
 257 u_memchr32(const UChar *s, UChar32 c, int32_t count) {
 258     if((uint32_t)c<=U_BMP_MAX) {
 259         /* find BMP code point */
 260         return u_memchr(s, (UChar)c, count);
 261     } else if(count<2) {
 262         /* too short for a surrogate pair */
 263         return NULL;
 264     } else if((uint32_t)c<=UCHAR_MAX_VALUE) {
 265         /* find supplementary code point as surrogate pair */
 266         const UChar *limit=s+count-1; /* -1 so that we do not need a separate check for the trail unit */
 267         UChar lead=U16_LEAD(c), trail=U16_TRAIL(c);
 268
 269         do {
 270             if(*s==lead && *(s+1)==trail) {
 271                 return (UChar *)s;
 272             }
 273         } while(++s!=limit);
 274         return NULL;
 275     } else {
 276         /* not a Unicode code point, not findable */
 277         return NULL;
 278     }
 279 }
 280
 281 /* Backward binary string search functions ---------------------------------- */
 282
 283 U_CAPI UChar * U_EXPORT2
 284 u_strFindLast(const UChar *s, int32_t length,
 285               const UChar *sub, int32_t subLength) {
 286     const UChar *start, *limit, *p, *q, *subLimit;
 287     UChar c, cs;
 288
 289     if(sub==NULL || subLength<-1) {
 290         return (UChar *)s;
 291     }
 292     if(s==NULL || length<-1) {
 293         return NULL;
 294     }
 295
 296     /*
 297      * This implementation is more lazy than the one for u_strFindFirst():
 298      * There is no special search code for NUL-terminated strings.
 299      * It does not seem to be worth it for searching substrings to
 300      * search forward and find all matches like in u_strrchr() and similar.
 301      * Therefore, we simply get both string lengths and search backward.
 302      *
 303      * markus 2002oct23
 304      */
 305
 306     if(subLength<0) {
 307         subLength=u_strlen(sub);
 308     }
 309     if(subLength==0) {
 310         return (UChar *)s;
 311     }
 312
 313     /* get sub[subLength-1] to search for it fast */
 314     subLimit=sub+subLength;
 315     cs=*(--subLimit);
 316     --subLength;
 317
 318     if(subLength==0 && !U16_IS_SURROGATE(cs)) {
 319         /* the substring consists of a single, non-surrogate BMP code point */
 320         return length<0 ? u_strrchr(s, cs) : u_memrchr(s, cs, length);
 321     }
 322
 323     if(length<0) {
 324         length=u_strlen(s);
 325     }
 326
 327     /* subLength was decremented above */
 328     if(length<=subLength) {
 329         return NULL; /* s is shorter than sub */
 330     }
 331
 332     start=s;
 333     limit=s+length;
 334
 335     /* the substring must start no later than s+subLength */
 336     s+=subLength;
 337
 338     while(s!=limit) {
 339         c=*(--limit);
 340         if(c==cs) {
 341             /* found last substring UChar, compare rest */
 342             p=limit;
 343             q=subLimit;
 344             for(;;) {
 345                 if(q==sub) {
 346                     if(isMatchAtCPBoundary(start, p, limit+1, start+length)) {
 347                         return (UChar *)p; /* well-formed match */
 348                     } else {
 349                         break; /* no match because surrogate pair is split */
 350                     }
 351                 }
 352                 if(*(--p)!=*(--q)) {
 353                     break; /* no match */
 354                 }
 355             }
 356         }
 357     }
 358
 359     /* not found */
 360     return NULL;
 361 }
 362
 363 U_CAPI UChar * U_EXPORT2
 364 u_strrstr(const UChar *s, const UChar *substring) {
 365     return u_strFindLast(s, -1, substring, -1);
 366 }
 367
 368 U_CAPI UChar * U_EXPORT2
 369 u_strrchr(const UChar *s, UChar c) {
 370     if(U16_IS_SURROGATE(c)) {
 371         /* make sure to not find half of a surrogate pair */
 372         return u_strFindLast(s, -1, &c, 1);
 373     } else {
 374         const UChar *result=NULL;
 375         UChar cs;
 376
 377         /* trivial search for a BMP code point */
 378         for(;;) {
 379             if((cs=*s)==c) {
 380                 result=s;
 381             }
 382             if(cs==0) {
 383                 return (UChar *)result;
 384             }
 385             ++s;
 386         }
 387     }
 388 }
 389
 390 U_CAPI UChar * U_EXPORT2
 391 u_strrchr32(const UChar *s, UChar32 c) {
 392     if((uint32_t)c<=U_BMP_MAX) {
 393         /* find BMP code point */
 394         return u_strrchr(s, (UChar)c);
 395     } else if((uint32_t)c<=UCHAR_MAX_VALUE) {
 396         /* find supplementary code point as surrogate pair */
 397         const UChar *result=NULL;
 398         UChar cs, lead=U16_LEAD(c), trail=U16_TRAIL(c);
 399
 400         while((cs=*s++)!=0) {
 401             if(cs==lead && *s==trail) {
 402                 result=s-1;
 403             }
 404         }
 405         return (UChar *)result;
 406     } else {
 407         /* not a Unicode code point, not findable */
 408         return NULL;
 409     }
 410 }
 411
 412 U_CAPI UChar * U_EXPORT2
 413 u_memrchr(const UChar *s, UChar c, int32_t count) {
 414     if(count<=0) {
 415         return NULL; /* no string */
 416     } else if(U16_IS_SURROGATE(c)) {
 417         /* make sure to not find half of a surrogate pair */
 418         return u_strFindLast(s, count, &c, 1);
 419     } else {
 420         /* trivial search for a BMP code point */
 421         const UChar *limit=s+count;
 422         do {
 423             if(*(--limit)==c) {
 424                 return (UChar *)limit;
 425             }
 426         } while(s!=limit);
 427         return NULL;
 428     }
 429 }
 430
 431 U_CAPI UChar * U_EXPORT2
 432 u_memrchr32(const UChar *s, UChar32 c, int32_t count) {
 433     if((uint32_t)c<=U_BMP_MAX) {
 434         /* find BMP code point */
 435         return u_memrchr(s, (UChar)c, count);
 436     } else if(count<2) {
 437         /* too short for a surrogate pair */
 438         return NULL;
 439     } else if((uint32_t)c<=UCHAR_MAX_VALUE) {
 440         /* find supplementary code point as surrogate pair */
 441         const UChar *limit=s+count-1;
 442         UChar lead=U16_LEAD(c), trail=U16_TRAIL(c);
 443
 444         do {
 445             if(*limit==trail && *(limit-1)==lead) {
 446                 return (UChar *)(limit-1);
 447             }
 448         } while(s!=--limit);
 449         return NULL;
 450     } else {
 451         /* not a Unicode code point, not findable */
 452         return NULL;
 453     }
 454 }
 455
 456 /* Tokenization functions --------------------------------------------------- */
 457
 458 /*
 459  * Match each code point in a string against each code point in the matchSet.
 460  * Return the index of the first string code point that
 461  * is (polarity==TRUE) or is not (FALSE) contained in the matchSet.
 462  * Return -(string length)-1 if there is no such code point.
 463  */
 464 static int32_t
 465 _matchFromSet(const UChar *string, const UChar *matchSet, UBool polarity) {
 466     int32_t matchLen, matchBMPLen, strItr, matchItr;
 467     UChar32 stringCh, matchCh;
 468     UChar c, c2;
 469
 470     /* first part of matchSet contains only BMP code points */
 471     matchBMPLen = 0;
 472     while((c = matchSet[matchBMPLen]) != 0 && U16_IS_SINGLE(c)) {
 473         ++matchBMPLen;
 474     }
 475
 476     /* second part of matchSet contains BMP and supplementary code points */
 477     matchLen = matchBMPLen;
 478     while(matchSet[matchLen] != 0) {
 479         ++matchLen;
 480     }
 481
 482     for(strItr = 0; (c = string[strItr]) != 0;) {
 483         ++strItr;
 484         if(U16_IS_SINGLE(c)) {
 485             if(polarity) {
 486                 for(matchItr = 0; matchItr < matchLen; ++matchItr) {
 487                     if(c == matchSet[matchItr]) {
 488                         return strItr - 1; /* one matches */
 489                     }
 490                 }
 491             } else {
 492                 for(matchItr = 0; matchItr < matchLen; ++matchItr) {
 493                     if(c == matchSet[matchItr]) {
 494                         goto endloop;
 495                     }
 496                 }
 497                 return strItr - 1; /* none matches */
 498             }
 499         } else {
 500             /*
 501              * No need to check for string length before U16_IS_TRAIL
 502              * because c2 could at worst be the terminating NUL.
 503              */
 504             if(U16_IS_SURROGATE_LEAD(c) && U16_IS_TRAIL(c2 = string[strItr])) {
 505                 ++strItr;
 506                 stringCh = U16_GET_SUPPLEMENTARY(c, c2);
 507             } else {
 508                 stringCh = c; /* unpaired trail surrogate */
 509             }
 510
 511             if(polarity) {
 512                 for(matchItr = matchBMPLen; matchItr < matchLen;) {
 513                     U16_NEXT(matchSet, matchItr, matchLen, matchCh);
 514                     if(stringCh == matchCh) {
 515                         return strItr - U16_LENGTH(stringCh); /* one matches */
 516                     }
 517                 }
 518             } else {
 519                 for(matchItr = matchBMPLen; matchItr < matchLen;) {
 520                     U16_NEXT(matchSet, matchItr, matchLen, matchCh);
 521                     if(stringCh == matchCh) {
 522                         goto endloop;
 523                     }
 524                 }
 525                 return strItr - U16_LENGTH(stringCh); /* none matches */
 526             }
 527         }
 528 endloop:
 529         /* wish C had continue with labels like Java... */;
 530     }
 531
 532     /* Didn't find it. */
 533     return -strItr-1;
 534 }
 535
 536 /* Search for a codepoint in a string that matches one of the matchSet codepoints. */
 537 U_CAPI UChar * U_EXPORT2
 538 u_strpbrk(const UChar *string, const UChar *matchSet)
 539 {
 540     int32_t idx = _matchFromSet(string, matchSet, TRUE);
 541     if(idx >= 0) {
 542         return (UChar *)string + idx;
 543     } else {
 544         return NULL;
 545     }
 546 }
 547
 548 /* Search for a codepoint in a string that matches one of the matchSet codepoints. */
 549 U_CAPI int32_t U_EXPORT2
 550 u_strcspn(const UChar *string, const UChar *matchSet)
 551 {
 552     int32_t idx = _matchFromSet(string, matchSet, TRUE);
 553     if(idx >= 0) {
 554         return idx;
 555     } else {
 556         return -idx - 1; /* == u_strlen(string) */
 557     }
 558 }
 559
 560 /* Search for a codepoint in a string that does not match one of the matchSet codepoints. */
 561 U_CAPI int32_t U_EXPORT2
 562 u_strspn(const UChar *string, const UChar *matchSet)
 563 {
 564     int32_t idx = _matchFromSet(string, matchSet, FALSE);
 565     if(idx >= 0) {
 566         return idx;
 567     } else {
 568         return -idx - 1; /* == u_strlen(string) */
 569     }
 570 }
 571
 572 /* ----- Text manipulation functions --- */
 573
 574 U_CAPI UChar* U_EXPORT2
 575 u_strtok_r(UChar    *src,
 576      const UChar    *delim,
 577            UChar   **saveState)
 578 {
 579     UChar *tokSource;
 580     UChar *nextToken;
 581     uint32_t nonDelimIdx;
 582
 583     /* If saveState is NULL, the user messed up. */
 584     if (src != NULL) {
 585         tokSource = src;
 586         *saveState = src; /* Set to "src" in case there are no delimiters */
 587     }
 588     else if (*saveState) {
 589         tokSource = *saveState;
 590     }
 591     else {
 592         /* src == NULL && *saveState == NULL */
 593         /* This shouldn't happen. We already finished tokenizing. */
 594         return NULL;
 595     }
 596
 597     /* Skip initial delimiters */
 598     nonDelimIdx = u_strspn(tokSource, delim);
 599     tokSource = &tokSource[nonDelimIdx];
 600
 601     if (*tokSource) {
 602         nextToken = u_strpbrk(tokSource, delim);
 603         if (nextToken != NULL) {
 604             /* Create a token */
 605             *(nextToken++) = 0;
 606             *saveState = nextToken;
 607             return tokSource;
 608         }
 609         else if (*saveState) {
 610             /* Return the last token */
 611             *saveState = NULL;
 612             return tokSource;
 613         }
 614     }
 615     else {
 616         /* No tokens were found. Only delimiters were left. */
 617         *saveState = NULL;
 618     }
 619     return NULL;
 620 }
 621
 622 /* Miscellaneous functions -------------------------------------------------- */
 623
 624 U_CAPI UChar* U_EXPORT2
 625 u_strcat(UChar     *dst,
 626     const UChar     *src)
 627 {
 628     UChar *anchor = dst;            /* save a pointer to start of dst */
 629
 630     while(*dst != 0) {              /* To end of first string          */
 631         ++dst;
 632     }
 633     while((*(dst++) = *(src++)) != 0) {     /* copy string 2 over              */
 634     }
 635
 636     return anchor;
 637 }
 638
 639 U_CAPI UChar*  U_EXPORT2
 640 u_strncat(UChar     *dst,
 641      const UChar     *src,
 642      int32_t     n )
 643 {
 644     if(n > 0) {
 645         UChar *anchor = dst;            /* save a pointer to start of dst */
 646
 647         while(*dst != 0) {              /* To end of first string          */
 648             ++dst;
 649         }
 650         while((*dst = *src) != 0) {     /* copy string 2 over              */
 651             ++dst;
 652             if(--n == 0) {
 653                 *dst = 0;
 654                 break;
 655             }
 656             ++src;
 657         }
 658
 659         return anchor;
 660     } else {
 661         return dst;
 662     }
 663 }
 664
 665 /* ----- Text property functions --- */
 666
 667 U_CAPI int32_t   U_EXPORT2
 668 u_strcmp(const UChar *s1,
 669     const UChar *s2)
 670 {
 671     UChar  c1, c2;
 672
 673     for(;;) {
 674         c1=*s1++;
 675         c2=*s2++;
 676         if (c1 != c2 || c1 == 0) {
 677             break;
 678         }
 679     }
 680     return (int32_t)c1 - (int32_t)c2;
 681 }
 682
 683 U_CFUNC int32_t U_EXPORT2
 684 uprv_strCompare(const UChar *s1, int32_t length1,
 685                 const UChar *s2, int32_t length2,
 686                 UBool strncmpStyle, UBool codePointOrder) {
 687     const UChar *start1, *start2, *limit1, *limit2;
 688     UChar c1, c2;
 689
 690     /* setup for fix-up */
 691     start1=s1;
 692     start2=s2;
 693
 694     /* compare identical prefixes - they do not need to be fixed up */
 695     if(length1<0 && length2<0) {
 696         /* strcmp style, both NUL-terminated */
 697         if(s1==s2) {
 698             return 0;
 699         }
 700
 701         for(;;) {
 702             c1=*s1;
 703             c2=*s2;
 704             if(c1!=c2) {
 705                 break;
 706             }
 707             if(c1==0) {
 708                 return 0;
 709             }
 710             ++s1;
 711             ++s2;
 712         }
 713
 714         /* setup for fix-up */
 715         limit1=limit2=NULL;
 716     } else if(strncmpStyle) {
 717         /* special handling for strncmp, assume length1==length2>=0 but also check for NUL */
 718         if(s1==s2) {
 719             return 0;
 720         }
 721
 722         limit1=start1+length1;
 723
 724         for(;;) {
 725             /* both lengths are same, check only one limit */
 726             if(s1==limit1) {
 727                 return 0;
 728             }
 729
 730             c1=*s1;
 731             c2=*s2;
 732             if(c1!=c2) {
 733                 break;
 734             }
 735             if(c1==0) {
 736                 return 0;
 737             }
 738             ++s1;
 739             ++s2;
 740         }
 741
 742         /* setup for fix-up */
 743         limit2=start2+length1; /* use length1 here, too, to enforce assumption */
 744     } else {
 745         /* memcmp/UnicodeString style, both length-specified */
 746         int32_t lengthResult;
 747
 748         if(length1<0) {
 749             length1=u_strlen(s1);
 750         }
 751         if(length2<0) {
 752             length2=u_strlen(s2);
 753         }
 754
 755         /* limit1=start1+min(lenght1, length2) */
 756         if(length1<length2) {
 757             lengthResult=-1;
 758             limit1=start1+length1;
 759         } else if(length1==length2) {
 760             lengthResult=0;
 761             limit1=start1+length1;
 762         } else /* length1>length2 */ {
 763             lengthResult=1;
 764             limit1=start1+length2;
 765         }
 766
 767         if(s1==s2) {
 768             return lengthResult;
 769         }
 770
 771         for(;;) {
 772             /* check pseudo-limit */
 773             if(s1==limit1) {
 774                 return lengthResult;
 775             }
 776
 777             c1=*s1;
 778             c2=*s2;
 779             if(c1!=c2) {
 780                 break;
 781             }
 782             ++s1;
 783             ++s2;
 784         }
 785
 786         /* setup for fix-up */
 787         limit1=start1+length1;
 788         limit2=start2+length2;
 789     }
 790
 791     /* if both values are in or above the surrogate range, fix them up */
 792     if(c1>=0xd800 && c2>=0xd800 && codePointOrder) {
 793         /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
 794         if(
 795             (c1<=0xdbff && (s1+1)!=limit1 && U16_IS_TRAIL(*(s1+1))) ||
 796             (U16_IS_TRAIL(c1) && start1!=s1 && U16_IS_LEAD(*(s1-1)))
 797         ) {
 798             /* part of a surrogate pair, leave >=d800 */
 799         } else {
 800             /* BMP code point - may be surrogate code point - make <d800 */
 801             c1-=0x2800;
 802         }
 803
 804         if(
 805             (c2<=0xdbff && (s2+1)!=limit2 && U16_IS_TRAIL(*(s2+1))) ||
 806             (U16_IS_TRAIL(c2) && start2!=s2 && U16_IS_LEAD(*(s2-1)))
 807         ) {
 808             /* part of a surrogate pair, leave >=d800 */
 809         } else {
 810             /* BMP code point - may be surrogate code point - make <d800 */
 811             c2-=0x2800;
 812         }
 813     }
 814
 815     /* now c1 and c2 are in the requested (code unit or code point) order */
 816     return (int32_t)c1-(int32_t)c2;
 817 }
 818
 819 /*
 820  * Compare two strings as presented by UCharIterators.
 821  * Use code unit or code point order.
 822  * When the function returns, it is undefined where the iterators
 823  * have stopped.
 824  */
 825 U_CAPI int32_t U_EXPORT2
 826 u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder) {
 827     UChar32 c1, c2;
 828
 829     /* argument checking */
 830     if(iter1==NULL || iter2==NULL) {
 831         return 0; /* bad arguments */
 832     }
 833     if(iter1==iter2) {
 834         return 0; /* identical iterators */
 835     }
 836
 837     /* reset iterators to start? */
 838     iter1->move(iter1, 0, UITER_START);
 839     iter2->move(iter2, 0, UITER_START);
 840
 841     /* compare identical prefixes - they do not need to be fixed up */
 842     for(;;) {
 843         c1=iter1->next(iter1);
 844         c2=iter2->next(iter2);
 845         if(c1!=c2) {
 846             break;
 847         }
 848         if(c1==-1) {
 849             return 0;
 850         }
 851     }
 852
 853     /* if both values are in or above the surrogate range, fix them up */
 854     if(c1>=0xd800 && c2>=0xd800 && codePointOrder) {
 855         /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
 856         if(
 857             (c1<=0xdbff && U16_IS_TRAIL(iter1->current(iter1))) ||
 858             (U16_IS_TRAIL(c1) && (iter1->previous(iter1), U16_IS_LEAD(iter1->previous(iter1))))
 859         ) {
 860             /* part of a surrogate pair, leave >=d800 */
 861         } else {
 862             /* BMP code point - may be surrogate code point - make <d800 */
 863             c1-=0x2800;
 864         }
 865
 866         if(
 867             (c2<=0xdbff && U16_IS_TRAIL(iter2->current(iter2))) ||
 868             (U16_IS_TRAIL(c2) && (iter2->previous(iter2), U16_IS_LEAD(iter2->previous(iter2))))
 869         ) {
 870             /* part of a surrogate pair, leave >=d800 */
 871         } else {
 872             /* BMP code point - may be surrogate code point - make <d800 */
 873             c2-=0x2800;
 874         }
 875     }
 876
 877     /* now c1 and c2 are in the requested (code unit or code point) order */
 878     return (int32_t)c1-(int32_t)c2;
 879 }
 880
 881 #if 0
 882 /*
 883  * u_strCompareIter() does not leave the iterators _on_ the different units.
 884  * This is possible but would cost a few extra indirect function calls to back
 885  * up if the last unit (c1 or c2 respectively) was >=0.
 886  *
 887  * Consistently leaving them _behind_ the different units is not an option
 888  * because the current "unit" is the end of the string if that is reached,
 889  * and in such a case the iterator does not move.
 890  * For example, when comparing "ab" with "abc", both iterators rest _on_ the end
 891  * of their strings. Calling previous() on each does not move them to where
 892  * the comparison fails.
 893  *
 894  * So the simplest semantics is to not define where the iterators end up.
 895  *
 896  * The following fragment is part of what would need to be done for backing up.
 897  */
 898 void fragment {
 899         /* iff a surrogate is part of a surrogate pair, leave >=d800 */
 900         if(c1<=0xdbff) {
 901             if(!U16_IS_TRAIL(iter1->current(iter1))) {
 902                 /* lead surrogate code point - make <d800 */
 903                 c1-=0x2800;
 904             }
 905         } else if(c1<=0xdfff) {
 906             int32_t idx=iter1->getIndex(iter1, UITER_CURRENT);
 907             iter1->previous(iter1); /* ==c1 */
 908             if(!U16_IS_LEAD(iter1->previous(iter1))) {
 909                 /* trail surrogate code point - make <d800 */
 910                 c1-=0x2800;
 911             }
 912             /* go back to behind where the difference is */
 913             iter1->move(iter1, idx, UITER_ZERO);
 914         } else /* 0xe000<=c1<=0xffff */ {
 915             /* BMP code point - make <d800 */
 916             c1-=0x2800;
 917         }
 918 }
 919 #endif
 920
 921 U_CAPI int32_t U_EXPORT2
 922 u_strCompare(const UChar *s1, int32_t length1,
 923              const UChar *s2, int32_t length2,
 924              UBool codePointOrder) {
 925     /* argument checking */
 926     if(s1==NULL || length1<-1 || s2==NULL || length2<-1) {
 927         return 0;
 928     }
 929     return uprv_strCompare(s1, length1, s2, length2, FALSE, codePointOrder);
 930 }
 931
 932 /* String compare in code point order - u_strcmp() compares in code unit order. */
 933 U_CAPI int32_t U_EXPORT2
 934 u_strcmpCodePointOrder(const UChar *s1, const UChar *s2) {
 935     return uprv_strCompare(s1, -1, s2, -1, FALSE, TRUE);
 936 }
 937
 938 U_CAPI int32_t   U_EXPORT2
 939 u_strncmp(const UChar     *s1,
 940      const UChar     *s2,
 941      int32_t     n)
 942 {
 943     if(n > 0) {
 944         int32_t rc;
 945         for(;;) {
 946             rc = (int32_t)*s1 - (int32_t)*s2;
 947             if(rc != 0 || *s1 == 0 || --n == 0) {
 948                 return rc;
 949             }
 950             ++s1;
 951             ++s2;
 952         }
 953     } else {
 954         return 0;
 955     }
 956 }
 957
 958 U_CAPI int32_t U_EXPORT2
 959 u_strncmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t n) {
 960     return uprv_strCompare(s1, n, s2, n, TRUE, TRUE);
 961 }
 962
 963 U_CAPI UChar* U_EXPORT2
 964 u_strcpy(UChar     *dst,
 965     const UChar     *src)
 966 {
 967     UChar *anchor = dst;            /* save a pointer to start of dst */
 968
 969     while((*(dst++) = *(src++)) != 0) {     /* copy string 2 over              */
 970     }
 971
 972     return anchor;
 973 }
 974
 975 U_CAPI UChar*  U_EXPORT2
 976 u_strncpy(UChar     *dst,
 977      const UChar     *src,
 978      int32_t     n)
 979 {
 980     UChar *anchor = dst;            /* save a pointer to start of dst */
 981
 982     /* copy string 2 over */
 983     while(n > 0 && (*(dst++) = *(src++)) != 0) {
 984         --n;
 985     }
 986
 987     return anchor;
 988 }
 989
 990 U_CAPI int32_t   U_EXPORT2
 991 u_strlen(const UChar *s)
 992 {
 993 #if U_SIZEOF_WCHAR_T == U_SIZEOF_UCHAR
 994     return (int32_t)uprv_wcslen(s);
 995 #else
 996     const UChar *t = s;
 997     while(*t != 0) {
 998       ++t;
 999     }
1000     return t - s;
1001 #endif
1002 }
1003
1004 U_CAPI int32_t U_EXPORT2
1005 u_countChar32(const UChar *s, int32_t length) {
1006     int32_t count;
1007
1008     if(s==NULL || length<-1) {
1009         return 0;
1010     }
1011
1012     count=0;
1013     if(length>=0) {
1014         while(length>0) {
1015             ++count;
1016             if(U16_IS_LEAD(*s) && length>=2 && U16_IS_TRAIL(*(s+1))) {
1017                 s+=2;
1018                 length-=2;
1019             } else {
1020                 ++s;
1021                 --length;
1022             }
1023         }
1024     } else /* length==-1 */ {
1025         UChar c;
1026
1027         for(;;) {
1028             if((c=*s++)==0) {
1029                 break;
1030             }
1031             ++count;
1032
1033             /*
1034              * sufficient to look ahead one because of UTF-16;
1035              * safe to look ahead one because at worst that would be the terminating NUL
1036              */
1037             if(U16_IS_LEAD(c) && U16_IS_TRAIL(*s)) {
1038                 ++s;
1039             }
1040         }
1041     }
1042     return count;
1043 }
1044
1045 U_CAPI UBool U_EXPORT2
1046 u_strHasMoreChar32Than(const UChar *s, int32_t length, int32_t number) {
1047
1048     if(number<0) {
1049         return TRUE;
1050     }
1051     if(s==NULL || length<-1) {
1052         return FALSE;
1053     }
1054
1055     if(length==-1) {
1056         /* s is NUL-terminated */
1057         UChar c;
1058
1059         /* count code points until they exceed */
1060         for(;;) {
1061             if((c=*s++)==0) {
1062                 return FALSE;
1063             }
1064             if(number==0) {
1065                 return TRUE;
1066             }
1067             if(U16_IS_LEAD(c) && U16_IS_TRAIL(*s)) {
1068                 ++s;
1069             }
1070             --number;
1071         }
1072     } else {
1073         /* length>=0 known */
1074         const UChar *limit;
1075         int32_t maxSupplementary;
1076
1077         /* s contains at least (length+1)/2 code points: <=2 UChars per cp */
1078         if(((length+1)/2)>number) {
1079             return TRUE;
1080         }
1081
1082         /* check if s does not even contain enough UChars */
1083         maxSupplementary=length-number;
1084         if(maxSupplementary<=0) {
1085             return FALSE;
1086         }
1087         /* there are maxSupplementary=length-number more UChars than asked-for code points */
1088
1089         /*
1090          * count code points until they exceed and also check that there are
1091          * no more than maxSupplementary supplementary code points (UChar pairs)
1092          */
1093         limit=s+length;
1094         for(;;) {
1095             if(s==limit) {
1096                 return FALSE;
1097             }
1098             if(number==0) {
1099                 return TRUE;
1100             }
1101             if(U16_IS_LEAD(*s++) && s!=limit && U16_IS_TRAIL(*s)) {
1102                 ++s;
1103                 if(--maxSupplementary<=0) {
1104                     /* too many pairs - too few code points */
1105                     return FALSE;
1106                 }
1107             }
1108             --number;
1109         }
1110     }
1111 }
1112
1113 U_CAPI UChar * U_EXPORT2
1114 u_memcpy(UChar *dest, const UChar *src, int32_t count) {
1115     if(count > 0) {
1116         uprv_memcpy(dest, src, count*U_SIZEOF_UCHAR);
1117     }
1118     return dest;
1119 }
1120
1121 U_CAPI UChar * U_EXPORT2
1122 u_memmove(UChar *dest, const UChar *src, int32_t count) {
1123     if(count > 0) {
1124         uprv_memmove(dest, src, count*U_SIZEOF_UCHAR);
1125     }
1126     return dest;
1127 }
1128
1129 U_CAPI UChar * U_EXPORT2
1130 u_memset(UChar *dest, UChar c, int32_t count) {
1131     if(count > 0) {
1132         UChar *ptr = dest;
1133         UChar *limit = dest + count;
1134
1135         while (ptr < limit) {
1136             *(ptr++) = c;
1137         }
1138     }
1139     return dest;
1140 }
1141
1142 U_CAPI int32_t U_EXPORT2
1143 u_memcmp(const UChar *buf1, const UChar *buf2, int32_t count) {
1144     if(count > 0) {
1145         const UChar *limit = buf1 + count;
1146         int32_t result;
1147
1148         while (buf1 < limit) {
1149             result = (int32_t)(uint16_t)*buf1 - (int32_t)(uint16_t)*buf2;
1150             if (result != 0) {
1151                 return result;
1152             }
1153             buf1++;
1154             buf2++;
1155         }
1156     }
1157     return 0;
1158 }
1159
1160 U_CAPI int32_t U_EXPORT2
1161 u_memcmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t count) {
1162     return uprv_strCompare(s1, count, s2, count, FALSE, TRUE);
1163 }
1164
1165 /* u_unescape & support fns ------------------------------------------------- */
1166
1167 /* This map must be in ASCENDING ORDER OF THE ESCAPE CODE */
1168 static const UChar UNESCAPE_MAP[] = {
1169     /*"   0x22, 0x22 */
1170     /*'   0x27, 0x27 */
1171     /*?   0x3F, 0x3F */
1172     /*\   0x5C, 0x5C */
1173     /*a*/ 0x61, 0x07,
1174     /*b*/ 0x62, 0x08,
1175     /*e*/ 0x65, 0x1b,
1176     /*f*/ 0x66, 0x0c,
1177     /*n*/ 0x6E, 0x0a,
1178     /*r*/ 0x72, 0x0d,
1179     /*t*/ 0x74, 0x09,
1180     /*v*/ 0x76, 0x0b
1181 };
1182 enum { UNESCAPE_MAP_LENGTH = UPRV_LENGTHOF(UNESCAPE_MAP) };
1183
1184 /* Convert one octal digit to a numeric value 0..7, or -1 on failure */
1185 static int8_t _digit8(UChar c) {
1186     if (c >= 0x0030 && c <= 0x0037) {
1187         return (int8_t)(c - 0x0030);
1188     }
1189     return -1;
1190 }
1191
1192 /* Convert one hex digit to a numeric value 0..F, or -1 on failure */
1193 static int8_t _digit16(UChar c) {
1194     if (c >= 0x0030 && c <= 0x0039) {
1195         return (int8_t)(c - 0x0030);
1196     }
1197     if (c >= 0x0041 && c <= 0x0046) {
1198         return (int8_t)(c - (0x0041 - 10));
1199     }
1200     if (c >= 0x0061 && c <= 0x0066) {
1201         return (int8_t)(c - (0x0061 - 10));
1202     }
1203     return -1;
1204 }
1205
1206 /* Parse a single escape sequence.  Although this method deals in
1207  * UChars, it does not use C++ or UnicodeString.  This allows it to
1208  * be used from C contexts. */
1209 U_CAPI UChar32 U_EXPORT2
1210 u_unescapeAt(UNESCAPE_CHAR_AT charAt,
1211              int32_t *offset,
1212              int32_t length,
1213              void *context) {
1214
1215     int32_t start = *offset;
1216     UChar c;
1217     UChar32 result = 0;
1218     int8_t n = 0;
1219     int8_t minDig = 0;
1220     int8_t maxDig = 0;
1221     int8_t bitsPerDigit = 4;
1222     int8_t dig;
1223     int32_t i;
1224     UBool braces = FALSE;
1225
1226     /* Check that offset is in range */
1227     if (*offset < 0 || *offset >= length) {
1228         goto err;
1229     }
1230
1231     /* Fetch first UChar after '\\' */
1232     c = charAt((*offset)++, context);
1233
1234     /* Convert hexadecimal and octal escapes */
1235     switch (c) {
1236     case 0x0075 /*'u'*/:
1237         minDig = maxDig = 4;
1238         break;
1239     case 0x0055 /*'U'*/:
1240         minDig = maxDig = 8;
1241         break;
1242     case 0x0078 /*'x'*/:
1243         minDig = 1;
1244         if (*offset < length && charAt(*offset, context) == 0x7B /*{*/) {
1245             ++(*offset);
1246             braces = TRUE;
1247             maxDig = 8;
1248         } else {
1249             maxDig = 2;
1250         }
1251         break;
1252     default:
1253         dig = _digit8(c);
1254         if (dig >= 0) {
1255             minDig = 1;
1256             maxDig = 3;
1257             n = 1; /* Already have first octal digit */
1258             bitsPerDigit = 3;
1259             result = dig;
1260         }
1261         break;
1262     }
1263     if (minDig != 0) {
1264         while (*offset < length && n < maxDig) {
1265             c = charAt(*offset, context);
1266             dig = (int8_t)((bitsPerDigit == 3) ? _digit8(c) : _digit16(c));
1267             if (dig < 0) {
1268                 break;
1269             }
1270             result = (result << bitsPerDigit) | dig;
1271             ++(*offset);
1272             ++n;
1273         }
1274         if (n < minDig) {
1275             goto err;
1276         }
1277         if (braces) {
1278             if (c != 0x7D /*}*/) {
1279                 goto err;
1280             }
1281             ++(*offset);
1282         }
1283         if (result < 0 || result >= 0x110000) {
1284             goto err;
1285         }
1286         /* If an escape sequence specifies a lead surrogate, see if
1287          * there is a trail surrogate after it, either as an escape or
1288          * as a literal.  If so, join them up into a supplementary.
1289          */
1290         if (*offset < length && U16_IS_LEAD(result)) {
1291             int32_t ahead = *offset + 1;
1292             c = charAt(*offset, context);
1293             if (c == 0x5C /*'\\'*/ && ahead < length) {
1294                 c = (UChar) u_unescapeAt(charAt, &ahead, length, context);
1295             }
1296             if (U16_IS_TRAIL(c)) {
1297                 *offset = ahead;
1298                 result = U16_GET_SUPPLEMENTARY(result, c);
1299             }
1300         }
1301         return result;
1302     }
1303
1304     /* Convert C-style escapes in table */
1305     for (i=0; i<UNESCAPE_MAP_LENGTH; i+=2) {
1306         if (c == UNESCAPE_MAP[i]) {
1307             return UNESCAPE_MAP[i+1];
1308         } else if (c < UNESCAPE_MAP[i]) {
1309             break;
1310         }
1311     }
1312
1313     /* Map \cX to control-X: X & 0x1F */
1314     if (c == 0x0063 /*'c'*/ && *offset < length) {
1315         c = charAt((*offset)++, context);
1316         if (U16_IS_LEAD(c) && *offset < length) {
1317             UChar c2 = charAt(*offset, context);
1318             if (U16_IS_TRAIL(c2)) {
1319                 ++(*offset);
1320                 c = (UChar) U16_GET_SUPPLEMENTARY(c, c2); /* [sic] */
1321             }
1322         }
1323         return 0x1F & c;
1324     }
1325
1326     /* If no special forms are recognized, then consider
1327      * the backslash to generically escape the next character.
1328      * Deal with surrogate pairs. */
1329     if (U16_IS_LEAD(c) && *offset < length) {
1330         UChar c2 = charAt(*offset, context);
1331         if (U16_IS_TRAIL(c2)) {
1332             ++(*offset);
1333             return U16_GET_SUPPLEMENTARY(c, c2);
1334         }
1335     }
1336     return c;
1337
1338  err:
1339     /* Invalid escape sequence */
1340     *offset = start; /* Reset to initial value */
1341     return (UChar32)0xFFFFFFFF;
1342 }
1343
1344 /* u_unescapeAt() callback to return a UChar from a char* */
1345 static UChar U_CALLCONV
1346 _charPtr_charAt(int32_t offset, void *context) {
1347     UChar c16;
1348     /* It would be more efficient to access the invariant tables
1349      * directly but there is no API for that. */
1350     u_charsToUChars(((char*) context) + offset, &c16, 1);
1351     return c16;
1352 }
1353
1354 /* Append an escape-free segment of the text; used by u_unescape() */
1355 static void _appendUChars(UChar *dest, int32_t destCapacity,
1356                           const char *src, int32_t srcLen) {
1357     if (destCapacity < 0) {
1358         destCapacity = 0;
1359     }
1360     if (srcLen > destCapacity) {
1361         srcLen = destCapacity;
1362     }
1363     u_charsToUChars(src, dest, srcLen);
1364 }
1365
1366 /* Do an invariant conversion of char* -> UChar*, with escape parsing */
1367 U_CAPI int32_t U_EXPORT2
1368 u_unescape(const char *src, UChar *dest, int32_t destCapacity) {
1369     const char *segment = src;
1370     int32_t i = 0;
1371     char c;
1372
1373     while ((c=*src) != 0) {
1374         /* '\\' intentionally written as compiler-specific
1375          * character constant to correspond to compiler-specific
1376          * char* constants. */
1377         if (c == '\\') {
1378             int32_t lenParsed = 0;
1379             UChar32 c32;
1380             if (src != segment) {
1381                 if (dest != NULL) {
1382                     _appendUChars(dest + i, destCapacity - i,
1383                                   segment, (int32_t)(src - segment));
1384                 }
1385                 i += (int32_t)(src - segment);
1386             }
1387             ++src; /* advance past '\\' */
1388             c32 = (UChar32)u_unescapeAt(_charPtr_charAt, &lenParsed, (int32_t)uprv_strlen(src), (void*)src);
1389             if (lenParsed == 0) {
1390                 goto err;
1391             }
1392             src += lenParsed; /* advance past escape seq. */
1393             if (dest != NULL && U16_LENGTH(c32) <= (destCapacity - i)) {
1394                 U16_APPEND_UNSAFE(dest, i, c32);
1395             } else {
1396                 i += U16_LENGTH(c32);
1397             }
1398             segment = src;
1399         } else {
1400             ++src;
1401         }
1402     }
1403     if (src != segment) {
1404         if (dest != NULL) {
1405             _appendUChars(dest + i, destCapacity - i,
1406                           segment, (int32_t)(src - segment));
1407         }
1408         i += (int32_t)(src - segment);
1409     }
1410     if (dest != NULL && i < destCapacity) {
1411         dest[i] = 0;
1412     }
1413     return i;
1414
1415  err:
1416     if (dest != NULL && destCapacity > 0) {
1417         *dest = 0;
1418     }
1419     return 0;
1420 }
1421
1422 /* NUL-termination of strings ----------------------------------------------- */
1423
1424 /**
1425  * NUL-terminate a string no matter what its type.
1426  * Set warning and error codes accordingly.
1427  */
1428 #define __TERMINATE_STRING(dest, destCapacity, length, pErrorCode)      \
1429     if(pErrorCode!=NULL && U_SUCCESS(*pErrorCode)) {                    \
1430         /* not a public function, so no complete argument checking */   \
1431                                                                         \
1432         if(length<0) {                                                  \
1433             /* assume that the caller handles this */                   \
1434         } else if(length<destCapacity) {                                \
1435             /* NUL-terminate the string, the NUL fits */                \
1436             dest[length]=0;                                             \
1437             /* unset the not-terminated warning but leave all others */ \
1438             if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) {          \
1439                 *pErrorCode=U_ZERO_ERROR;                               \
1440             }                                                           \
1441         } else if(length==destCapacity) {                               \
1442             /* unable to NUL-terminate, but the string itself fit - set a warning code */ \
1443             *pErrorCode=U_STRING_NOT_TERMINATED_WARNING;                \
1444         } else /* length>destCapacity */ {                              \
1445             /* even the string itself did not fit - set an error code */ \
1446             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;                        \
1447         }                                                               \
1448     }
1449
1450 U_CAPI int32_t U_EXPORT2
1451 u_terminateUChars(UChar *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
1452     __TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
1453     return length;
1454 }
1455
1456 U_CAPI int32_t U_EXPORT2
1457 u_terminateChars(char *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
1458     __TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
1459     return length;
1460 }
1461
1462 U_CAPI int32_t U_EXPORT2
1463 u_terminateUChar32s(UChar32 *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
1464     __TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
1465     return length;
1466 }
1467
1468 U_CAPI int32_t U_EXPORT2
1469 u_terminateWChars(wchar_t *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
1470     __TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
1471     return length;
1472 }
1473
1474 // Compute the hash code for a string -------------------------------------- ***
1475
1476 // Moved here from uhash.c so that UnicodeString::hashCode() does not depend
1477 // on UHashtable code.
1478
1479 /*
1480   Compute the hash by iterating sparsely over about 32 (up to 63)
1481   characters spaced evenly through the string.  For each character,
1482   multiply the previous hash value by a prime number and add the new
1483   character in, like a linear congruential random number generator,
1484   producing a pseudorandom deterministic value well distributed over
1485   the output range. [LIU]
1486 */
1487
1488 #define STRING_HASH(TYPE, STR, STRLEN, DEREF) \
1489     uint32_t hash = 0;                        \
1490     const TYPE *p = (const TYPE*) STR;        \
1491     if (p != NULL) {                          \
1492         int32_t len = (int32_t)(STRLEN);      \
1493         int32_t inc = ((len - 32) / 32) + 1;  \
1494         const TYPE *limit = p + len;          \
1495         while (p<limit) {                     \
1496             hash = (hash * 37) + DEREF;       \
1497             p += inc;                         \
1498         }                                     \
1499     }                                         \
1500     return static_cast<int32_t>(hash)
1501
1502 /* Used by UnicodeString to compute its hashcode - Not public API. */
1503 U_CAPI int32_t U_EXPORT2
1504 ustr_hashUCharsN(const UChar *str, int32_t length) {
1505     STRING_HASH(UChar, str, length, *p);
1506 }
1507
1508 U_CAPI int32_t U_EXPORT2
1509 ustr_hashCharsN(const char *str, int32_t length) {
1510     STRING_HASH(uint8_t, str, length, *p);
1511 }
1512
1513 U_CAPI int32_t U_EXPORT2
1514 ustr_hashICharsN(const char *str, int32_t length) {
1515     STRING_HASH(char, str, length, (uint8_t)uprv_tolower(*p));
1516 }