]>
git.saurik.com Git - apple/icu.git/blob - icuSources/common/ustrtrns.cpp
   2 ****************************************************************************** 
   4 *   Copyright (C) 2001-2016, International Business Machines 
   5 *   Corporation and others.  All Rights Reserved. 
   7 ****************************************************************************** 
  11 * Modification History: 
  13 *   Date        Name        Description 
  14 *   9/10/2001    Ram    Creation. 
  15 ****************************************************************************** 
  18 /******************************************************************************* 
  20  * u_strTo* and u_strFrom* APIs 
  21  * WCS functions moved to ustr_wcs.c for better modularization 
  23  ******************************************************************************* 
  27 #include "unicode/putil.h" 
  28 #include "unicode/ustring.h" 
  29 #include "unicode/utf.h" 
  30 #include "unicode/utf8.h" 
  31 #include "unicode/utf16.h" 
  37 U_CAPI UChar
* U_EXPORT2 
 
  38 u_strFromUTF32WithSub(UChar 
*dest
, 
  43                UChar32 subchar
, int32_t *pNumSubstitutions
, 
  44                UErrorCode 
*pErrorCode
) { 
  45     const UChar32 
*srcLimit
; 
  50     int32_t numSubstitutions
; 
  53     if(U_FAILURE(*pErrorCode
)){ 
  56     if( (src
==NULL 
&& srcLength
!=0) || srcLength 
< -1 || 
  57         (destCapacity
<0) || (dest 
== NULL 
&& destCapacity 
> 0) || 
  58         subchar 
> 0x10ffff || U_IS_SURROGATE(subchar
) 
  60         *pErrorCode 
= U_ILLEGAL_ARGUMENT_ERROR
; 
  64     if(pNumSubstitutions 
!= NULL
) { 
  65         *pNumSubstitutions 
= 0; 
  69     destLimit 
= (dest
!=NULL
)?(dest 
+ destCapacity
):NULL
; 
  74         /* simple loop for conversion of a NUL-terminated BMP string */ 
  75         while((ch
=*src
) != 0 && 
  76               ((uint32_t)ch 
< 0xd800 || (0xe000 <= ch 
&& ch 
<= 0xffff))) { 
  78             if(pDest 
< destLimit
) { 
  86             /* "complicated" case, find the end of the remaining string */ 
  87             while(*++srcLimit 
!= 0) {} 
  90       srcLimit 
= (src
!=NULL
)?(src 
+ srcLength
):NULL
; 
  93     /* convert with length */ 
  94     while(src 
< srcLimit
) { 
  97             /* usually "loops" once; twice only for writing subchar */ 
  98             if((uint32_t)ch 
< 0xd800 || (0xe000 <= ch 
&& ch 
<= 0xffff)) { 
  99                 if(pDest 
< destLimit
) { 
 100                     *pDest
++ = (UChar
)ch
; 
 105             } else if(0x10000 <= ch 
&& ch 
<= 0x10ffff) { 
 106                 if(pDest
!=NULL 
&& ((pDest 
+ 2) <= destLimit
)) { 
 107                     *pDest
++ = U16_LEAD(ch
); 
 108                     *pDest
++ = U16_TRAIL(ch
); 
 113             } else if((ch 
= subchar
) < 0) { 
 114                 /* surrogate code point, or not a Unicode code point at all */ 
 115                 *pErrorCode 
= U_INVALID_CHAR_FOUND
; 
 123     reqLength 
+= (int32_t)(pDest 
- dest
); 
 125         *pDestLength 
= reqLength
; 
 127     if(pNumSubstitutions 
!= NULL
) { 
 128         *pNumSubstitutions 
= numSubstitutions
; 
 131     /* Terminate the buffer */ 
 132     u_terminateUChars(dest
, destCapacity
, reqLength
, pErrorCode
); 
 137 U_CAPI UChar
* U_EXPORT2 
 
 138 u_strFromUTF32(UChar 
*dest
, 
 139                int32_t destCapacity
,  
 140                int32_t *pDestLength
, 
 143                UErrorCode 
*pErrorCode
) { 
 144     return u_strFromUTF32WithSub( 
 145             dest
, destCapacity
, pDestLength
, 
 151 U_CAPI UChar32
* U_EXPORT2 
 
 152 u_strToUTF32WithSub(UChar32 
*dest
, 
 153              int32_t destCapacity
, 
 154              int32_t *pDestLength
, 
 157              UChar32 subchar
, int32_t *pNumSubstitutions
, 
 158              UErrorCode 
*pErrorCode
) { 
 159     const UChar 
*srcLimit
; 
 165     int32_t numSubstitutions
; 
 168     if(U_FAILURE(*pErrorCode
)){ 
 171     if( (src
==NULL 
&& srcLength
!=0) || srcLength 
< -1 || 
 172         (destCapacity
<0) || (dest 
== NULL 
&& destCapacity 
> 0) || 
 173         subchar 
> 0x10ffff || U_IS_SURROGATE(subchar
) 
 175         *pErrorCode 
= U_ILLEGAL_ARGUMENT_ERROR
; 
 179     if(pNumSubstitutions 
!= NULL
) { 
 180         *pNumSubstitutions 
= 0; 
 184     destLimit 
= (dest
!=NULL
)?(dest 
+ destCapacity
):NULL
; 
 186     numSubstitutions 
= 0; 
 189         /* simple loop for conversion of a NUL-terminated BMP string */ 
 190         while((ch
=*src
) != 0 && !U16_IS_SURROGATE(ch
)) { 
 192             if(pDest 
< destLimit
) { 
 200             /* "complicated" case, find the end of the remaining string */ 
 201             while(*++srcLimit 
!= 0) {} 
 204         srcLimit 
= (src
!=NULL
)?(src 
+ srcLength
):NULL
; 
 207     /* convert with length */ 
 208     while(src 
< srcLimit
) { 
 210         if(!U16_IS_SURROGATE(ch
)) { 
 211             /* write or count ch below */ 
 212         } else if(U16_IS_SURROGATE_LEAD(ch
) && src 
< srcLimit 
&& U16_IS_TRAIL(ch2 
= *src
)) { 
 214             ch 
= U16_GET_SUPPLEMENTARY(ch
, ch2
); 
 215         } else if((ch 
= subchar
) < 0) { 
 216             /* unpaired surrogate */ 
 217             *pErrorCode 
= U_INVALID_CHAR_FOUND
; 
 222         if(pDest 
< destLimit
) { 
 229     reqLength 
+= (int32_t)(pDest 
- dest
); 
 231         *pDestLength 
= reqLength
; 
 233     if(pNumSubstitutions 
!= NULL
) { 
 234         *pNumSubstitutions 
= numSubstitutions
; 
 237     /* Terminate the buffer */ 
 238     u_terminateUChar32s(dest
, destCapacity
, reqLength
, pErrorCode
); 
 243 U_CAPI UChar32
* U_EXPORT2 
 
 244 u_strToUTF32(UChar32 
*dest
,  
 245              int32_t destCapacity
, 
 246              int32_t *pDestLength
, 
 249              UErrorCode 
*pErrorCode
) { 
 250     return u_strToUTF32WithSub( 
 251             dest
, destCapacity
, pDestLength
, 
 257 /* for utf8_nextCharSafeBodyTerminated() */ 
 259 utf8_minLegal
[4]={ 0, 0x80, 0x800, 0x10000 }; 
 262  * Version of utf8_nextCharSafeBody() with the following differences: 
 263  * - checks for NUL termination instead of length 
 264  * - works with pointers instead of indexes 
 265  * - always strict (strict==-1) 
 267  * *ps points to after the lead byte and will be moved to after the last trail byte. 
 268  * c is the lead byte. 
 269  * @return the code point, or U_SENTINEL 
 272 utf8_nextCharSafeBodyTerminated(const uint8_t **ps
, UChar32 c
) { 
 273     const uint8_t *s
=*ps
; 
 274     uint8_t trail
, illegal
=0; 
 275     uint8_t count
=U8_COUNT_TRAIL_BYTES(c
); 
 277     U8_MASK_LEAD_BYTE((c
), count
); 
 278     /* count==0 for illegally leading trail bytes and the illegal bytes 0xfe and 0xff */ 
 280     /* each branch falls through to the next one */ 
 283         /* count>=4 is always illegal: no more than 3 trail bytes in Unicode's UTF-8 */ 
 287         trail
=(uint8_t)(*s
++ - 0x80); 
 289         if(trail
>0x3f || c
>=0x110) { 
 290             /* not a trail byte, or code point>0x10ffff (outside Unicode) */ 
 296         trail
=(uint8_t)(*s
++ - 0x80); 
 298             /* not a trail byte */ 
 305         trail
=(uint8_t)(*s
++ - 0x80); 
 307             /* not a trail byte */ 
 314     /* no default branch to optimize switch()  - all values are covered */ 
 317     /* correct sequence - all trail bytes have (b7..b6)==(10)? */ 
 318     /* illegal is also set if count>=4 */ 
 319     if(illegal 
|| c
<utf8_minLegal
[count
] || U_IS_SURROGATE(c
)) { 
 321         /* don't go beyond this sequence */ 
 323         while(count
>0 && U8_IS_TRAIL(*s
)) { 
 334  * Version of utf8_nextCharSafeBody() with the following differences: 
 335  * - works with pointers instead of indexes 
 336  * - always strict (strict==-1) 
 338  * *ps points to after the lead byte and will be moved to after the last trail byte. 
 339  * c is the lead byte. 
 340  * @return the code point, or U_SENTINEL 
 343 utf8_nextCharSafeBodyPointer(const uint8_t **ps
, const uint8_t *limit
, UChar32 c
) { 
 344     const uint8_t *s
=*ps
; 
 345     uint8_t trail
, illegal
=0; 
 346     uint8_t count
=U8_COUNT_TRAIL_BYTES(c
); 
 347     if((limit
-s
)>=count
) { 
 348         U8_MASK_LEAD_BYTE((c
), count
); 
 349         /* count==0 for illegally leading trail bytes and the illegal bytes 0xfe and 0xff */ 
 351         /* each branch falls through to the next one */ 
 354             /* count>=4 is always illegal: no more than 3 trail bytes in Unicode's UTF-8 */ 
 359             c
=(c
<<6)|(trail
&0x3f); 
 361                 illegal
|=(trail
&0xc0)^0x80; 
 363                 /* code point>0x10ffff, outside Unicode */ 
 370             c
=(c
<<6)|(trail
&0x3f); 
 371             illegal
|=(trail
&0xc0)^0x80; 
 375             c
=(c
<<6)|(trail
&0x3f); 
 376             illegal
|=(trail
&0xc0)^0x80; 
 380         /* no default branch to optimize switch()  - all values are covered */ 
 383         illegal
=1; /* too few bytes left */ 
 386     /* correct sequence - all trail bytes have (b7..b6)==(10)? */ 
 387     /* illegal is also set if count>=4 */ 
 388     U_ASSERT(illegal 
|| count
<UPRV_LENGTHOF(utf8_minLegal
)); 
 389     if(illegal 
|| c
<utf8_minLegal
[count
] || U_IS_SURROGATE(c
)) { 
 391         /* don't go beyond this sequence */ 
 393         while(count
>0 && s
<limit 
&& U8_IS_TRAIL(*s
)) { 
 403 U_CAPI UChar
* U_EXPORT2
 
 404 u_strFromUTF8WithSub(UChar 
*dest
, 
 405               int32_t destCapacity
, 
 406               int32_t *pDestLength
, 
 409               UChar32 subchar
, int32_t *pNumSubstitutions
, 
 410               UErrorCode 
*pErrorCode
){ 
 412     UChar 
*pDestLimit 
= dest
+destCapacity
; 
 414     int32_t reqLength 
= 0; 
 415     const uint8_t* pSrc 
= (const uint8_t*) src
; 
 416     uint8_t t1
, t2
; /* trail bytes */ 
 417     int32_t numSubstitutions
; 
 420     if(pErrorCode
==NULL 
|| U_FAILURE(*pErrorCode
)){ 
 424     if( (src
==NULL 
&& srcLength
!=0) || srcLength 
< -1 || 
 425         (destCapacity
<0) || (dest 
== NULL 
&& destCapacity 
> 0) || 
 426         subchar 
> 0x10ffff || U_IS_SURROGATE(subchar
) 
 428         *pErrorCode 
= U_ILLEGAL_ARGUMENT_ERROR
; 
 432     if(pNumSubstitutions
!=NULL
) { 
 433         *pNumSubstitutions
=0; 
 438      * Inline processing of UTF-8 byte sequences: 
 440      * Byte sequences for the most common characters are handled inline in 
 441      * the conversion loops. In order to reduce the path lengths for those 
 442      * characters, the tests are arranged in a kind of binary search. 
 443      * ASCII (<=0x7f) is checked first, followed by the dividing point 
 444      * between 2- and 3-byte sequences (0xe0). 
 445      * The 3-byte branch is tested first to speed up CJK text. 
 446      * The compiler should combine the subtractions for the two tests for 0xe0. 
 447      * Each branch then tests for the other end of its range. 
 452          * Transform a NUL-terminated string. 
 453          * The code explicitly checks for NULs only in the lead byte position. 
 454          * A NUL byte in the trail byte position fails the trail byte range check anyway. 
 456         while(((ch 
= *pSrc
) != 0) && (pDest 
< pDestLimit
)) { 
 462                     if( /* handle U+1000..U+CFFF inline */ 
 464                         (t1 
= (uint8_t)(pSrc
[1] - 0x80)) <= 0x3f && 
 465                         (t2 
= (uint8_t)(pSrc
[2] - 0x80)) <= 0x3f 
 467                         /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ 
 468                         *pDest
++ = (UChar
)((ch 
<< 12) | (t1 
<< 6) | t2
); 
 472                 } else if(ch 
< 0xe0) { 
 473                     if( /* handle U+0080..U+07FF inline */ 
 475                         (t1 
= (uint8_t)(pSrc
[1] - 0x80)) <= 0x3f 
 477                         *pDest
++ = (UChar
)(((ch 
& 0x1f) << 6) | t1
); 
 483                 /* function call for "complicated" and error cases */ 
 484                 ++pSrc
; /* continue after the lead byte */ 
 485                 ch
=utf8_nextCharSafeBodyTerminated(&pSrc
, ch
); 
 486                 if(ch
<0 && (++numSubstitutions
, ch 
= subchar
) < 0) { 
 487                     *pErrorCode 
= U_INVALID_CHAR_FOUND
; 
 489                 } else if(ch
<=0xFFFF) { 
 490                     *(pDest
++)=(UChar
)ch
; 
 492                     *(pDest
++)=U16_LEAD(ch
); 
 493                     if(pDest
<pDestLimit
) { 
 494                         *(pDest
++)=U16_TRAIL(ch
); 
 503         /* Pre-flight the rest of the string. */ 
 504         while((ch 
= *pSrc
) != 0) { 
 510                     if( /* handle U+1000..U+CFFF inline */ 
 512                         (uint8_t)(pSrc
[1] - 0x80) <= 0x3f && 
 513                         (uint8_t)(pSrc
[2] - 0x80) <= 0x3f 
 519                 } else if(ch 
< 0xe0) { 
 520                     if( /* handle U+0080..U+07FF inline */ 
 522                         (uint8_t)(pSrc
[1] - 0x80) <= 0x3f 
 530                 /* function call for "complicated" and error cases */ 
 531                 ++pSrc
; /* continue after the lead byte */ 
 532                 ch
=utf8_nextCharSafeBodyTerminated(&pSrc
, ch
); 
 533                 if(ch
<0 && (++numSubstitutions
, ch 
= subchar
) < 0) { 
 534                     *pErrorCode 
= U_INVALID_CHAR_FOUND
; 
 537                 reqLength 
+= U16_LENGTH(ch
); 
 540     } else /* srcLength >= 0 */ { 
 541         const uint8_t *pSrcLimit 
= pSrc 
+ srcLength
; 
 544         /* Faster loop without ongoing checking for pSrcLimit and pDestLimit. */ 
 547              * Each iteration of the inner loop progresses by at most 3 UTF-8 
 548              * bytes and one UChar, for most characters. 
 549              * For supplementary code points (4 & 2), which are rare, 
 550              * there is an additional adjustment. 
 552             count 
= (int32_t)(pDestLimit 
- pDest
); 
 553             srcLength 
= (int32_t)((pSrcLimit 
- pSrc
) / 3); 
 554             if(count 
> srcLength
) { 
 555                 count 
= srcLength
; /* min(remaining dest, remaining src/3) */ 
 559                  * Too much overhead if we get near the end of the string, 
 560                  * continue with the next loop. 
 572                         if( /* handle U+1000..U+CFFF inline */ 
 574                             (t1 
= (uint8_t)(pSrc
[1] - 0x80)) <= 0x3f && 
 575                             (t2 
= (uint8_t)(pSrc
[2] - 0x80)) <= 0x3f 
 577                             /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ 
 578                             *pDest
++ = (UChar
)((ch 
<< 12) | (t1 
<< 6) | t2
); 
 582                     } else if(ch 
< 0xe0) { 
 583                         if( /* handle U+0080..U+07FF inline */ 
 585                             (t1 
= (uint8_t)(pSrc
[1] - 0x80)) <= 0x3f 
 587                             *pDest
++ = (UChar
)(((ch 
& 0x1f) << 6) | t1
); 
 593                     if(ch 
>= 0xf0 || subchar 
> 0xffff) { 
 595                          * We may read up to six bytes and write up to two UChars, 
 596                          * which we didn't account for with computing count, 
 597                          * so we adjust it here. 
 604                     /* function call for "complicated" and error cases */ 
 605                     ++pSrc
; /* continue after the lead byte */ 
 606                     ch
=utf8_nextCharSafeBodyPointer(&pSrc
, pSrcLimit
, ch
); 
 607                     if(ch
<0 && (++numSubstitutions
, ch 
= subchar
) < 0){ 
 608                         *pErrorCode 
= U_INVALID_CHAR_FOUND
; 
 610                     }else if(ch
<=0xFFFF){ 
 611                         *(pDest
++)=(UChar
)ch
; 
 613                         *(pDest
++)=U16_LEAD(ch
); 
 614                         *(pDest
++)=U16_TRAIL(ch
); 
 617             } while(--count 
> 0); 
 620         while((pSrc
<pSrcLimit
) && (pDest
<pDestLimit
)) { 
 627                     if( /* handle U+1000..U+CFFF inline */ 
 629                         ((pSrcLimit 
- pSrc
) >= 3) && 
 630                         (t1 
= (uint8_t)(pSrc
[1] - 0x80)) <= 0x3f && 
 631                         (t2 
= (uint8_t)(pSrc
[2] - 0x80)) <= 0x3f 
 633                         /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ 
 634                         *pDest
++ = (UChar
)((ch 
<< 12) | (t1 
<< 6) | t2
); 
 638                 } else if(ch 
< 0xe0) { 
 639                     if( /* handle U+0080..U+07FF inline */ 
 641                         ((pSrcLimit 
- pSrc
) >= 2) && 
 642                         (t1 
= (uint8_t)(pSrc
[1] - 0x80)) <= 0x3f 
 644                         *pDest
++ = (UChar
)(((ch 
& 0x1f) << 6) | t1
); 
 650                 /* function call for "complicated" and error cases */ 
 651                 ++pSrc
; /* continue after the lead byte */ 
 652                 ch
=utf8_nextCharSafeBodyPointer(&pSrc
, pSrcLimit
, ch
); 
 653                 if(ch
<0 && (++numSubstitutions
, ch 
= subchar
) < 0){ 
 654                     *pErrorCode 
= U_INVALID_CHAR_FOUND
; 
 656                 }else if(ch
<=0xFFFF){ 
 657                     *(pDest
++)=(UChar
)ch
; 
 659                     *(pDest
++)=U16_LEAD(ch
); 
 660                     if(pDest
<pDestLimit
){ 
 661                         *(pDest
++)=U16_TRAIL(ch
); 
 669         /* do not fill the dest buffer just count the UChars needed */ 
 670         while(pSrc 
< pSrcLimit
){ 
 677                     if( /* handle U+1000..U+CFFF inline */ 
 679                         ((pSrcLimit 
- pSrc
) >= 3) && 
 680                         (uint8_t)(pSrc
[1] - 0x80) <= 0x3f && 
 681                         (uint8_t)(pSrc
[2] - 0x80) <= 0x3f 
 687                 } else if(ch 
< 0xe0) { 
 688                     if( /* handle U+0080..U+07FF inline */ 
 690                         ((pSrcLimit 
- pSrc
) >= 2) && 
 691                         (uint8_t)(pSrc
[1] - 0x80) <= 0x3f 
 699                 /* function call for "complicated" and error cases */ 
 700                 ++pSrc
; /* continue after the lead byte */ 
 701                 ch
=utf8_nextCharSafeBodyPointer(&pSrc
, pSrcLimit
, ch
); 
 702                 if(ch
<0 && (++numSubstitutions
, ch 
= subchar
) < 0){ 
 703                     *pErrorCode 
= U_INVALID_CHAR_FOUND
; 
 706                 reqLength
+=U16_LENGTH(ch
); 
 711     reqLength
+=(int32_t)(pDest 
- dest
); 
 713     if(pNumSubstitutions
!=NULL
) { 
 714         *pNumSubstitutions
=numSubstitutions
; 
 718         *pDestLength 
= reqLength
; 
 721     /* Terminate the buffer */ 
 722     u_terminateUChars(dest
,destCapacity
,reqLength
,pErrorCode
); 
 727 U_CAPI UChar
* U_EXPORT2
 
 728 u_strFromUTF8(UChar 
*dest
, 
 729               int32_t destCapacity
, 
 730               int32_t *pDestLength
, 
 733               UErrorCode 
*pErrorCode
){ 
 734     return u_strFromUTF8WithSub( 
 735             dest
, destCapacity
, pDestLength
, 
 741 U_CAPI UChar 
* U_EXPORT2
 
 742 u_strFromUTF8Lenient(UChar 
*dest
, 
 743                      int32_t destCapacity
, 
 744                      int32_t *pDestLength
, 
 747                      UErrorCode 
*pErrorCode
) { 
 750     int32_t reqLength 
= 0; 
 751     uint8_t* pSrc 
= (uint8_t*) src
; 
 754     if(pErrorCode
==NULL 
|| U_FAILURE(*pErrorCode
)){ 
 758     if( (src
==NULL 
&& srcLength
!=0) || srcLength 
< -1 || 
 759         (destCapacity
<0) || (dest 
== NULL 
&& destCapacity 
> 0) 
 761         *pErrorCode 
= U_ILLEGAL_ARGUMENT_ERROR
; 
 766         /* Transform a NUL-terminated string. */ 
 767         UChar 
*pDestLimit 
= (dest
!=NULL
)?(dest
+destCapacity
):NULL
; 
 768         uint8_t t1
, t2
, t3
; /* trail bytes */ 
 770         while(((ch 
= *pSrc
) != 0) && (pDest 
< pDestLimit
)) { 
 773                  * ASCII, or a trail byte in lead position which is treated like 
 774                  * a single-byte sequence for better character boundary 
 775                  * resynchronization after illegal sequences. 
 780             } else if(ch 
< 0xe0) { /* U+0080..U+07FF */ 
 781                 if((t1 
= pSrc
[1]) != 0) { 
 782                     /* 0x3080 = (0xc0 << 6) + 0x80 */ 
 783                     *pDest
++ = (UChar
)((ch 
<< 6) + t1 
- 0x3080); 
 787             } else if(ch 
< 0xf0) { /* U+0800..U+FFFF */ 
 788                 if((t1 
= pSrc
[1]) != 0 && (t2 
= pSrc
[2]) != 0) { 
 789                     /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ 
 790                     /* 0x2080 = (0x80 << 6) + 0x80 */ 
 791                     *pDest
++ = (UChar
)((ch 
<< 12) + (t1 
<< 6) + t2 
- 0x2080); 
 795             } else /* f0..f4 */ { /* U+10000..U+10FFFF */ 
 796                 if((t1 
= pSrc
[1]) != 0 && (t2 
= pSrc
[2]) != 0 && (t3 
= pSrc
[3]) != 0) { 
 798                     /* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */ 
 799                     ch 
= (ch 
<< 18) + (t1 
<< 12) + (t2 
<< 6) + t3 
- 0x3c82080; 
 800                     *(pDest
++) = U16_LEAD(ch
); 
 801                     if(pDest 
< pDestLimit
) { 
 802                         *(pDest
++) = U16_TRAIL(ch
); 
 811             /* truncated character at the end */ 
 813             while(*++pSrc 
!= 0) {} 
 817         /* Pre-flight the rest of the string. */ 
 818         while((ch 
= *pSrc
) != 0) { 
 821                  * ASCII, or a trail byte in lead position which is treated like 
 822                  * a single-byte sequence for better character boundary 
 823                  * resynchronization after illegal sequences. 
 828             } else if(ch 
< 0xe0) { /* U+0080..U+07FF */ 
 834             } else if(ch 
< 0xf0) { /* U+0800..U+FFFF */ 
 835                 if(pSrc
[1] != 0 && pSrc
[2] != 0) { 
 840             } else /* f0..f4 */ { /* U+10000..U+10FFFF */ 
 841                 if(pSrc
[1] != 0 && pSrc
[2] != 0 && pSrc
[3] != 0) { 
 848             /* truncated character at the end */ 
 852     } else /* srcLength >= 0 */ { 
 853       const uint8_t *pSrcLimit 
= (pSrc
!=NULL
)?(pSrc 
+ srcLength
):NULL
; 
 856          * This function requires that if srcLength is given, then it must be 
 857          * destCapatity >= srcLength so that we need not check for 
 858          * destination buffer overflow in the loop. 
 860         if(destCapacity 
< srcLength
) { 
 861             if(pDestLength 
!= NULL
) { 
 862                 *pDestLength 
= srcLength
; /* this likely overestimates the true destLength! */ 
 864             *pErrorCode 
= U_BUFFER_OVERFLOW_ERROR
; 
 868         if((pSrcLimit 
- pSrc
) >= 4) { 
 869             pSrcLimit 
-= 3; /* temporarily reduce pSrcLimit */ 
 871             /* in this loop, we can always access at least 4 bytes, up to pSrc+3 */ 
 876                      * ASCII, or a trail byte in lead position which is treated like 
 877                      * a single-byte sequence for better character boundary 
 878                      * resynchronization after illegal sequences. 
 881                 } else if(ch 
< 0xe0) { /* U+0080..U+07FF */ 
 882                     /* 0x3080 = (0xc0 << 6) + 0x80 */ 
 883                     *pDest
++ = (UChar
)((ch 
<< 6) + *pSrc
++ - 0x3080); 
 884                 } else if(ch 
< 0xf0) { /* U+0800..U+FFFF */ 
 885                     /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ 
 886                     /* 0x2080 = (0x80 << 6) + 0x80 */ 
 887                     ch 
= (ch 
<< 12) + (*pSrc
++ << 6); 
 888                     *pDest
++ = (UChar
)(ch 
+ *pSrc
++ - 0x2080); 
 889                 } else /* f0..f4 */ { /* U+10000..U+10FFFF */ 
 890                     /* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */ 
 891                     ch 
= (ch 
<< 18) + (*pSrc
++ << 12); 
 893                     ch 
+= *pSrc
++ - 0x3c82080; 
 894                     *(pDest
++) = U16_LEAD(ch
); 
 895                     *(pDest
++) = U16_TRAIL(ch
); 
 897             } while(pSrc 
< pSrcLimit
); 
 899             pSrcLimit 
+= 3; /* restore original pSrcLimit */ 
 902         while(pSrc 
< pSrcLimit
) { 
 906                  * ASCII, or a trail byte in lead position which is treated like 
 907                  * a single-byte sequence for better character boundary 
 908                  * resynchronization after illegal sequences. 
 912             } else if(ch 
< 0xe0) { /* U+0080..U+07FF */ 
 913                 if(pSrc 
< pSrcLimit
) { 
 914                     /* 0x3080 = (0xc0 << 6) + 0x80 */ 
 915                     *pDest
++ = (UChar
)((ch 
<< 6) + *pSrc
++ - 0x3080); 
 918             } else if(ch 
< 0xf0) { /* U+0800..U+FFFF */ 
 919                 if((pSrcLimit 
- pSrc
) >= 2) { 
 920                     /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ 
 921                     /* 0x2080 = (0x80 << 6) + 0x80 */ 
 922                     ch 
= (ch 
<< 12) + (*pSrc
++ << 6); 
 923                     *pDest
++ = (UChar
)(ch 
+ *pSrc
++ - 0x2080); 
 927             } else /* f0..f4 */ { /* U+10000..U+10FFFF */ 
 928                 if((pSrcLimit 
- pSrc
) >= 3) { 
 929                     /* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */ 
 930                     ch 
= (ch 
<< 18) + (*pSrc
++ << 12); 
 932                     ch 
+= *pSrc
++ - 0x3c82080; 
 933                     *(pDest
++) = U16_LEAD(ch
); 
 934                     *(pDest
++) = U16_TRAIL(ch
); 
 940             /* truncated character at the end */ 
 946     reqLength
+=(int32_t)(pDest 
- dest
); 
 949         *pDestLength 
= reqLength
; 
 952     /* Terminate the buffer */ 
 953     u_terminateUChars(dest
,destCapacity
,reqLength
,pErrorCode
); 
 958 static inline uint8_t * 
 959 _appendUTF8(uint8_t *pDest
, UChar32 c
) { 
 960     /* it is 0<=c<=0x10ffff and not a surrogate if called by a validating function */ 
 963     } else if(c
<=0x7ff) { 
 964         *pDest
++=(uint8_t)((c
>>6)|0xc0); 
 965         *pDest
++=(uint8_t)((c
&0x3f)|0x80); 
 966     } else if(c
<=0xffff) { 
 967         *pDest
++=(uint8_t)((c
>>12)|0xe0); 
 968         *pDest
++=(uint8_t)(((c
>>6)&0x3f)|0x80); 
 969         *pDest
++=(uint8_t)(((c
)&0x3f)|0x80); 
 970     } else /* if((uint32_t)(c)<=0x10ffff) */ { 
 971         *pDest
++=(uint8_t)(((c
)>>18)|0xf0); 
 972         *pDest
++=(uint8_t)((((c
)>>12)&0x3f)|0x80); 
 973         *pDest
++=(uint8_t)((((c
)>>6)&0x3f)|0x80); 
 974         *pDest
++=(uint8_t)(((c
)&0x3f)|0x80); 
 980 U_CAPI 
char* U_EXPORT2 
 
 981 u_strToUTF8WithSub(char *dest
, 
 982             int32_t destCapacity
, 
 983             int32_t *pDestLength
, 
 986             UChar32 subchar
, int32_t *pNumSubstitutions
, 
 987             UErrorCode 
*pErrorCode
){ 
 990     uint8_t *pDest 
= (uint8_t *)dest
; 
 991     uint8_t *pDestLimit 
= (pDest
!=NULL
)?(pDest 
+ destCapacity
):NULL
; 
 992     int32_t numSubstitutions
; 
 995     if(pErrorCode
==NULL 
|| U_FAILURE(*pErrorCode
)){ 
 999     if( (pSrc
==NULL 
&& srcLength
!=0) || srcLength 
< -1 || 
1000         (destCapacity
<0) || (dest 
== NULL 
&& destCapacity 
> 0) || 
1001         subchar 
> 0x10ffff || U_IS_SURROGATE(subchar
) 
1003         *pErrorCode 
= U_ILLEGAL_ARGUMENT_ERROR
; 
1007     if(pNumSubstitutions
!=NULL
) { 
1008         *pNumSubstitutions
=0; 
1013         while((ch
=*pSrc
)!=0) { 
1016                 if(pDest
<pDestLimit
) { 
1017                     *pDest
++ = (uint8_t)ch
; 
1022             } else if(ch 
<= 0x7ff) { 
1023                 if((pDestLimit 
- pDest
) >= 2) { 
1024                     *pDest
++=(uint8_t)((ch
>>6)|0xc0); 
1025                     *pDest
++=(uint8_t)((ch
&0x3f)|0x80); 
1030             } else if(ch 
<= 0xd7ff || ch 
>= 0xe000) { 
1031                 if((pDestLimit 
- pDest
) >= 3) { 
1032                     *pDest
++=(uint8_t)((ch
>>12)|0xe0); 
1033                     *pDest
++=(uint8_t)(((ch
>>6)&0x3f)|0x80); 
1034                     *pDest
++=(uint8_t)((ch
&0x3f)|0x80); 
1039             } else /* ch is a surrogate */ { 
1042                 /*need not check for NUL because NUL fails U16_IS_TRAIL() anyway*/ 
1043                 if(U16_IS_SURROGATE_LEAD(ch
) && U16_IS_TRAIL(ch2
=*pSrc
)) {  
1045                     ch
=U16_GET_SUPPLEMENTARY(ch
, ch2
); 
1046                 } else if(subchar
>=0) { 
1050                     /* Unicode 3.2 forbids surrogate code points in UTF-8 */ 
1051                     *pErrorCode 
= U_INVALID_CHAR_FOUND
; 
1055                 length 
= U8_LENGTH(ch
); 
1056                 if((pDestLimit 
- pDest
) >= length
) { 
1057                     /* convert and append*/ 
1058                     pDest
=_appendUTF8(pDest
, ch
); 
1065         while((ch
=*pSrc
++)!=0) { 
1068             } else if(ch
<=0x7ff) { 
1070             } else if(!U16_IS_SURROGATE(ch
)) { 
1072             } else if(U16_IS_SURROGATE_LEAD(ch
) && U16_IS_TRAIL(ch2
=*pSrc
)) { 
1075             } else if(subchar
>=0) { 
1076                 reqLength
+=U8_LENGTH(subchar
); 
1079                 /* Unicode 3.2 forbids surrogate code points in UTF-8 */ 
1080                 *pErrorCode 
= U_INVALID_CHAR_FOUND
; 
1085         const UChar 
*pSrcLimit 
= (pSrc
!=NULL
)?(pSrc
+srcLength
):NULL
; 
1088         /* Faster loop without ongoing checking for pSrcLimit and pDestLimit. */ 
1091              * Each iteration of the inner loop progresses by at most 3 UTF-8 
1092              * bytes and one UChar, for most characters. 
1093              * For supplementary code points (4 & 2), which are rare, 
1094              * there is an additional adjustment. 
1096             count 
= (int32_t)((pDestLimit 
- pDest
) / 3); 
1097             srcLength 
= (int32_t)(pSrcLimit 
- pSrc
); 
1098             if(count 
> srcLength
) { 
1099                 count 
= srcLength
; /* min(remaining dest/3, remaining src) */ 
1103                  * Too much overhead if we get near the end of the string, 
1104                  * continue with the next loop. 
1111                     *pDest
++ = (uint8_t)ch
; 
1112                 } else if(ch 
<= 0x7ff) { 
1113                     *pDest
++=(uint8_t)((ch
>>6)|0xc0); 
1114                     *pDest
++=(uint8_t)((ch
&0x3f)|0x80); 
1115                 } else if(ch 
<= 0xd7ff || ch 
>= 0xe000) { 
1116                     *pDest
++=(uint8_t)((ch
>>12)|0xe0); 
1117                     *pDest
++=(uint8_t)(((ch
>>6)&0x3f)|0x80); 
1118                     *pDest
++=(uint8_t)((ch
&0x3f)|0x80); 
1119                 } else /* ch is a surrogate */ { 
1121                      * We will read two UChars and probably output four bytes, 
1122                      * which we didn't account for with computing count, 
1123                      * so we adjust it here. 
1126                         --pSrc
; /* undo ch=*pSrc++ for the lead surrogate */ 
1127                         break;  /* recompute count */ 
1130                     if(U16_IS_SURROGATE_LEAD(ch
) && U16_IS_TRAIL(ch2
=*pSrc
)) {  
1132                         ch
=U16_GET_SUPPLEMENTARY(ch
, ch2
); 
1134                         /* writing 4 bytes per 2 UChars is ok */ 
1135                         *pDest
++=(uint8_t)((ch
>>18)|0xf0); 
1136                         *pDest
++=(uint8_t)(((ch
>>12)&0x3f)|0x80); 
1137                         *pDest
++=(uint8_t)(((ch
>>6)&0x3f)|0x80); 
1138                         *pDest
++=(uint8_t)((ch
&0x3f)|0x80); 
1140                         /* Unicode 3.2 forbids surrogate code points in UTF-8 */ 
1145                             *pErrorCode 
= U_INVALID_CHAR_FOUND
; 
1149                         /* convert and append*/ 
1150                         pDest
=_appendUTF8(pDest
, ch
); 
1153             } while(--count 
> 0); 
1156         while(pSrc
<pSrcLimit
) { 
1159                 if(pDest
<pDestLimit
) { 
1160                     *pDest
++ = (uint8_t)ch
; 
1165             } else if(ch 
<= 0x7ff) { 
1166                 if((pDestLimit 
- pDest
) >= 2) { 
1167                     *pDest
++=(uint8_t)((ch
>>6)|0xc0); 
1168                     *pDest
++=(uint8_t)((ch
&0x3f)|0x80); 
1173             } else if(ch 
<= 0xd7ff || ch 
>= 0xe000) { 
1174                 if((pDestLimit 
- pDest
) >= 3) { 
1175                     *pDest
++=(uint8_t)((ch
>>12)|0xe0); 
1176                     *pDest
++=(uint8_t)(((ch
>>6)&0x3f)|0x80); 
1177                     *pDest
++=(uint8_t)((ch
&0x3f)|0x80); 
1182             } else /* ch is a surrogate */ { 
1185                 if(U16_IS_SURROGATE_LEAD(ch
) && pSrc
<pSrcLimit 
&& U16_IS_TRAIL(ch2
=*pSrc
)) {  
1187                     ch
=U16_GET_SUPPLEMENTARY(ch
, ch2
); 
1188                 } else if(subchar
>=0) { 
1192                     /* Unicode 3.2 forbids surrogate code points in UTF-8 */ 
1193                     *pErrorCode 
= U_INVALID_CHAR_FOUND
; 
1197                 length 
= U8_LENGTH(ch
); 
1198                 if((pDestLimit 
- pDest
) >= length
) { 
1199                     /* convert and append*/ 
1200                     pDest
=_appendUTF8(pDest
, ch
); 
1207         while(pSrc
<pSrcLimit
) { 
1211             } else if(ch
<=0x7ff) { 
1213             } else if(!U16_IS_SURROGATE(ch
)) { 
1215             } else if(U16_IS_SURROGATE_LEAD(ch
) && pSrc
<pSrcLimit 
&& U16_IS_TRAIL(ch2
=*pSrc
)) { 
1218             } else if(subchar
>=0) { 
1219                 reqLength
+=U8_LENGTH(subchar
); 
1222                 /* Unicode 3.2 forbids surrogate code points in UTF-8 */ 
1223                 *pErrorCode 
= U_INVALID_CHAR_FOUND
; 
1229     reqLength
+=(int32_t)(pDest 
- (uint8_t *)dest
); 
1231     if(pNumSubstitutions
!=NULL
) { 
1232         *pNumSubstitutions
=numSubstitutions
; 
1236         *pDestLength 
= reqLength
; 
1239     /* Terminate the buffer */ 
1240     u_terminateChars(dest
, destCapacity
, reqLength
, pErrorCode
); 
1244 U_CAPI 
char* U_EXPORT2 
 
1245 u_strToUTF8(char *dest
, 
1246             int32_t destCapacity
, 
1247             int32_t *pDestLength
, 
1250             UErrorCode 
*pErrorCode
){ 
1251     return u_strToUTF8WithSub( 
1252             dest
, destCapacity
, pDestLength
, 
1258 U_CAPI UChar
* U_EXPORT2
 
1259 u_strFromJavaModifiedUTF8WithSub( 
1261         int32_t destCapacity
, 
1262         int32_t *pDestLength
, 
1265         UChar32 subchar
, int32_t *pNumSubstitutions
, 
1266         UErrorCode 
*pErrorCode
) { 
1267     UChar 
*pDest 
= dest
; 
1268     UChar 
*pDestLimit 
= dest
+destCapacity
; 
1270     int32_t reqLength 
= 0; 
1271     const uint8_t* pSrc 
= (const uint8_t*) src
; 
1272     const uint8_t *pSrcLimit
; 
1274     uint8_t t1
, t2
; /* trail bytes */ 
1275     int32_t numSubstitutions
; 
1278     if(U_FAILURE(*pErrorCode
)){ 
1281     if( (src
==NULL 
&& srcLength
!=0) || srcLength 
< -1 || 
1282         (dest
==NULL 
&& destCapacity
!=0) || destCapacity
<0 || 
1283         subchar 
> 0x10ffff || U_IS_SURROGATE(subchar
) 
1285         *pErrorCode 
= U_ILLEGAL_ARGUMENT_ERROR
; 
1289     if(pNumSubstitutions
!=NULL
) { 
1290         *pNumSubstitutions
=0; 
1296          * Transform a NUL-terminated ASCII string. 
1297          * Handle non-ASCII strings with slower code. 
1299         while(((ch 
= *pSrc
) != 0) && ch 
<= 0x7f && (pDest 
< pDestLimit
)) { 
1304             reqLength
=(int32_t)(pDest 
- dest
); 
1306                 *pDestLength 
= reqLength
; 
1309             /* Terminate the buffer */ 
1310             u_terminateUChars(dest
, destCapacity
, reqLength
, pErrorCode
); 
1313         srcLength 
= uprv_strlen((const char *)pSrc
); 
1316     /* Faster loop without ongoing checking for pSrcLimit and pDestLimit. */ 
1317     pSrcLimit 
= (pSrc 
== NULL
) ? NULL 
: pSrc 
+ srcLength
; 
1319         count 
= (int32_t)(pDestLimit 
- pDest
); 
1320         srcLength 
= (int32_t)(pSrcLimit 
- pSrc
); 
1321         if(count 
>= srcLength 
&& srcLength 
> 0 && *pSrc 
<= 0x7f) { 
1322             /* fast ASCII loop */ 
1323             const uint8_t *prevSrc 
= pSrc
; 
1325             while(pSrc 
< pSrcLimit 
&& (ch 
= *pSrc
) <= 0x7f) { 
1329             delta 
= (int32_t)(pSrc 
- prevSrc
); 
1334          * Each iteration of the inner loop progresses by at most 3 UTF-8 
1335          * bytes and one UChar. 
1338         if(count 
> srcLength
) { 
1339             count 
= srcLength
; /* min(remaining dest, remaining src/3) */ 
1343              * Too much overhead if we get near the end of the string, 
1344              * continue with the next loop. 
1355                     if( /* handle U+0000..U+FFFF inline */ 
1357                         (t1 
= (uint8_t)(pSrc
[1] - 0x80)) <= 0x3f && 
1358                         (t2 
= (uint8_t)(pSrc
[2] - 0x80)) <= 0x3f 
1360                         /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ 
1361                         *pDest
++ = (UChar
)((ch 
<< 12) | (t1 
<< 6) | t2
); 
1366                     if( /* handle U+0000..U+07FF inline */ 
1368                         (t1 
= (uint8_t)(pSrc
[1] - 0x80)) <= 0x3f 
1370                         *pDest
++ = (UChar
)(((ch 
& 0x1f) << 6) | t1
); 
1377                     *pErrorCode 
= U_INVALID_CHAR_FOUND
; 
1379                 } else if(subchar 
> 0xffff && --count 
== 0) { 
1381                      * We need to write two UChars, adjusted count for that, 
1382                      * and ran out of space. 
1386                     /* function call for error cases */ 
1387                     ++pSrc
; /* continue after the lead byte */ 
1388                     utf8_nextCharSafeBodyPointer(&pSrc
, pSrcLimit
, ch
); 
1390                     if(subchar
<=0xFFFF) { 
1391                         *(pDest
++)=(UChar
)subchar
; 
1393                         *(pDest
++)=U16_LEAD(subchar
); 
1394                         *(pDest
++)=U16_TRAIL(subchar
); 
1398         } while(--count 
> 0); 
1401     while((pSrc
<pSrcLimit
) && (pDest
<pDestLimit
)) { 
1408                 if( /* handle U+0000..U+FFFF inline */ 
1410                     ((pSrcLimit 
- pSrc
) >= 3) && 
1411                     (t1 
= (uint8_t)(pSrc
[1] - 0x80)) <= 0x3f && 
1412                     (t2 
= (uint8_t)(pSrc
[2] - 0x80)) <= 0x3f 
1414                     /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ 
1415                     *pDest
++ = (UChar
)((ch 
<< 12) | (t1 
<< 6) | t2
); 
1420                 if( /* handle U+0000..U+07FF inline */ 
1422                     ((pSrcLimit 
- pSrc
) >= 2) && 
1423                     (t1 
= (uint8_t)(pSrc
[1] - 0x80)) <= 0x3f 
1425                     *pDest
++ = (UChar
)(((ch 
& 0x1f) << 6) | t1
); 
1432                 *pErrorCode 
= U_INVALID_CHAR_FOUND
; 
1435                 /* function call for error cases */ 
1436                 ++pSrc
; /* continue after the lead byte */ 
1437                 utf8_nextCharSafeBodyPointer(&pSrc
, pSrcLimit
, ch
); 
1439                 if(subchar
<=0xFFFF) { 
1440                     *(pDest
++)=(UChar
)subchar
; 
1442                     *(pDest
++)=U16_LEAD(subchar
); 
1443                     if(pDest
<pDestLimit
) { 
1444                         *(pDest
++)=U16_TRAIL(subchar
); 
1454     /* do not fill the dest buffer just count the UChars needed */ 
1455     while(pSrc 
< pSrcLimit
){ 
1462                 if( /* handle U+0000..U+FFFF inline */ 
1464                     ((pSrcLimit 
- pSrc
) >= 3) && 
1465                     (uint8_t)(pSrc
[1] - 0x80) <= 0x3f && 
1466                     (uint8_t)(pSrc
[2] - 0x80) <= 0x3f 
1473                 if( /* handle U+0000..U+07FF inline */ 
1475                     ((pSrcLimit 
- pSrc
) >= 2) && 
1476                     (uint8_t)(pSrc
[1] - 0x80) <= 0x3f 
1485                 *pErrorCode 
= U_INVALID_CHAR_FOUND
; 
1488                 /* function call for error cases */ 
1489                 ++pSrc
; /* continue after the lead byte */ 
1490                 utf8_nextCharSafeBodyPointer(&pSrc
, pSrcLimit
, ch
); 
1492                 reqLength
+=U16_LENGTH(ch
); 
1497     if(pNumSubstitutions
!=NULL
) { 
1498         *pNumSubstitutions
=numSubstitutions
; 
1501     reqLength
+=(int32_t)(pDest 
- dest
); 
1503         *pDestLength 
= reqLength
; 
1506     /* Terminate the buffer */ 
1507     u_terminateUChars(dest
, destCapacity
, reqLength
, pErrorCode
); 
1511 U_CAPI 
char* U_EXPORT2 
 
1512 u_strToJavaModifiedUTF8( 
1514         int32_t destCapacity
, 
1515         int32_t *pDestLength
, 
1518         UErrorCode 
*pErrorCode
) { 
1519     int32_t reqLength
=0; 
1521     uint8_t *pDest 
= (uint8_t *)dest
; 
1522     uint8_t *pDestLimit 
= pDest 
+ destCapacity
; 
1523     const UChar 
*pSrcLimit
; 
1527     if(U_FAILURE(*pErrorCode
)){ 
1530     if( (src
==NULL 
&& srcLength
!=0) || srcLength 
< -1 || 
1531         (dest
==NULL 
&& destCapacity
!=0) || destCapacity
<0 
1533         *pErrorCode 
= U_ILLEGAL_ARGUMENT_ERROR
; 
1538         /* Convert NUL-terminated ASCII, then find the string length. */ 
1539         while((ch
=*src
)<=0x7f && ch 
!= 0 && pDest
<pDestLimit
) { 
1540             *pDest
++ = (uint8_t)ch
; 
1544             reqLength
=(int32_t)(pDest 
- (uint8_t *)dest
); 
1546                 *pDestLength 
= reqLength
; 
1549             /* Terminate the buffer */ 
1550             u_terminateChars(dest
, destCapacity
, reqLength
, pErrorCode
); 
1553         srcLength 
= u_strlen(src
); 
1556     /* Faster loop without ongoing checking for pSrcLimit and pDestLimit. */ 
1557     pSrcLimit 
= (src
!=NULL
)?(src
+srcLength
):NULL
; 
1559         count 
= (int32_t)(pDestLimit 
- pDest
); 
1560         srcLength 
= (int32_t)(pSrcLimit 
- src
); 
1561         if(count 
>= srcLength 
&& srcLength 
> 0 && *src 
<= 0x7f) { 
1562             /* fast ASCII loop */ 
1563             const UChar 
*prevSrc 
= src
; 
1565             while(src 
< pSrcLimit 
&& (ch 
= *src
) <= 0x7f && ch 
!= 0) { 
1566                 *pDest
++=(uint8_t)ch
; 
1569             delta 
= (int32_t)(src 
- prevSrc
); 
1574          * Each iteration of the inner loop progresses by at most 3 UTF-8 
1575          * bytes and one UChar. 
1578         if(count 
> srcLength
) { 
1579             count 
= srcLength
; /* min(remaining dest/3, remaining src) */ 
1583              * Too much overhead if we get near the end of the string, 
1584              * continue with the next loop. 
1590             if(ch 
<= 0x7f && ch 
!= 0) { 
1591                 *pDest
++ = (uint8_t)ch
; 
1592             } else if(ch 
<= 0x7ff) { 
1593                 *pDest
++=(uint8_t)((ch
>>6)|0xc0); 
1594                 *pDest
++=(uint8_t)((ch
&0x3f)|0x80); 
1596                 *pDest
++=(uint8_t)((ch
>>12)|0xe0); 
1597                 *pDest
++=(uint8_t)(((ch
>>6)&0x3f)|0x80); 
1598                 *pDest
++=(uint8_t)((ch
&0x3f)|0x80); 
1600         } while(--count 
> 0); 
1603     while(src
<pSrcLimit
) { 
1605         if(ch 
<= 0x7f && ch 
!= 0) { 
1606             if(pDest
<pDestLimit
) { 
1607                 *pDest
++ = (uint8_t)ch
; 
1612         } else if(ch 
<= 0x7ff) { 
1613             if((pDestLimit 
- pDest
) >= 2) { 
1614                 *pDest
++=(uint8_t)((ch
>>6)|0xc0); 
1615                 *pDest
++=(uint8_t)((ch
&0x3f)|0x80); 
1621             if((pDestLimit 
- pDest
) >= 3) { 
1622                 *pDest
++=(uint8_t)((ch
>>12)|0xe0); 
1623                 *pDest
++=(uint8_t)(((ch
>>6)&0x3f)|0x80); 
1624                 *pDest
++=(uint8_t)((ch
&0x3f)|0x80); 
1631     while(src
<pSrcLimit
) { 
1633         if(ch 
<= 0x7f && ch 
!= 0) { 
1635         } else if(ch
<=0x7ff) { 
1642     reqLength
+=(int32_t)(pDest 
- (uint8_t *)dest
); 
1644         *pDestLength 
= reqLength
; 
1647     /* Terminate the buffer */ 
1648     u_terminateChars(dest
, destCapacity
, reqLength
, pErrorCode
);