]>
git.saurik.com Git - apple/icu.git/blob - icuSources/common/ustrtrns.c
   2 ****************************************************************************** 
   4 *   Copyright (C) 2001-2010, International Business Machines 
   5 *   Corporation and others.  All Rights Reserved. 
   7 ****************************************************************************** 
  11 * Modification History: 
  13 *   Date        Name        Description 
  14 *   9/10/2001    Ram    Creation. 
  15 ****************************************************************************** 
  18 /******************************************************************************* 
  20  * u_strTo* and u_strFrom* APIs 
  21  * WCS functions moved to ustr_wcs.c for better modularization 
  23  ******************************************************************************* 
  27 #include "unicode/putil.h" 
  28 #include "unicode/ustring.h" 
  33 U_CAPI UChar
* U_EXPORT2 
 
  34 u_strFromUTF32WithSub(UChar 
*dest
, 
  39                UChar32 subchar
, int32_t *pNumSubstitutions
, 
  40                UErrorCode 
*pErrorCode
) { 
  41     const UChar32 
*srcLimit
; 
  46     int32_t numSubstitutions
; 
  49     if(U_FAILURE(*pErrorCode
)){ 
  52     if( (src
==NULL 
&& srcLength
!=0) || srcLength 
< -1 || 
  53         (destCapacity
<0) || (dest 
== NULL 
&& destCapacity 
> 0) || 
  54         subchar 
> 0x10ffff || U_IS_SURROGATE(subchar
) 
  56         *pErrorCode 
= U_ILLEGAL_ARGUMENT_ERROR
; 
  60     if(pNumSubstitutions 
!= NULL
) { 
  61         *pNumSubstitutions 
= 0; 
  65     destLimit 
= dest 
+ destCapacity
; 
  70         /* simple loop for conversion of a NUL-terminated BMP string */ 
  71         while((ch
=*src
) != 0 && 
  72               ((uint32_t)ch 
< 0xd800 || (0xe000 <= ch 
&& ch 
<= 0xffff))) { 
  74             if(pDest 
< destLimit
) { 
  82             /* "complicated" case, find the end of the remaining string */ 
  83             while(*++srcLimit 
!= 0) {} 
  86         srcLimit 
= src 
+ srcLength
; 
  89     /* convert with length */ 
  90     while(src 
< srcLimit
) { 
  93             /* usually "loops" once; twice only for writing subchar */ 
  94             if((uint32_t)ch 
< 0xd800 || (0xe000 <= ch 
&& ch 
<= 0xffff)) { 
  95                 if(pDest 
< destLimit
) { 
 101             } else if(0x10000 <= ch 
&& ch 
<= 0x10ffff) { 
 102                 if((pDest 
+ 2) <= destLimit
) { 
 103                     *pDest
++ = U16_LEAD(ch
); 
 104                     *pDest
++ = U16_TRAIL(ch
); 
 109             } else if((ch 
= subchar
) < 0) { 
 110                 /* surrogate code point, or not a Unicode code point at all */ 
 111                 *pErrorCode 
= U_INVALID_CHAR_FOUND
; 
 119     reqLength 
+= (int32_t)(pDest 
- dest
); 
 121         *pDestLength 
= reqLength
; 
 123     if(pNumSubstitutions 
!= NULL
) { 
 124         *pNumSubstitutions 
= numSubstitutions
; 
 127     /* Terminate the buffer */ 
 128     u_terminateUChars(dest
, destCapacity
, reqLength
, pErrorCode
); 
 133 U_CAPI UChar
* U_EXPORT2 
 
 134 u_strFromUTF32(UChar 
*dest
, 
 135                int32_t destCapacity
,  
 136                int32_t *pDestLength
, 
 139                UErrorCode 
*pErrorCode
) { 
 140     return u_strFromUTF32WithSub( 
 141             dest
, destCapacity
, pDestLength
, 
 147 U_CAPI UChar32
* U_EXPORT2 
 
 148 u_strToUTF32WithSub(UChar32 
*dest
, 
 149              int32_t destCapacity
, 
 150              int32_t *pDestLength
, 
 153              UChar32 subchar
, int32_t *pNumSubstitutions
, 
 154              UErrorCode 
*pErrorCode
) { 
 155     const UChar 
*srcLimit
; 
 161     int32_t numSubstitutions
; 
 164     if(U_FAILURE(*pErrorCode
)){ 
 167     if( (src
==NULL 
&& srcLength
!=0) || srcLength 
< -1 || 
 168         (destCapacity
<0) || (dest 
== NULL 
&& destCapacity 
> 0) || 
 169         subchar 
> 0x10ffff || U_IS_SURROGATE(subchar
) 
 171         *pErrorCode 
= U_ILLEGAL_ARGUMENT_ERROR
; 
 175     if(pNumSubstitutions 
!= NULL
) { 
 176         *pNumSubstitutions 
= 0; 
 180     destLimit 
= dest 
+ destCapacity
; 
 182     numSubstitutions 
= 0; 
 185         /* simple loop for conversion of a NUL-terminated BMP string */ 
 186         while((ch
=*src
) != 0 && !U16_IS_SURROGATE(ch
)) { 
 188             if(pDest 
< destLimit
) { 
 196             /* "complicated" case, find the end of the remaining string */ 
 197             while(*++srcLimit 
!= 0) {} 
 200         srcLimit 
= src 
+ srcLength
; 
 203     /* convert with length */ 
 204     while(src 
< srcLimit
) { 
 206         if(!U16_IS_SURROGATE(ch
)) { 
 207             /* write or count ch below */ 
 208         } else if(U16_IS_SURROGATE_LEAD(ch
) && src 
< srcLimit 
&& U16_IS_TRAIL(ch2 
= *src
)) { 
 210             ch 
= U16_GET_SUPPLEMENTARY(ch
, ch2
); 
 211         } else if((ch 
= subchar
) < 0) { 
 212             /* unpaired surrogate */ 
 213             *pErrorCode 
= U_INVALID_CHAR_FOUND
; 
 218         if(pDest 
< destLimit
) { 
 225     reqLength 
+= (int32_t)(pDest 
- dest
); 
 227         *pDestLength 
= reqLength
; 
 229     if(pNumSubstitutions 
!= NULL
) { 
 230         *pNumSubstitutions 
= numSubstitutions
; 
 233     /* Terminate the buffer */ 
 234     u_terminateUChar32s(dest
, destCapacity
, reqLength
, pErrorCode
); 
 239 U_CAPI UChar32
* U_EXPORT2 
 
 240 u_strToUTF32(UChar32 
*dest
,  
 241              int32_t destCapacity
, 
 242              int32_t *pDestLength
, 
 245              UErrorCode 
*pErrorCode
) { 
 246     return u_strToUTF32WithSub( 
 247             dest
, destCapacity
, pDestLength
, 
 253 /* for utf8_nextCharSafeBodyTerminated() */ 
 255 utf8_minLegal
[4]={ 0, 0x80, 0x800, 0x10000 }; 
 258  * Version of utf8_nextCharSafeBody() with the following differences: 
 259  * - checks for NUL termination instead of length 
 260  * - works with pointers instead of indexes 
 261  * - always strict (strict==-1) 
 263  * *ps points to after the lead byte and will be moved to after the last trail byte. 
 264  * c is the lead byte. 
 265  * @return the code point, or U_SENTINEL 
 268 utf8_nextCharSafeBodyTerminated(const uint8_t **ps
, UChar32 c
) { 
 269     const uint8_t *s
=*ps
; 
 270     uint8_t trail
, illegal
=0; 
 271     uint8_t count
=UTF8_COUNT_TRAIL_BYTES(c
); 
 272     UTF8_MASK_LEAD_BYTE((c
), count
); 
 273     /* count==0 for illegally leading trail bytes and the illegal bytes 0xfe and 0xff */ 
 275     /* each branch falls through to the next one */ 
 278         /* count>=4 is always illegal: no more than 3 trail bytes in Unicode's UTF-8 */ 
 282         trail
=(uint8_t)(*s
++ - 0x80); 
 284         if(trail
>0x3f || c
>=0x110) { 
 285             /* not a trail byte, or code point>0x10ffff (outside Unicode) */ 
 290         trail
=(uint8_t)(*s
++ - 0x80); 
 292             /* not a trail byte */ 
 298         trail
=(uint8_t)(*s
++ - 0x80); 
 300             /* not a trail byte */ 
 307     /* no default branch to optimize switch()  - all values are covered */ 
 310     /* correct sequence - all trail bytes have (b7..b6)==(10)? */ 
 311     /* illegal is also set if count>=4 */ 
 312     if(illegal 
|| c
<utf8_minLegal
[count
] || UTF_IS_SURROGATE(c
)) { 
 314         /* don't go beyond this sequence */ 
 316         while(count
>0 && UTF8_IS_TRAIL(*s
)) { 
 327  * Version of utf8_nextCharSafeBody() with the following differences: 
 328  * - works with pointers instead of indexes 
 329  * - always strict (strict==-1) 
 331  * *ps points to after the lead byte and will be moved to after the last trail byte. 
 332  * c is the lead byte. 
 333  * @return the code point, or U_SENTINEL 
 336 utf8_nextCharSafeBodyPointer(const uint8_t **ps
, const uint8_t *limit
, UChar32 c
) { 
 337     const uint8_t *s
=*ps
; 
 338     uint8_t trail
, illegal
=0; 
 339     uint8_t count
=UTF8_COUNT_TRAIL_BYTES(c
); 
 340     if((limit
-s
)>=count
) { 
 341         UTF8_MASK_LEAD_BYTE((c
), count
); 
 342         /* count==0 for illegally leading trail bytes and the illegal bytes 0xfe and 0xff */ 
 344         /* each branch falls through to the next one */ 
 347             /* count>=4 is always illegal: no more than 3 trail bytes in Unicode's UTF-8 */ 
 352             c
=(c
<<6)|(trail
&0x3f); 
 354                 illegal
|=(trail
&0xc0)^0x80; 
 356                 /* code point>0x10ffff, outside Unicode */ 
 362             c
=(c
<<6)|(trail
&0x3f); 
 363             illegal
|=(trail
&0xc0)^0x80; 
 366             c
=(c
<<6)|(trail
&0x3f); 
 367             illegal
|=(trail
&0xc0)^0x80; 
 371         /* no default branch to optimize switch()  - all values are covered */ 
 374         illegal
=1; /* too few bytes left */ 
 377     /* correct sequence - all trail bytes have (b7..b6)==(10)? */ 
 378     /* illegal is also set if count>=4 */ 
 379     if(illegal 
|| c
<utf8_minLegal
[count
] || UTF_IS_SURROGATE(c
)) { 
 381         /* don't go beyond this sequence */ 
 383         while(count
>0 && s
<limit 
&& UTF8_IS_TRAIL(*s
)) { 
 393 U_CAPI UChar
* U_EXPORT2
 
 394 u_strFromUTF8WithSub(UChar 
*dest
, 
 395               int32_t destCapacity
, 
 396               int32_t *pDestLength
, 
 399               UChar32 subchar
, int32_t *pNumSubstitutions
, 
 400               UErrorCode 
*pErrorCode
){ 
 402     UChar 
*pDestLimit 
= dest
+destCapacity
; 
 404     int32_t reqLength 
= 0; 
 405     const uint8_t* pSrc 
= (const uint8_t*) src
; 
 406     uint8_t t1
, t2
; /* trail bytes */ 
 407     int32_t numSubstitutions
; 
 410     if(pErrorCode
==NULL 
|| U_FAILURE(*pErrorCode
)){ 
 414     if( (src
==NULL 
&& srcLength
!=0) || srcLength 
< -1 || 
 415         (destCapacity
<0) || (dest 
== NULL 
&& destCapacity 
> 0) || 
 416         subchar 
> 0x10ffff || U_IS_SURROGATE(subchar
) 
 418         *pErrorCode 
= U_ILLEGAL_ARGUMENT_ERROR
; 
 422     if(pNumSubstitutions
!=NULL
) { 
 423         *pNumSubstitutions
=0; 
 428      * Inline processing of UTF-8 byte sequences: 
 430      * Byte sequences for the most common characters are handled inline in 
 431      * the conversion loops. In order to reduce the path lengths for those 
 432      * characters, the tests are arranged in a kind of binary search. 
 433      * ASCII (<=0x7f) is checked first, followed by the dividing point 
 434      * between 2- and 3-byte sequences (0xe0). 
 435      * The 3-byte branch is tested first to speed up CJK text. 
 436      * The compiler should combine the subtractions for the two tests for 0xe0. 
 437      * Each branch then tests for the other end of its range. 
 442          * Transform a NUL-terminated string. 
 443          * The code explicitly checks for NULs only in the lead byte position. 
 444          * A NUL byte in the trail byte position fails the trail byte range check anyway. 
 446         while(((ch 
= *pSrc
) != 0) && (pDest 
< pDestLimit
)) { 
 452                     if( /* handle U+1000..U+CFFF inline */ 
 454                         (t1 
= (uint8_t)(pSrc
[1] - 0x80)) <= 0x3f && 
 455                         (t2 
= (uint8_t)(pSrc
[2] - 0x80)) <= 0x3f 
 457                         /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ 
 458                         *pDest
++ = (UChar
)((ch 
<< 12) | (t1 
<< 6) | t2
); 
 462                 } else if(ch 
< 0xe0) { 
 463                     if( /* handle U+0080..U+07FF inline */ 
 465                         (t1 
= (uint8_t)(pSrc
[1] - 0x80)) <= 0x3f 
 467                         *pDest
++ = (UChar
)(((ch 
& 0x1f) << 6) | t1
); 
 473                 /* function call for "complicated" and error cases */ 
 474                 ++pSrc
; /* continue after the lead byte */ 
 475                 ch
=utf8_nextCharSafeBodyTerminated(&pSrc
, ch
); 
 476                 if(ch
<0 && (++numSubstitutions
, ch 
= subchar
) < 0) { 
 477                     *pErrorCode 
= U_INVALID_CHAR_FOUND
; 
 479                 } else if(ch
<=0xFFFF) { 
 480                     *(pDest
++)=(UChar
)ch
; 
 482                     *(pDest
++)=UTF16_LEAD(ch
); 
 483                     if(pDest
<pDestLimit
) { 
 484                         *(pDest
++)=UTF16_TRAIL(ch
); 
 493         /* Pre-flight the rest of the string. */ 
 494         while((ch 
= *pSrc
) != 0) { 
 500                     if( /* handle U+1000..U+CFFF inline */ 
 502                         (uint8_t)(pSrc
[1] - 0x80) <= 0x3f && 
 503                         (uint8_t)(pSrc
[2] - 0x80) <= 0x3f 
 509                 } else if(ch 
< 0xe0) { 
 510                     if( /* handle U+0080..U+07FF inline */ 
 512                         (uint8_t)(pSrc
[1] - 0x80) <= 0x3f 
 520                 /* function call for "complicated" and error cases */ 
 521                 ++pSrc
; /* continue after the lead byte */ 
 522                 ch
=utf8_nextCharSafeBodyTerminated(&pSrc
, ch
); 
 523                 if(ch
<0 && (++numSubstitutions
, ch 
= subchar
) < 0) { 
 524                     *pErrorCode 
= U_INVALID_CHAR_FOUND
; 
 527                 reqLength 
+= U16_LENGTH(ch
); 
 530     } else /* srcLength >= 0 */ { 
 531         const uint8_t *pSrcLimit 
= pSrc 
+ srcLength
; 
 534         /* Faster loop without ongoing checking for pSrcLimit and pDestLimit. */ 
 537              * Each iteration of the inner loop progresses by at most 3 UTF-8 
 538              * bytes and one UChar, for most characters. 
 539              * For supplementary code points (4 & 2), which are rare, 
 540              * there is an additional adjustment. 
 542             count 
= (int32_t)(pDestLimit 
- pDest
); 
 543             srcLength 
= (int32_t)((pSrcLimit 
- pSrc
) / 3); 
 544             if(count 
> srcLength
) { 
 545                 count 
= srcLength
; /* min(remaining dest, remaining src/3) */ 
 549                  * Too much overhead if we get near the end of the string, 
 550                  * continue with the next loop. 
 562                         if( /* handle U+1000..U+CFFF inline */ 
 564                             (t1 
= (uint8_t)(pSrc
[1] - 0x80)) <= 0x3f && 
 565                             (t2 
= (uint8_t)(pSrc
[2] - 0x80)) <= 0x3f 
 567                             /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ 
 568                             *pDest
++ = (UChar
)((ch 
<< 12) | (t1 
<< 6) | t2
); 
 572                     } else if(ch 
< 0xe0) { 
 573                         if( /* handle U+0080..U+07FF inline */ 
 575                             (t1 
= (uint8_t)(pSrc
[1] - 0x80)) <= 0x3f 
 577                             *pDest
++ = (UChar
)(((ch 
& 0x1f) << 6) | t1
); 
 583                     if(ch 
>= 0xf0 || subchar 
> 0xffff) { 
 585                          * We may read up to six bytes and write up to two UChars, 
 586                          * which we didn't account for with computing count, 
 587                          * so we adjust it here. 
 594                     /* function call for "complicated" and error cases */ 
 595                     ++pSrc
; /* continue after the lead byte */ 
 596                     ch
=utf8_nextCharSafeBodyPointer(&pSrc
, pSrcLimit
, ch
); 
 597                     if(ch
<0 && (++numSubstitutions
, ch 
= subchar
) < 0){ 
 598                         *pErrorCode 
= U_INVALID_CHAR_FOUND
; 
 600                     }else if(ch
<=0xFFFF){ 
 601                         *(pDest
++)=(UChar
)ch
; 
 603                         *(pDest
++)=UTF16_LEAD(ch
); 
 604                         *(pDest
++)=UTF16_TRAIL(ch
); 
 607             } while(--count 
> 0); 
 610         while((pSrc
<pSrcLimit
) && (pDest
<pDestLimit
)) { 
 617                     if( /* handle U+1000..U+CFFF inline */ 
 619                         ((pSrcLimit 
- pSrc
) >= 3) && 
 620                         (t1 
= (uint8_t)(pSrc
[1] - 0x80)) <= 0x3f && 
 621                         (t2 
= (uint8_t)(pSrc
[2] - 0x80)) <= 0x3f 
 623                         /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ 
 624                         *pDest
++ = (UChar
)((ch 
<< 12) | (t1 
<< 6) | t2
); 
 628                 } else if(ch 
< 0xe0) { 
 629                     if( /* handle U+0080..U+07FF inline */ 
 631                         ((pSrcLimit 
- pSrc
) >= 2) && 
 632                         (t1 
= (uint8_t)(pSrc
[1] - 0x80)) <= 0x3f 
 634                         *pDest
++ = (UChar
)(((ch 
& 0x1f) << 6) | t1
); 
 640                 /* function call for "complicated" and error cases */ 
 641                 ++pSrc
; /* continue after the lead byte */ 
 642                 ch
=utf8_nextCharSafeBodyPointer(&pSrc
, pSrcLimit
, ch
); 
 643                 if(ch
<0 && (++numSubstitutions
, ch 
= subchar
) < 0){ 
 644                     *pErrorCode 
= U_INVALID_CHAR_FOUND
; 
 646                 }else if(ch
<=0xFFFF){ 
 647                     *(pDest
++)=(UChar
)ch
; 
 649                     *(pDest
++)=UTF16_LEAD(ch
); 
 650                     if(pDest
<pDestLimit
){ 
 651                         *(pDest
++)=UTF16_TRAIL(ch
); 
 659         /* do not fill the dest buffer just count the UChars needed */ 
 660         while(pSrc 
< pSrcLimit
){ 
 667                     if( /* handle U+1000..U+CFFF inline */ 
 669                         ((pSrcLimit 
- pSrc
) >= 3) && 
 670                         (uint8_t)(pSrc
[1] - 0x80) <= 0x3f && 
 671                         (uint8_t)(pSrc
[2] - 0x80) <= 0x3f 
 677                 } else if(ch 
< 0xe0) { 
 678                     if( /* handle U+0080..U+07FF inline */ 
 680                         ((pSrcLimit 
- pSrc
) >= 2) && 
 681                         (uint8_t)(pSrc
[1] - 0x80) <= 0x3f 
 689                 /* function call for "complicated" and error cases */ 
 690                 ++pSrc
; /* continue after the lead byte */ 
 691                 ch
=utf8_nextCharSafeBodyPointer(&pSrc
, pSrcLimit
, ch
); 
 692                 if(ch
<0 && (++numSubstitutions
, ch 
= subchar
) < 0){ 
 693                     *pErrorCode 
= U_INVALID_CHAR_FOUND
; 
 696                 reqLength
+=UTF_CHAR_LENGTH(ch
); 
 701     reqLength
+=(int32_t)(pDest 
- dest
); 
 703     if(pNumSubstitutions
!=NULL
) { 
 704         *pNumSubstitutions
=numSubstitutions
; 
 708         *pDestLength 
= reqLength
; 
 711     /* Terminate the buffer */ 
 712     u_terminateUChars(dest
,destCapacity
,reqLength
,pErrorCode
); 
 717 U_CAPI UChar
* U_EXPORT2
 
 718 u_strFromUTF8(UChar 
*dest
, 
 719               int32_t destCapacity
, 
 720               int32_t *pDestLength
, 
 723               UErrorCode 
*pErrorCode
){ 
 724     return u_strFromUTF8WithSub( 
 725             dest
, destCapacity
, pDestLength
, 
 731 U_CAPI UChar 
* U_EXPORT2
 
 732 u_strFromUTF8Lenient(UChar 
*dest
, 
 733                      int32_t destCapacity
, 
 734                      int32_t *pDestLength
, 
 737                      UErrorCode 
*pErrorCode
) { 
 740     int32_t reqLength 
= 0; 
 741     uint8_t* pSrc 
= (uint8_t*) src
; 
 744     if(pErrorCode
==NULL 
|| U_FAILURE(*pErrorCode
)){ 
 748     if( (src
==NULL 
&& srcLength
!=0) || srcLength 
< -1 || 
 749         (destCapacity
<0) || (dest 
== NULL 
&& destCapacity 
> 0) 
 751         *pErrorCode 
= U_ILLEGAL_ARGUMENT_ERROR
; 
 756         /* Transform a NUL-terminated string. */ 
 757         UChar 
*pDestLimit 
= dest
+destCapacity
; 
 758         uint8_t t1
, t2
, t3
; /* trail bytes */ 
 760         while(((ch 
= *pSrc
) != 0) && (pDest 
< pDestLimit
)) { 
 763                  * ASCII, or a trail byte in lead position which is treated like 
 764                  * a single-byte sequence for better character boundary 
 765                  * resynchronization after illegal sequences. 
 770             } else if(ch 
< 0xe0) { /* U+0080..U+07FF */ 
 771                 if((t1 
= pSrc
[1]) != 0) { 
 772                     /* 0x3080 = (0xc0 << 6) + 0x80 */ 
 773                     *pDest
++ = (UChar
)((ch 
<< 6) + t1 
- 0x3080); 
 777             } else if(ch 
< 0xf0) { /* U+0800..U+FFFF */ 
 778                 if((t1 
= pSrc
[1]) != 0 && (t2 
= pSrc
[2]) != 0) { 
 779                     /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ 
 780                     /* 0x2080 = (0x80 << 6) + 0x80 */ 
 781                     *pDest
++ = (UChar
)((ch 
<< 12) + (t1 
<< 6) + t2 
- 0x2080); 
 785             } else /* f0..f4 */ { /* U+10000..U+10FFFF */ 
 786                 if((t1 
= pSrc
[1]) != 0 && (t2 
= pSrc
[2]) != 0 && (t3 
= pSrc
[3]) != 0) { 
 788                     /* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */ 
 789                     ch 
= (ch 
<< 18) + (t1 
<< 12) + (t2 
<< 6) + t3 
- 0x3c82080; 
 790                     *(pDest
++) = U16_LEAD(ch
); 
 791                     if(pDest 
< pDestLimit
) { 
 792                         *(pDest
++) = U16_TRAIL(ch
); 
 801             /* truncated character at the end */ 
 803             while(*++pSrc 
!= 0) {} 
 807         /* Pre-flight the rest of the string. */ 
 808         while((ch 
= *pSrc
) != 0) { 
 811                  * ASCII, or a trail byte in lead position which is treated like 
 812                  * a single-byte sequence for better character boundary 
 813                  * resynchronization after illegal sequences. 
 818             } else if(ch 
< 0xe0) { /* U+0080..U+07FF */ 
 824             } else if(ch 
< 0xf0) { /* U+0800..U+FFFF */ 
 825                 if(pSrc
[1] != 0 && pSrc
[2] != 0) { 
 830             } else /* f0..f4 */ { /* U+10000..U+10FFFF */ 
 831                 if(pSrc
[1] != 0 && pSrc
[2] != 0 && pSrc
[3] != 0) { 
 838             /* truncated character at the end */ 
 842     } else /* srcLength >= 0 */ { 
 843         const uint8_t *pSrcLimit 
= pSrc 
+ srcLength
; 
 846          * This function requires that if srcLength is given, then it must be 
 847          * destCapatity >= srcLength so that we need not check for 
 848          * destination buffer overflow in the loop. 
 850         if(destCapacity 
< srcLength
) { 
 851             if(pDestLength 
!= NULL
) { 
 852                 *pDestLength 
= srcLength
; /* this likely overestimates the true destLength! */ 
 854             *pErrorCode 
= U_BUFFER_OVERFLOW_ERROR
; 
 858         if((pSrcLimit 
- pSrc
) >= 4) { 
 859             pSrcLimit 
-= 3; /* temporarily reduce pSrcLimit */ 
 861             /* in this loop, we can always access at least 4 bytes, up to pSrc+3 */ 
 866                      * ASCII, or a trail byte in lead position which is treated like 
 867                      * a single-byte sequence for better character boundary 
 868                      * resynchronization after illegal sequences. 
 871                 } else if(ch 
< 0xe0) { /* U+0080..U+07FF */ 
 872                     /* 0x3080 = (0xc0 << 6) + 0x80 */ 
 873                     *pDest
++ = (UChar
)((ch 
<< 6) + *pSrc
++ - 0x3080); 
 874                 } else if(ch 
< 0xf0) { /* U+0800..U+FFFF */ 
 875                     /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ 
 876                     /* 0x2080 = (0x80 << 6) + 0x80 */ 
 877                     ch 
= (ch 
<< 12) + (*pSrc
++ << 6); 
 878                     *pDest
++ = (UChar
)(ch 
+ *pSrc
++ - 0x2080); 
 879                 } else /* f0..f4 */ { /* U+10000..U+10FFFF */ 
 880                     /* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */ 
 881                     ch 
= (ch 
<< 18) + (*pSrc
++ << 12); 
 883                     ch 
+= *pSrc
++ - 0x3c82080; 
 884                     *(pDest
++) = U16_LEAD(ch
); 
 885                     *(pDest
++) = U16_TRAIL(ch
); 
 887             } while(pSrc 
< pSrcLimit
); 
 889             pSrcLimit 
+= 3; /* restore original pSrcLimit */ 
 892         while(pSrc 
< pSrcLimit
) { 
 896                  * ASCII, or a trail byte in lead position which is treated like 
 897                  * a single-byte sequence for better character boundary 
 898                  * resynchronization after illegal sequences. 
 902             } else if(ch 
< 0xe0) { /* U+0080..U+07FF */ 
 903                 if(pSrc 
< pSrcLimit
) { 
 904                     /* 0x3080 = (0xc0 << 6) + 0x80 */ 
 905                     *pDest
++ = (UChar
)((ch 
<< 6) + *pSrc
++ - 0x3080); 
 908             } else if(ch 
< 0xf0) { /* U+0800..U+FFFF */ 
 909                 if((pSrcLimit 
- pSrc
) >= 2) { 
 910                     /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ 
 911                     /* 0x2080 = (0x80 << 6) + 0x80 */ 
 912                     ch 
= (ch 
<< 12) + (*pSrc
++ << 6); 
 913                     *pDest
++ = (UChar
)(ch 
+ *pSrc
++ - 0x2080); 
 917             } else /* f0..f4 */ { /* U+10000..U+10FFFF */ 
 918                 if((pSrcLimit 
- pSrc
) >= 3) { 
 919                     /* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */ 
 920                     ch 
= (ch 
<< 18) + (*pSrc
++ << 12); 
 922                     ch 
+= *pSrc
++ - 0x3c82080; 
 923                     *(pDest
++) = U16_LEAD(ch
); 
 924                     *(pDest
++) = U16_TRAIL(ch
); 
 930             /* truncated character at the end */ 
 936     reqLength
+=(int32_t)(pDest 
- dest
); 
 939         *pDestLength 
= reqLength
; 
 942     /* Terminate the buffer */ 
 943     u_terminateUChars(dest
,destCapacity
,reqLength
,pErrorCode
); 
 948 static U_INLINE 
uint8_t * 
 949 _appendUTF8(uint8_t *pDest
, UChar32 c
) { 
 950     /* it is 0<=c<=0x10ffff and not a surrogate if called by a validating function */ 
 953     } else if(c
<=0x7ff) { 
 954         *pDest
++=(uint8_t)((c
>>6)|0xc0); 
 955         *pDest
++=(uint8_t)((c
&0x3f)|0x80); 
 956     } else if(c
<=0xffff) { 
 957         *pDest
++=(uint8_t)((c
>>12)|0xe0); 
 958         *pDest
++=(uint8_t)(((c
>>6)&0x3f)|0x80); 
 959         *pDest
++=(uint8_t)(((c
)&0x3f)|0x80); 
 960     } else /* if((uint32_t)(c)<=0x10ffff) */ { 
 961         *pDest
++=(uint8_t)(((c
)>>18)|0xf0); 
 962         *pDest
++=(uint8_t)((((c
)>>12)&0x3f)|0x80); 
 963         *pDest
++=(uint8_t)((((c
)>>6)&0x3f)|0x80); 
 964         *pDest
++=(uint8_t)(((c
)&0x3f)|0x80); 
 970 U_CAPI 
char* U_EXPORT2 
 
 971 u_strToUTF8WithSub(char *dest
, 
 972             int32_t destCapacity
, 
 973             int32_t *pDestLength
, 
 976             UChar32 subchar
, int32_t *pNumSubstitutions
, 
 977             UErrorCode 
*pErrorCode
){ 
 980     uint8_t *pDest 
= (uint8_t *)dest
; 
 981     uint8_t *pDestLimit 
= pDest 
+ destCapacity
; 
 982     int32_t numSubstitutions
; 
 985     if(pErrorCode
==NULL 
|| U_FAILURE(*pErrorCode
)){ 
 989     if( (pSrc
==NULL 
&& srcLength
!=0) || srcLength 
< -1 || 
 990         (destCapacity
<0) || (dest 
== NULL 
&& destCapacity 
> 0) || 
 991         subchar 
> 0x10ffff || U_IS_SURROGATE(subchar
) 
 993         *pErrorCode 
= U_ILLEGAL_ARGUMENT_ERROR
; 
 997     if(pNumSubstitutions
!=NULL
) { 
 998         *pNumSubstitutions
=0; 
1003         while((ch
=*pSrc
)!=0) { 
1006                 if(pDest
<pDestLimit
) { 
1007                     *pDest
++ = (uint8_t)ch
; 
1012             } else if(ch 
<= 0x7ff) { 
1013                 if((pDestLimit 
- pDest
) >= 2) { 
1014                     *pDest
++=(uint8_t)((ch
>>6)|0xc0); 
1015                     *pDest
++=(uint8_t)((ch
&0x3f)|0x80); 
1020             } else if(ch 
<= 0xd7ff || ch 
>= 0xe000) { 
1021                 if((pDestLimit 
- pDest
) >= 3) { 
1022                     *pDest
++=(uint8_t)((ch
>>12)|0xe0); 
1023                     *pDest
++=(uint8_t)(((ch
>>6)&0x3f)|0x80); 
1024                     *pDest
++=(uint8_t)((ch
&0x3f)|0x80); 
1029             } else /* ch is a surrogate */ { 
1032                 /*need not check for NUL because NUL fails UTF_IS_TRAIL() anyway*/ 
1033                 if(UTF_IS_SURROGATE_FIRST(ch
) && UTF_IS_TRAIL(ch2
=*pSrc
)) {  
1035                     ch
=UTF16_GET_PAIR_VALUE(ch
, ch2
); 
1036                 } else if(subchar
>=0) { 
1040                     /* Unicode 3.2 forbids surrogate code points in UTF-8 */ 
1041                     *pErrorCode 
= U_INVALID_CHAR_FOUND
; 
1045                 length 
= U8_LENGTH(ch
); 
1046                 if((pDestLimit 
- pDest
) >= length
) { 
1047                     /* convert and append*/ 
1048                     pDest
=_appendUTF8(pDest
, ch
); 
1055         while((ch
=*pSrc
++)!=0) { 
1058             } else if(ch
<=0x7ff) { 
1060             } else if(!UTF_IS_SURROGATE(ch
)) { 
1062             } else if(UTF_IS_SURROGATE_FIRST(ch
) && UTF_IS_TRAIL(ch2
=*pSrc
)) { 
1065             } else if(subchar
>=0) { 
1066                 reqLength
+=U8_LENGTH(subchar
); 
1069                 /* Unicode 3.2 forbids surrogate code points in UTF-8 */ 
1070                 *pErrorCode 
= U_INVALID_CHAR_FOUND
; 
1075         const UChar 
*pSrcLimit 
= pSrc
+srcLength
; 
1078         /* Faster loop without ongoing checking for pSrcLimit and pDestLimit. */ 
1081              * Each iteration of the inner loop progresses by at most 3 UTF-8 
1082              * bytes and one UChar, for most characters. 
1083              * For supplementary code points (4 & 2), which are rare, 
1084              * there is an additional adjustment. 
1086             count 
= (int32_t)((pDestLimit 
- pDest
) / 3); 
1087             srcLength 
= (int32_t)(pSrcLimit 
- pSrc
); 
1088             if(count 
> srcLength
) { 
1089                 count 
= srcLength
; /* min(remaining dest/3, remaining src) */ 
1093                  * Too much overhead if we get near the end of the string, 
1094                  * continue with the next loop. 
1101                     *pDest
++ = (uint8_t)ch
; 
1102                 } else if(ch 
<= 0x7ff) { 
1103                     *pDest
++=(uint8_t)((ch
>>6)|0xc0); 
1104                     *pDest
++=(uint8_t)((ch
&0x3f)|0x80); 
1105                 } else if(ch 
<= 0xd7ff || ch 
>= 0xe000) { 
1106                     *pDest
++=(uint8_t)((ch
>>12)|0xe0); 
1107                     *pDest
++=(uint8_t)(((ch
>>6)&0x3f)|0x80); 
1108                     *pDest
++=(uint8_t)((ch
&0x3f)|0x80); 
1109                 } else /* ch is a surrogate */ { 
1111                      * We will read two UChars and probably output four bytes, 
1112                      * which we didn't account for with computing count, 
1113                      * so we adjust it here. 
1116                         --pSrc
; /* undo ch=*pSrc++ for the lead surrogate */ 
1117                         break;  /* recompute count */ 
1120                     if(UTF_IS_SURROGATE_FIRST(ch
) && UTF_IS_TRAIL(ch2
=*pSrc
)) {  
1122                         ch
=UTF16_GET_PAIR_VALUE(ch
, ch2
); 
1124                         /* writing 4 bytes per 2 UChars is ok */ 
1125                         *pDest
++=(uint8_t)((ch
>>18)|0xf0); 
1126                         *pDest
++=(uint8_t)(((ch
>>12)&0x3f)|0x80); 
1127                         *pDest
++=(uint8_t)(((ch
>>6)&0x3f)|0x80); 
1128                         *pDest
++=(uint8_t)((ch
&0x3f)|0x80); 
1130                         /* Unicode 3.2 forbids surrogate code points in UTF-8 */ 
1135                             *pErrorCode 
= U_INVALID_CHAR_FOUND
; 
1139                         /* convert and append*/ 
1140                         pDest
=_appendUTF8(pDest
, ch
); 
1143             } while(--count 
> 0); 
1146         while(pSrc
<pSrcLimit
) { 
1149                 if(pDest
<pDestLimit
) { 
1150                     *pDest
++ = (uint8_t)ch
; 
1155             } else if(ch 
<= 0x7ff) { 
1156                 if((pDestLimit 
- pDest
) >= 2) { 
1157                     *pDest
++=(uint8_t)((ch
>>6)|0xc0); 
1158                     *pDest
++=(uint8_t)((ch
&0x3f)|0x80); 
1163             } else if(ch 
<= 0xd7ff || ch 
>= 0xe000) { 
1164                 if((pDestLimit 
- pDest
) >= 3) { 
1165                     *pDest
++=(uint8_t)((ch
>>12)|0xe0); 
1166                     *pDest
++=(uint8_t)(((ch
>>6)&0x3f)|0x80); 
1167                     *pDest
++=(uint8_t)((ch
&0x3f)|0x80); 
1172             } else /* ch is a surrogate */ { 
1175                 if(UTF_IS_SURROGATE_FIRST(ch
) && pSrc
<pSrcLimit 
&& UTF_IS_TRAIL(ch2
=*pSrc
)) {  
1177                     ch
=UTF16_GET_PAIR_VALUE(ch
, ch2
); 
1178                 } else if(subchar
>=0) { 
1182                     /* Unicode 3.2 forbids surrogate code points in UTF-8 */ 
1183                     *pErrorCode 
= U_INVALID_CHAR_FOUND
; 
1187                 length 
= U8_LENGTH(ch
); 
1188                 if((pDestLimit 
- pDest
) >= length
) { 
1189                     /* convert and append*/ 
1190                     pDest
=_appendUTF8(pDest
, ch
); 
1197         while(pSrc
<pSrcLimit
) { 
1201             } else if(ch
<=0x7ff) { 
1203             } else if(!UTF_IS_SURROGATE(ch
)) { 
1205             } else if(UTF_IS_SURROGATE_FIRST(ch
) && pSrc
<pSrcLimit 
&& UTF_IS_TRAIL(ch2
=*pSrc
)) { 
1208             } else if(subchar
>=0) { 
1209                 reqLength
+=U8_LENGTH(subchar
); 
1212                 /* Unicode 3.2 forbids surrogate code points in UTF-8 */ 
1213                 *pErrorCode 
= U_INVALID_CHAR_FOUND
; 
1219     reqLength
+=(int32_t)(pDest 
- (uint8_t *)dest
); 
1221     if(pNumSubstitutions
!=NULL
) { 
1222         *pNumSubstitutions
=numSubstitutions
; 
1226         *pDestLength 
= reqLength
; 
1229     /* Terminate the buffer */ 
1230     u_terminateChars(dest
, destCapacity
, reqLength
, pErrorCode
); 
1234 U_CAPI 
char* U_EXPORT2 
 
1235 u_strToUTF8(char *dest
, 
1236             int32_t destCapacity
, 
1237             int32_t *pDestLength
, 
1240             UErrorCode 
*pErrorCode
){ 
1241     return u_strToUTF8WithSub( 
1242             dest
, destCapacity
, pDestLength
, 
1248 U_CAPI UChar
* U_EXPORT2
 
1249 u_strFromJavaModifiedUTF8WithSub( 
1251         int32_t destCapacity
, 
1252         int32_t *pDestLength
, 
1255         UChar32 subchar
, int32_t *pNumSubstitutions
, 
1256         UErrorCode 
*pErrorCode
) { 
1257     UChar 
*pDest 
= dest
; 
1258     UChar 
*pDestLimit 
= dest
+destCapacity
; 
1260     int32_t reqLength 
= 0; 
1261     const uint8_t* pSrc 
= (const uint8_t*) src
; 
1262     const uint8_t *pSrcLimit
; 
1264     uint8_t t1
, t2
; /* trail bytes */ 
1265     int32_t numSubstitutions
; 
1268     if(U_FAILURE(*pErrorCode
)){ 
1271     if( (src
==NULL 
&& srcLength
!=0) || srcLength 
< -1 || 
1272         (dest
==NULL 
&& destCapacity
!=0) || destCapacity
<0 || 
1273         subchar 
> 0x10ffff || U_IS_SURROGATE(subchar
) 
1275         *pErrorCode 
= U_ILLEGAL_ARGUMENT_ERROR
; 
1279     if(pNumSubstitutions
!=NULL
) { 
1280         *pNumSubstitutions
=0; 
1286          * Transform a NUL-terminated ASCII string. 
1287          * Handle non-ASCII strings with slower code. 
1289         while(((ch 
= *pSrc
) != 0) && ch 
<= 0x7f && (pDest 
< pDestLimit
)) { 
1294             reqLength
=(int32_t)(pDest 
- dest
); 
1296                 *pDestLength 
= reqLength
; 
1299             /* Terminate the buffer */ 
1300             u_terminateUChars(dest
, destCapacity
, reqLength
, pErrorCode
); 
1303         srcLength 
= uprv_strlen((const char *)pSrc
); 
1306     /* Faster loop without ongoing checking for pSrcLimit and pDestLimit. */ 
1307     pSrcLimit 
= pSrc 
+ srcLength
; 
1309         count 
= (int32_t)(pDestLimit 
- pDest
); 
1310         srcLength 
= (int32_t)(pSrcLimit 
- pSrc
); 
1311         if(count 
>= srcLength 
&& srcLength 
> 0 && *pSrc 
<= 0x7f) { 
1312             /* fast ASCII loop */ 
1313             const uint8_t *prevSrc 
= pSrc
; 
1315             while(pSrc 
< pSrcLimit 
&& (ch 
= *pSrc
) <= 0x7f) { 
1319             delta 
= (int32_t)(pSrc 
- prevSrc
); 
1324          * Each iteration of the inner loop progresses by at most 3 UTF-8 
1325          * bytes and one UChar. 
1328         if(count 
> srcLength
) { 
1329             count 
= srcLength
; /* min(remaining dest, remaining src/3) */ 
1333              * Too much overhead if we get near the end of the string, 
1334              * continue with the next loop. 
1345                     if( /* handle U+0000..U+FFFF inline */ 
1347                         (t1 
= (uint8_t)(pSrc
[1] - 0x80)) <= 0x3f && 
1348                         (t2 
= (uint8_t)(pSrc
[2] - 0x80)) <= 0x3f 
1350                         /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ 
1351                         *pDest
++ = (UChar
)((ch 
<< 12) | (t1 
<< 6) | t2
); 
1356                     if( /* handle U+0000..U+07FF inline */ 
1358                         (t1 
= (uint8_t)(pSrc
[1] - 0x80)) <= 0x3f 
1360                         *pDest
++ = (UChar
)(((ch 
& 0x1f) << 6) | t1
); 
1367                     *pErrorCode 
= U_INVALID_CHAR_FOUND
; 
1369                 } else if(subchar 
> 0xffff && --count 
== 0) { 
1371                      * We need to write two UChars, adjusted count for that, 
1372                      * and ran out of space. 
1376                     /* function call for error cases */ 
1377                     ++pSrc
; /* continue after the lead byte */ 
1378                     utf8_nextCharSafeBodyPointer(&pSrc
, pSrcLimit
, ch
); 
1380                     if(subchar
<=0xFFFF) { 
1381                         *(pDest
++)=(UChar
)subchar
; 
1383                         *(pDest
++)=U16_LEAD(subchar
); 
1384                         *(pDest
++)=U16_TRAIL(subchar
); 
1388         } while(--count 
> 0); 
1391     while((pSrc
<pSrcLimit
) && (pDest
<pDestLimit
)) { 
1398                 if( /* handle U+0000..U+FFFF inline */ 
1400                     ((pSrcLimit 
- pSrc
) >= 3) && 
1401                     (t1 
= (uint8_t)(pSrc
[1] - 0x80)) <= 0x3f && 
1402                     (t2 
= (uint8_t)(pSrc
[2] - 0x80)) <= 0x3f 
1404                     /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ 
1405                     *pDest
++ = (UChar
)((ch 
<< 12) | (t1 
<< 6) | t2
); 
1410                 if( /* handle U+0000..U+07FF inline */ 
1412                     ((pSrcLimit 
- pSrc
) >= 2) && 
1413                     (t1 
= (uint8_t)(pSrc
[1] - 0x80)) <= 0x3f 
1415                     *pDest
++ = (UChar
)(((ch 
& 0x1f) << 6) | t1
); 
1422                 *pErrorCode 
= U_INVALID_CHAR_FOUND
; 
1425                 /* function call for error cases */ 
1426                 ++pSrc
; /* continue after the lead byte */ 
1427                 utf8_nextCharSafeBodyPointer(&pSrc
, pSrcLimit
, ch
); 
1429                 if(subchar
<=0xFFFF) { 
1430                     *(pDest
++)=(UChar
)subchar
; 
1432                     *(pDest
++)=U16_LEAD(subchar
); 
1433                     if(pDest
<pDestLimit
) { 
1434                         *(pDest
++)=U16_TRAIL(subchar
); 
1444     /* do not fill the dest buffer just count the UChars needed */ 
1445     while(pSrc 
< pSrcLimit
){ 
1452                 if( /* handle U+0000..U+FFFF inline */ 
1454                     ((pSrcLimit 
- pSrc
) >= 3) && 
1455                     (uint8_t)(pSrc
[1] - 0x80) <= 0x3f && 
1456                     (uint8_t)(pSrc
[2] - 0x80) <= 0x3f 
1463                 if( /* handle U+0000..U+07FF inline */ 
1465                     ((pSrcLimit 
- pSrc
) >= 2) && 
1466                     (uint8_t)(pSrc
[1] - 0x80) <= 0x3f 
1475                 *pErrorCode 
= U_INVALID_CHAR_FOUND
; 
1478                 /* function call for error cases */ 
1479                 ++pSrc
; /* continue after the lead byte */ 
1480                 utf8_nextCharSafeBodyPointer(&pSrc
, pSrcLimit
, ch
); 
1482                 reqLength
+=U16_LENGTH(ch
); 
1487     if(pNumSubstitutions
!=NULL
) { 
1488         *pNumSubstitutions
=numSubstitutions
; 
1491     reqLength
+=(int32_t)(pDest 
- dest
); 
1493         *pDestLength 
= reqLength
; 
1496     /* Terminate the buffer */ 
1497     u_terminateUChars(dest
, destCapacity
, reqLength
, pErrorCode
); 
1501 U_CAPI 
char* U_EXPORT2 
 
1502 u_strToJavaModifiedUTF8( 
1504         int32_t destCapacity
, 
1505         int32_t *pDestLength
, 
1508         UErrorCode 
*pErrorCode
) { 
1509     int32_t reqLength
=0; 
1511     uint8_t *pDest 
= (uint8_t *)dest
; 
1512     uint8_t *pDestLimit 
= pDest 
+ destCapacity
; 
1513     const UChar 
*pSrcLimit
; 
1517     if(U_FAILURE(*pErrorCode
)){ 
1520     if( (src
==NULL 
&& srcLength
!=0) || srcLength 
< -1 || 
1521         (dest
==NULL 
&& destCapacity
!=0) || destCapacity
<0 
1523         *pErrorCode 
= U_ILLEGAL_ARGUMENT_ERROR
; 
1528         /* Convert NUL-terminated ASCII, then find the string length. */ 
1529         while((ch
=*src
)<=0x7f && ch 
!= 0 && pDest
<pDestLimit
) { 
1530             *pDest
++ = (uint8_t)ch
; 
1534             reqLength
=(int32_t)(pDest 
- (uint8_t *)dest
); 
1536                 *pDestLength 
= reqLength
; 
1539             /* Terminate the buffer */ 
1540             u_terminateChars(dest
, destCapacity
, reqLength
, pErrorCode
); 
1543         srcLength 
= u_strlen(src
); 
1546     /* Faster loop without ongoing checking for pSrcLimit and pDestLimit. */ 
1547     pSrcLimit 
= src
+srcLength
; 
1549         count 
= (int32_t)(pDestLimit 
- pDest
); 
1550         srcLength 
= (int32_t)(pSrcLimit 
- src
); 
1551         if(count 
>= srcLength 
&& srcLength 
> 0 && *src 
<= 0x7f) { 
1552             /* fast ASCII loop */ 
1553             const UChar 
*prevSrc 
= src
; 
1555             while(src 
< pSrcLimit 
&& (ch 
= *src
) <= 0x7f && ch 
!= 0) { 
1556                 *pDest
++=(uint8_t)ch
; 
1559             delta 
= (int32_t)(src 
- prevSrc
); 
1564          * Each iteration of the inner loop progresses by at most 3 UTF-8 
1565          * bytes and one UChar. 
1568         if(count 
> srcLength
) { 
1569             count 
= srcLength
; /* min(remaining dest/3, remaining src) */ 
1573              * Too much overhead if we get near the end of the string, 
1574              * continue with the next loop. 
1580             if(ch 
<= 0x7f && ch 
!= 0) { 
1581                 *pDest
++ = (uint8_t)ch
; 
1582             } else if(ch 
<= 0x7ff) { 
1583                 *pDest
++=(uint8_t)((ch
>>6)|0xc0); 
1584                 *pDest
++=(uint8_t)((ch
&0x3f)|0x80); 
1586                 *pDest
++=(uint8_t)((ch
>>12)|0xe0); 
1587                 *pDest
++=(uint8_t)(((ch
>>6)&0x3f)|0x80); 
1588                 *pDest
++=(uint8_t)((ch
&0x3f)|0x80); 
1590         } while(--count 
> 0); 
1593     while(src
<pSrcLimit
) { 
1595         if(ch 
<= 0x7f && ch 
!= 0) { 
1596             if(pDest
<pDestLimit
) { 
1597                 *pDest
++ = (uint8_t)ch
; 
1602         } else if(ch 
<= 0x7ff) { 
1603             if((pDestLimit 
- pDest
) >= 2) { 
1604                 *pDest
++=(uint8_t)((ch
>>6)|0xc0); 
1605                 *pDest
++=(uint8_t)((ch
&0x3f)|0x80); 
1611             if((pDestLimit 
- pDest
) >= 3) { 
1612                 *pDest
++=(uint8_t)((ch
>>12)|0xe0); 
1613                 *pDest
++=(uint8_t)(((ch
>>6)&0x3f)|0x80); 
1614                 *pDest
++=(uint8_t)((ch
&0x3f)|0x80); 
1621     while(src
<pSrcLimit
) { 
1623         if(ch 
<= 0x7f && ch 
!= 0) { 
1625         } else if(ch
<=0x7ff) { 
1632     reqLength
+=(int32_t)(pDest 
- (uint8_t *)dest
); 
1634         *pDestLength 
= reqLength
; 
1637     /* Terminate the buffer */ 
1638     u_terminateChars(dest
, destCapacity
, reqLength
, pErrorCode
);