/*
******************************************************************************
*
-* Copyright (C) 2001-2006, International Business Machines
+* Copyright (C) 2001-2010, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
#include "ustr_imp.h"
U_CAPI UChar* U_EXPORT2
-u_strFromUTF32(UChar *dest,
- int32_t destCapacity,
+u_strFromUTF32WithSub(UChar *dest,
+ int32_t destCapacity,
int32_t *pDestLength,
const UChar32 *src,
int32_t srcLength,
- UErrorCode *pErrorCode)
-{
- int32_t reqLength = 0;
- uint32_t ch =0;
- UChar *pDestLimit =dest+destCapacity;
- UChar *pDest = dest;
- const uint32_t *pSrc = (const uint32_t *)src;
+ UChar32 subchar, int32_t *pNumSubstitutions,
+ UErrorCode *pErrorCode) {
+ const UChar32 *srcLimit;
+ UChar32 ch;
+ UChar *destLimit;
+ UChar *pDest;
+ int32_t reqLength;
+ int32_t numSubstitutions;
/* args check */
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
+ if(U_FAILURE(*pErrorCode)){
return NULL;
}
-
- if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
+ if( (src==NULL && srcLength!=0) || srcLength < -1 ||
+ (destCapacity<0) || (dest == NULL && destCapacity > 0) ||
+ subchar > 0x10ffff || U_IS_SURROGATE(subchar)
+ ) {
*pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
return NULL;
}
- /* Check if the source is null terminated */
- if(srcLength == -1 ){
- while(((ch=*pSrc)!=0) && (pDest < pDestLimit)){
- ++pSrc;
- if(ch<=0xFFFF){
- *(pDest++)=(UChar)ch;
- }else if(ch<=0x10ffff){
- *(pDest++)=UTF16_LEAD(ch);
- if(pDest<pDestLimit){
- *(pDest++)=UTF16_TRAIL(ch);
- }else{
- reqLength++;
- break;
- }
- }else{
- *pErrorCode = U_INVALID_CHAR_FOUND;
- return NULL;
+ if(pNumSubstitutions != NULL) {
+ *pNumSubstitutions = 0;
+ }
+
+ pDest = dest;
+ destLimit = dest + destCapacity;
+ reqLength = 0;
+ numSubstitutions = 0;
+
+ if(srcLength < 0) {
+ /* simple loop for conversion of a NUL-terminated BMP string */
+ while((ch=*src) != 0 &&
+ ((uint32_t)ch < 0xd800 || (0xe000 <= ch && ch <= 0xffff))) {
+ ++src;
+ if(pDest < destLimit) {
+ *pDest++ = (UChar)ch;
+ } else {
+ ++reqLength;
}
}
- while((ch=*pSrc++) != 0){
- reqLength+=UTF_CHAR_LENGTH(ch);
+ srcLimit = src;
+ if(ch != 0) {
+ /* "complicated" case, find the end of the remaining string */
+ while(*++srcLimit != 0) {}
}
- }else{
- const uint32_t* pSrcLimit = ((const uint32_t*)pSrc) + srcLength;
- while((pSrc < pSrcLimit) && (pDest < pDestLimit)){
- ch = *pSrc++;
- if(ch<=0xFFFF){
- *(pDest++)=(UChar)ch;
- }else if(ch<=0x10FFFF){
- *(pDest++)=UTF16_LEAD(ch);
- if(pDest<pDestLimit){
- *(pDest++)=UTF16_TRAIL(ch);
- }else{
- reqLength++;
- break;
+ } else {
+ srcLimit = src + srcLength;
+ }
+
+ /* convert with length */
+ while(src < srcLimit) {
+ ch = *src++;
+ do {
+ /* usually "loops" once; twice only for writing subchar */
+ if((uint32_t)ch < 0xd800 || (0xe000 <= ch && ch <= 0xffff)) {
+ if(pDest < destLimit) {
+ *pDest++ = (UChar)ch;
+ } else {
+ ++reqLength;
+ }
+ break;
+ } else if(0x10000 <= ch && ch <= 0x10ffff) {
+ if((pDest + 2) <= destLimit) {
+ *pDest++ = U16_LEAD(ch);
+ *pDest++ = U16_TRAIL(ch);
+ } else {
+ reqLength += 2;
}
- }else{
+ break;
+ } else if((ch = subchar) < 0) {
+ /* surrogate code point, or not a Unicode code point at all */
*pErrorCode = U_INVALID_CHAR_FOUND;
return NULL;
+ } else {
+ ++numSubstitutions;
}
- }
- while(pSrc <pSrcLimit){
- ch = *pSrc++;
- reqLength+=UTF_CHAR_LENGTH(ch);
- }
+ } while(TRUE);
}
reqLength += (int32_t)(pDest - dest);
- if(pDestLength){
+ if(pDestLength) {
*pDestLength = reqLength;
}
+ if(pNumSubstitutions != NULL) {
+ *pNumSubstitutions = numSubstitutions;
+ }
/* Terminate the buffer */
- u_terminateUChars(dest,destCapacity,reqLength,pErrorCode);
+ u_terminateUChars(dest, destCapacity, reqLength, pErrorCode);
return dest;
}
+U_CAPI UChar* U_EXPORT2
+u_strFromUTF32(UChar *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const UChar32 *src,
+ int32_t srcLength,
+ UErrorCode *pErrorCode) {
+ return u_strFromUTF32WithSub(
+ dest, destCapacity, pDestLength,
+ src, srcLength,
+ U_SENTINEL, NULL,
+ pErrorCode);
+}
U_CAPI UChar32* U_EXPORT2
-u_strToUTF32(UChar32 *dest,
- int32_t destCapacity,
- int32_t *pDestLength,
- const UChar *src,
- int32_t srcLength,
- UErrorCode *pErrorCode)
-{
- const UChar* pSrc = src;
- const UChar* pSrcLimit;
- int32_t reqLength=0;
- uint32_t ch=0;
- uint32_t *pDest = (uint32_t *)dest;
- uint32_t *pDestLimit = pDest + destCapacity;
- UChar ch2=0;
+u_strToUTF32WithSub(UChar32 *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const UChar *src,
+ int32_t srcLength,
+ UChar32 subchar, int32_t *pNumSubstitutions,
+ UErrorCode *pErrorCode) {
+ const UChar *srcLimit;
+ UChar32 ch;
+ UChar ch2;
+ UChar32 *destLimit;
+ UChar32 *pDest;
+ int32_t reqLength;
+ int32_t numSubstitutions;
/* args check */
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
+ if(U_FAILURE(*pErrorCode)){
return NULL;
}
-
-
- if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
+ if( (src==NULL && srcLength!=0) || srcLength < -1 ||
+ (destCapacity<0) || (dest == NULL && destCapacity > 0) ||
+ subchar > 0x10ffff || U_IS_SURROGATE(subchar)
+ ) {
*pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
return NULL;
}
- if(srcLength==-1) {
- while((ch=*pSrc)!=0 && pDest!=pDestLimit) {
- ++pSrc;
- /*need not check for NUL because NUL fails UTF_IS_TRAIL() anyway*/
- if(UTF_IS_LEAD(ch) && UTF_IS_TRAIL(ch2=*pSrc)) {
- ++pSrc;
- ch=UTF16_GET_PAIR_VALUE(ch, ch2);
+ if(pNumSubstitutions != NULL) {
+ *pNumSubstitutions = 0;
+ }
+
+ pDest = dest;
+ destLimit = dest + destCapacity;
+ reqLength = 0;
+ numSubstitutions = 0;
+
+ if(srcLength < 0) {
+ /* simple loop for conversion of a NUL-terminated BMP string */
+ while((ch=*src) != 0 && !U16_IS_SURROGATE(ch)) {
+ ++src;
+ if(pDest < destLimit) {
+ *pDest++ = ch;
+ } else {
+ ++reqLength;
}
- *(pDest++)= ch;
}
- while((ch=*pSrc++)!=0) {
- if(UTF_IS_LEAD(ch) && UTF_IS_TRAIL(ch2=*pSrc)) {
- ++pSrc;
- }
- ++reqLength;
+ srcLimit = src;
+ if(ch != 0) {
+ /* "complicated" case, find the end of the remaining string */
+ while(*++srcLimit != 0) {}
}
} else {
- pSrcLimit = pSrc+srcLength;
- while(pSrc<pSrcLimit && pDest<pDestLimit) {
- ch=*pSrc++;
- if(UTF_IS_LEAD(ch) && pSrc<pSrcLimit && UTF_IS_TRAIL(ch2=*pSrc)) {
- ++pSrc;
- ch=UTF16_GET_PAIR_VALUE(ch, ch2);
- }
- *(pDest++)= ch;
+ srcLimit = src + srcLength;
+ }
+
+ /* convert with length */
+ while(src < srcLimit) {
+ ch = *src++;
+ if(!U16_IS_SURROGATE(ch)) {
+ /* write or count ch below */
+ } else if(U16_IS_SURROGATE_LEAD(ch) && src < srcLimit && U16_IS_TRAIL(ch2 = *src)) {
+ ++src;
+ ch = U16_GET_SUPPLEMENTARY(ch, ch2);
+ } else if((ch = subchar) < 0) {
+ /* unpaired surrogate */
+ *pErrorCode = U_INVALID_CHAR_FOUND;
+ return NULL;
+ } else {
+ ++numSubstitutions;
}
- while(pSrc!=pSrcLimit) {
- ch=*pSrc++;
- if(UTF_IS_LEAD(ch) && pSrc<pSrcLimit && UTF_IS_TRAIL(ch2=*pSrc)) {
- ++pSrc;
- }
+ if(pDest < destLimit) {
+ *pDest++ = ch;
+ } else {
++reqLength;
}
}
- reqLength+=(int32_t)(pDest - (uint32_t *)dest);
- if(pDestLength){
+ reqLength += (int32_t)(pDest - dest);
+ if(pDestLength) {
*pDestLength = reqLength;
}
+ if(pNumSubstitutions != NULL) {
+ *pNumSubstitutions = numSubstitutions;
+ }
/* Terminate the buffer */
- u_terminateUChar32s(dest,destCapacity,reqLength,pErrorCode);
+ u_terminateUChar32s(dest, destCapacity, reqLength, pErrorCode);
return dest;
}
+U_CAPI UChar32* U_EXPORT2
+u_strToUTF32(UChar32 *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const UChar *src,
+ int32_t srcLength,
+ UErrorCode *pErrorCode) {
+ return u_strToUTF32WithSub(
+ dest, destCapacity, pDestLength,
+ src, srcLength,
+ U_SENTINEL, NULL,
+ pErrorCode);
+}
+
/* for utf8_nextCharSafeBodyTerminated() */
static const UChar32
utf8_minLegal[4]={ 0, 0x80, 0x800, 0x10000 };
int32_t srcLength,
UChar32 subchar, int32_t *pNumSubstitutions,
UErrorCode *pErrorCode){
-
UChar *pDest = dest;
UChar *pDestLimit = dest+destCapacity;
UChar32 ch;
return NULL;
}
- if( (src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0) ||
+ if( (src==NULL && srcLength!=0) || srcLength < -1 ||
+ (destCapacity<0) || (dest == NULL && destCapacity > 0) ||
subchar > 0x10ffff || U_IS_SURROGATE(subchar)
) {
*pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
return NULL;
}
+ if(pNumSubstitutions!=NULL) {
+ *pNumSubstitutions=0;
+ }
numSubstitutions=0;
/*
*(pDest++)=(UChar)ch;
}else{
*(pDest++)=UTF16_LEAD(ch);
- if(pDest<pDestLimit){
- *(pDest++)=UTF16_TRAIL(ch);
- }else{
- reqLength++;
- break;
- }
+ *(pDest++)=UTF16_TRAIL(ch);
}
}
} while(--count > 0);
}
}
}
- /* donot fill the dest buffer just count the UChars needed */
+ /* do not fill the dest buffer just count the UChars needed */
while(pSrc < pSrcLimit){
ch = *pSrc;
if(ch <= 0x7f){
const char *src,
int32_t srcLength,
UErrorCode *pErrorCode) {
-
UChar *pDest = dest;
UChar32 ch;
int32_t reqLength = 0;
return NULL;
}
- if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)) {
+ if( (src==NULL && srcLength!=0) || srcLength < -1 ||
+ (destCapacity<0) || (dest == NULL && destCapacity > 0)
+ ) {
*pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
return NULL;
}
} else if(ch < 0xe0) { /* U+0080..U+07FF */
if(pSrc < pSrcLimit) {
/* 0x3080 = (0xc0 << 6) + 0x80 */
- *pDest++ = (UChar)(((ch & 0x1f) << 6) + *pSrc++ - 0x3080);
+ *pDest++ = (UChar)((ch << 6) + *pSrc++ - 0x3080);
continue;
}
} else if(ch < 0xf0) { /* U+0800..U+FFFF */
int32_t srcLength,
UChar32 subchar, int32_t *pNumSubstitutions,
UErrorCode *pErrorCode){
-
int32_t reqLength=0;
uint32_t ch=0,ch2=0;
uint8_t *pDest = (uint8_t *)dest;
return NULL;
}
- if( (pSrc==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0) ||
+ if( (pSrc==NULL && srcLength!=0) || srcLength < -1 ||
+ (destCapacity<0) || (dest == NULL && destCapacity > 0) ||
subchar > 0x10ffff || U_IS_SURROGATE(subchar)
) {
*pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
return NULL;
}
+ if(pNumSubstitutions!=NULL) {
+ *pNumSubstitutions=0;
+ }
numSubstitutions=0;
if(srcLength==-1) {
++pSrc;
if(ch <= 0x7f) {
if(pDest<pDestLimit) {
- *pDest++ = (char)ch;
+ *pDest++ = (uint8_t)ch;
} else {
reqLength = 1;
break;
do {
ch=*pSrc++;
if(ch <= 0x7f) {
- *pDest++ = (char)ch;
+ *pDest++ = (uint8_t)ch;
} else if(ch <= 0x7ff) {
*pDest++=(uint8_t)((ch>>6)|0xc0);
*pDest++=(uint8_t)((ch&0x3f)|0x80);
ch=*pSrc++;
if(ch <= 0x7f) {
if(pDest<pDestLimit) {
- *pDest++ = (char)ch;
+ *pDest++ = (uint8_t)ch;
} else {
reqLength = 1;
break;
}
/* Terminate the buffer */
- u_terminateChars((char*)dest,destCapacity,reqLength,pErrorCode);
-
- return (char*)dest;
+ u_terminateChars(dest, destCapacity, reqLength, pErrorCode);
+ return dest;
}
U_CAPI char* U_EXPORT2
U_SENTINEL, NULL,
pErrorCode);
}
+
+U_CAPI UChar* U_EXPORT2
+u_strFromJavaModifiedUTF8WithSub(
+ UChar *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const char *src,
+ int32_t srcLength,
+ UChar32 subchar, int32_t *pNumSubstitutions,
+ UErrorCode *pErrorCode) {
+ UChar *pDest = dest;
+ UChar *pDestLimit = dest+destCapacity;
+ UChar32 ch;
+ int32_t reqLength = 0;
+ const uint8_t* pSrc = (const uint8_t*) src;
+ const uint8_t *pSrcLimit;
+ int32_t count;
+ uint8_t t1, t2; /* trail bytes */
+ int32_t numSubstitutions;
+
+ /* args check */
+ if(U_FAILURE(*pErrorCode)){
+ return NULL;
+ }
+ if( (src==NULL && srcLength!=0) || srcLength < -1 ||
+ (dest==NULL && destCapacity!=0) || destCapacity<0 ||
+ subchar > 0x10ffff || U_IS_SURROGATE(subchar)
+ ) {
+ *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
+ return NULL;
+ }
+
+ if(pNumSubstitutions!=NULL) {
+ *pNumSubstitutions=0;
+ }
+ numSubstitutions=0;
+
+ if(srcLength < 0) {
+ /*
+ * Transform a NUL-terminated ASCII string.
+ * Handle non-ASCII strings with slower code.
+ */
+ while(((ch = *pSrc) != 0) && ch <= 0x7f && (pDest < pDestLimit)) {
+ *pDest++=(UChar)ch;
+ ++pSrc;
+ }
+ if(ch == 0) {
+ reqLength=(int32_t)(pDest - dest);
+ if(pDestLength) {
+ *pDestLength = reqLength;
+ }
+
+ /* Terminate the buffer */
+ u_terminateUChars(dest, destCapacity, reqLength, pErrorCode);
+ return dest;
+ }
+ srcLength = uprv_strlen((const char *)pSrc);
+ }
+
+ /* Faster loop without ongoing checking for pSrcLimit and pDestLimit. */
+ pSrcLimit = pSrc + srcLength;
+ for(;;) {
+ count = (int32_t)(pDestLimit - pDest);
+ srcLength = (int32_t)(pSrcLimit - pSrc);
+ if(count >= srcLength && srcLength > 0 && *pSrc <= 0x7f) {
+ /* fast ASCII loop */
+ const uint8_t *prevSrc = pSrc;
+ int32_t delta;
+ while(pSrc < pSrcLimit && (ch = *pSrc) <= 0x7f) {
+ *pDest++=(UChar)ch;
+ ++pSrc;
+ }
+ delta = (int32_t)(pSrc - prevSrc);
+ count -= delta;
+ srcLength -= delta;
+ }
+ /*
+ * Each iteration of the inner loop progresses by at most 3 UTF-8
+ * bytes and one UChar.
+ */
+ srcLength /= 3;
+ if(count > srcLength) {
+ count = srcLength; /* min(remaining dest, remaining src/3) */
+ }
+ if(count < 3) {
+ /*
+ * Too much overhead if we get near the end of the string,
+ * continue with the next loop.
+ */
+ break;
+ }
+ do {
+ ch = *pSrc;
+ if(ch <= 0x7f){
+ *pDest++=(UChar)ch;
+ ++pSrc;
+ } else {
+ if(ch >= 0xe0) {
+ if( /* handle U+0000..U+FFFF inline */
+ ch <= 0xef &&
+ (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f &&
+ (t2 = (uint8_t)(pSrc[2] - 0x80)) <= 0x3f
+ ) {
+ /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
+ *pDest++ = (UChar)((ch << 12) | (t1 << 6) | t2);
+ pSrc += 3;
+ continue;
+ }
+ } else {
+ if( /* handle U+0000..U+07FF inline */
+ ch >= 0xc0 &&
+ (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f
+ ) {
+ *pDest++ = (UChar)(((ch & 0x1f) << 6) | t1);
+ pSrc += 2;
+ continue;
+ }
+ }
+
+ if(subchar < 0) {
+ *pErrorCode = U_INVALID_CHAR_FOUND;
+ return NULL;
+ } else if(subchar > 0xffff && --count == 0) {
+ /*
+ * We need to write two UChars, adjusted count for that,
+ * and ran out of space.
+ */
+ break;
+ } else {
+ /* function call for error cases */
+ ++pSrc; /* continue after the lead byte */
+ utf8_nextCharSafeBodyPointer(&pSrc, pSrcLimit, ch);
+ ++numSubstitutions;
+ if(subchar<=0xFFFF) {
+ *(pDest++)=(UChar)subchar;
+ } else {
+ *(pDest++)=U16_LEAD(subchar);
+ *(pDest++)=U16_TRAIL(subchar);
+ }
+ }
+ }
+ } while(--count > 0);
+ }
+
+ while((pSrc<pSrcLimit) && (pDest<pDestLimit)) {
+ ch = *pSrc;
+ if(ch <= 0x7f){
+ *pDest++=(UChar)ch;
+ ++pSrc;
+ } else {
+ if(ch >= 0xe0) {
+ if( /* handle U+0000..U+FFFF inline */
+ ch <= 0xef &&
+ ((pSrcLimit - pSrc) >= 3) &&
+ (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f &&
+ (t2 = (uint8_t)(pSrc[2] - 0x80)) <= 0x3f
+ ) {
+ /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
+ *pDest++ = (UChar)((ch << 12) | (t1 << 6) | t2);
+ pSrc += 3;
+ continue;
+ }
+ } else {
+ if( /* handle U+0000..U+07FF inline */
+ ch >= 0xc0 &&
+ ((pSrcLimit - pSrc) >= 2) &&
+ (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f
+ ) {
+ *pDest++ = (UChar)(((ch & 0x1f) << 6) | t1);
+ pSrc += 2;
+ continue;
+ }
+ }
+
+ if(subchar < 0) {
+ *pErrorCode = U_INVALID_CHAR_FOUND;
+ return NULL;
+ } else {
+ /* function call for error cases */
+ ++pSrc; /* continue after the lead byte */
+ utf8_nextCharSafeBodyPointer(&pSrc, pSrcLimit, ch);
+ ++numSubstitutions;
+ if(subchar<=0xFFFF) {
+ *(pDest++)=(UChar)subchar;
+ } else {
+ *(pDest++)=U16_LEAD(subchar);
+ if(pDest<pDestLimit) {
+ *(pDest++)=U16_TRAIL(subchar);
+ } else {
+ reqLength++;
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ /* do not fill the dest buffer just count the UChars needed */
+ while(pSrc < pSrcLimit){
+ ch = *pSrc;
+ if(ch <= 0x7f) {
+ reqLength++;
+ ++pSrc;
+ } else {
+ if(ch >= 0xe0) {
+ if( /* handle U+0000..U+FFFF inline */
+ ch <= 0xef &&
+ ((pSrcLimit - pSrc) >= 3) &&
+ (uint8_t)(pSrc[1] - 0x80) <= 0x3f &&
+ (uint8_t)(pSrc[2] - 0x80) <= 0x3f
+ ) {
+ reqLength++;
+ pSrc += 3;
+ continue;
+ }
+ } else {
+ if( /* handle U+0000..U+07FF inline */
+ ch >= 0xc0 &&
+ ((pSrcLimit - pSrc) >= 2) &&
+ (uint8_t)(pSrc[1] - 0x80) <= 0x3f
+ ) {
+ reqLength++;
+ pSrc += 2;
+ continue;
+ }
+ }
+
+ if(subchar < 0) {
+ *pErrorCode = U_INVALID_CHAR_FOUND;
+ return NULL;
+ } else {
+ /* function call for error cases */
+ ++pSrc; /* continue after the lead byte */
+ utf8_nextCharSafeBodyPointer(&pSrc, pSrcLimit, ch);
+ ++numSubstitutions;
+ reqLength+=U16_LENGTH(ch);
+ }
+ }
+ }
+
+ if(pNumSubstitutions!=NULL) {
+ *pNumSubstitutions=numSubstitutions;
+ }
+
+ reqLength+=(int32_t)(pDest - dest);
+ if(pDestLength) {
+ *pDestLength = reqLength;
+ }
+
+ /* Terminate the buffer */
+ u_terminateUChars(dest, destCapacity, reqLength, pErrorCode);
+ return dest;
+}
+
+U_CAPI char* U_EXPORT2
+u_strToJavaModifiedUTF8(
+ char *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const UChar *src,
+ int32_t srcLength,
+ UErrorCode *pErrorCode) {
+ int32_t reqLength=0;
+ uint32_t ch=0;
+ uint8_t *pDest = (uint8_t *)dest;
+ uint8_t *pDestLimit = pDest + destCapacity;
+ const UChar *pSrcLimit;
+ int32_t count;
+
+ /* args check */
+ if(U_FAILURE(*pErrorCode)){
+ return NULL;
+ }
+ if( (src==NULL && srcLength!=0) || srcLength < -1 ||
+ (dest==NULL && destCapacity!=0) || destCapacity<0
+ ) {
+ *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
+ return NULL;
+ }
+
+ if(srcLength==-1) {
+ /* Convert NUL-terminated ASCII, then find the string length. */
+ while((ch=*src)<=0x7f && ch != 0 && pDest<pDestLimit) {
+ *pDest++ = (uint8_t)ch;
+ ++src;
+ }
+ if(ch == 0) {
+ reqLength=(int32_t)(pDest - (uint8_t *)dest);
+ if(pDestLength) {
+ *pDestLength = reqLength;
+ }
+
+ /* Terminate the buffer */
+ u_terminateChars(dest, destCapacity, reqLength, pErrorCode);
+ return dest;
+ }
+ srcLength = u_strlen(src);
+ }
+
+ /* Faster loop without ongoing checking for pSrcLimit and pDestLimit. */
+ pSrcLimit = src+srcLength;
+ for(;;) {
+ count = (int32_t)(pDestLimit - pDest);
+ srcLength = (int32_t)(pSrcLimit - src);
+ if(count >= srcLength && srcLength > 0 && *src <= 0x7f) {
+ /* fast ASCII loop */
+ const UChar *prevSrc = src;
+ int32_t delta;
+ while(src < pSrcLimit && (ch = *src) <= 0x7f && ch != 0) {
+ *pDest++=(uint8_t)ch;
+ ++src;
+ }
+ delta = (int32_t)(src - prevSrc);
+ count -= delta;
+ srcLength -= delta;
+ }
+ /*
+ * Each iteration of the inner loop progresses by at most 3 UTF-8
+ * bytes and one UChar.
+ */
+ count /= 3;
+ if(count > srcLength) {
+ count = srcLength; /* min(remaining dest/3, remaining src) */
+ }
+ if(count < 3) {
+ /*
+ * Too much overhead if we get near the end of the string,
+ * continue with the next loop.
+ */
+ break;
+ }
+ do {
+ ch=*src++;
+ if(ch <= 0x7f && ch != 0) {
+ *pDest++ = (uint8_t)ch;
+ } else if(ch <= 0x7ff) {
+ *pDest++=(uint8_t)((ch>>6)|0xc0);
+ *pDest++=(uint8_t)((ch&0x3f)|0x80);
+ } else {
+ *pDest++=(uint8_t)((ch>>12)|0xe0);
+ *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80);
+ *pDest++=(uint8_t)((ch&0x3f)|0x80);
+ }
+ } while(--count > 0);
+ }
+
+ while(src<pSrcLimit) {
+ ch=*src++;
+ if(ch <= 0x7f && ch != 0) {
+ if(pDest<pDestLimit) {
+ *pDest++ = (uint8_t)ch;
+ } else {
+ reqLength = 1;
+ break;
+ }
+ } else if(ch <= 0x7ff) {
+ if((pDestLimit - pDest) >= 2) {
+ *pDest++=(uint8_t)((ch>>6)|0xc0);
+ *pDest++=(uint8_t)((ch&0x3f)|0x80);
+ } else {
+ reqLength = 2;
+ break;
+ }
+ } else {
+ if((pDestLimit - pDest) >= 3) {
+ *pDest++=(uint8_t)((ch>>12)|0xe0);
+ *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80);
+ *pDest++=(uint8_t)((ch&0x3f)|0x80);
+ } else {
+ reqLength = 3;
+ break;
+ }
+ }
+ }
+ while(src<pSrcLimit) {
+ ch=*src++;
+ if(ch <= 0x7f && ch != 0) {
+ ++reqLength;
+ } else if(ch<=0x7ff) {
+ reqLength+=2;
+ } else {
+ reqLength+=3;
+ }
+ }
+
+ reqLength+=(int32_t)(pDest - (uint8_t *)dest);
+ if(pDestLength){
+ *pDestLength = reqLength;
+ }
+
+ /* Terminate the buffer */
+ u_terminateChars(dest, destCapacity, reqLength, pErrorCode);
+ return dest;
+}