2 ******************************************************************************
4 * Copyright (C) 2001-2003, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 ******************************************************************************
11 * Modification History:
13 * Date Name Description
14 * 9/10/2001 Ram Creation.
15 ******************************************************************************
18 /*******************************************************************************
20 * u_strTo* and u_strFrom* APIs
22 *******************************************************************************
26 #include "unicode/putil.h"
27 #include "unicode/ucnv.h"
28 #include "unicode/ustring.h"
36 u_growAnyBufferFromStatic(void *context
,
37 void **pBuffer
, int32_t *pCapacity
, int32_t reqCapacity
,
38 int32_t length
, int32_t size
) {
40 void *newBuffer
=uprv_malloc(reqCapacity
*size
);
43 uprv_memcpy(newBuffer
, *pBuffer
, length
*size
);
45 *pCapacity
=reqCapacity
;
50 /* release the old pBuffer if it was not statically allocated */
51 if(*pBuffer
!=(void *)context
) {
56 return (UBool
)(newBuffer
!=NULL
);
59 #define _STACK_BUFFER_CAPACITY 1000
61 U_CAPI UChar
* U_EXPORT2
62 u_strFromUTF32(UChar
*dest
,
67 UErrorCode
*pErrorCode
)
69 int32_t reqLength
= 0;
71 UChar
*pDestLimit
=dest
+destCapacity
;
73 const uint32_t *pSrc
= (const uint32_t *)src
;
76 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)){
80 if((srcLength
< -1) || (destCapacity
<0) || (!dest
&& destCapacity
> 0)){
81 *pErrorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
85 /* Check if the source is null terminated */
87 while(((ch
=*pSrc
)!=0) && (pDest
< pDestLimit
)){
91 }else if(ch
<=0x10ffff){
92 *(pDest
++)=UTF16_LEAD(ch
);
94 *(pDest
++)=UTF16_TRAIL(ch
);
100 *pErrorCode
= U_INVALID_CHAR_FOUND
;
104 while((ch
=*pSrc
++) != 0){
105 reqLength
+=UTF_CHAR_LENGTH(ch
);
108 const uint32_t* pSrcLimit
= ((const uint32_t*)pSrc
) + srcLength
;
109 while((pSrc
< pSrcLimit
) && (pDest
< pDestLimit
)){
112 *(pDest
++)=(UChar
)ch
;
113 }else if(ch
<=0x10FFFF){
114 *(pDest
++)=UTF16_LEAD(ch
);
115 if(pDest
<pDestLimit
){
116 *(pDest
++)=UTF16_TRAIL(ch
);
122 *pErrorCode
= U_INVALID_CHAR_FOUND
;
126 while(pSrc
<pSrcLimit
){
128 reqLength
+=UTF_CHAR_LENGTH(ch
);
132 reqLength
+= pDest
- dest
;
134 *pDestLength
= reqLength
;
137 /* Terminate the buffer */
138 u_terminateUChars(dest
,destCapacity
,reqLength
,pErrorCode
);
144 U_CAPI UChar32
* U_EXPORT2
145 u_strToUTF32(UChar32
*dest
,
146 int32_t destCapacity
,
147 int32_t *pDestLength
,
150 UErrorCode
*pErrorCode
)
152 const UChar
* pSrc
= src
;
153 const UChar
* pSrcLimit
;
156 uint32_t *pDest
= (uint32_t *)dest
;
157 uint32_t *pDestLimit
= pDest
+ destCapacity
;
161 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)){
166 if((srcLength
< -1) || (destCapacity
<0) || (!dest
&& destCapacity
> 0)){
167 *pErrorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
172 while((ch
=*pSrc
)!=0 && pDest
!=pDestLimit
) {
174 /*need not check for NUL because NUL fails UTF_IS_TRAIL() anyway*/
175 if(UTF_IS_LEAD(ch
) && UTF_IS_TRAIL(ch2
=*pSrc
)) {
177 ch
=UTF16_GET_PAIR_VALUE(ch
, ch2
);
181 while((ch
=*pSrc
++)!=0) {
182 if(UTF_IS_LEAD(ch
) && UTF_IS_TRAIL(ch2
=*pSrc
)) {
188 pSrcLimit
= pSrc
+srcLength
;
189 while(pSrc
<pSrcLimit
&& pDest
<pDestLimit
) {
191 if(UTF_IS_LEAD(ch
) && pSrc
<pSrcLimit
&& UTF_IS_TRAIL(ch2
=*pSrc
)) {
193 ch
=UTF16_GET_PAIR_VALUE(ch
, ch2
);
197 while(pSrc
!=pSrcLimit
) {
199 if(UTF_IS_LEAD(ch
) && pSrc
<pSrcLimit
&& UTF_IS_TRAIL(ch2
=*pSrc
)) {
206 reqLength
+=(pDest
- (uint32_t *)dest
);
208 *pDestLength
= reqLength
;
211 /* Terminate the buffer */
212 u_terminateUChar32s(dest
,destCapacity
,reqLength
,pErrorCode
);
217 U_CAPI UChar
* U_EXPORT2
218 u_strFromUTF8(UChar
*dest
,
219 int32_t destCapacity
,
220 int32_t *pDestLength
,
223 UErrorCode
*pErrorCode
){
226 UChar
*pDestLimit
= dest
+destCapacity
;
229 int32_t reqLength
= 0;
230 uint8_t* pSrc
= (uint8_t*) src
;
233 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)){
237 if((srcLength
< -1) || (destCapacity
<0) || (!dest
&& destCapacity
> 0)){
238 *pErrorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
243 srcLength
= uprv_strlen((char*)pSrc
);
246 while((index
< srcLength
)&&(pDest
<pDestLimit
)){
251 ch
=utf8_nextCharSafeBody(pSrc
, &index
, srcLength
, ch
, -1);
253 *pErrorCode
= U_INVALID_CHAR_FOUND
;
255 }else if(ch
<=0xFFFF){
256 *(pDest
++)=(UChar
)ch
;
258 *(pDest
++)=UTF16_LEAD(ch
);
259 if(pDest
<pDestLimit
){
260 *(pDest
++)=UTF16_TRAIL(ch
);
268 /* donot fill the dest buffer just count the UChars needed */
269 while(index
< srcLength
){
274 ch
=utf8_nextCharSafeBody(pSrc
, &index
, srcLength
, ch
, -1);
276 *pErrorCode
= U_INVALID_CHAR_FOUND
;
279 reqLength
+=UTF_CHAR_LENGTH(ch
);
283 reqLength
+=(pDest
- dest
);
286 *pDestLength
= reqLength
;
289 /* Terminate the buffer */
290 u_terminateUChars(dest
,destCapacity
,reqLength
,pErrorCode
);
295 static U_INLINE
uint8_t *
296 _appendUTF8(uint8_t *pDest
, UChar32 c
) {
297 /* c<=0x7f is handled by the caller, here it is 0x80<=c<=0x10ffff */
299 *pDest
++=(uint8_t)((c
>>6)|0xc0);
300 *pDest
++=(uint8_t)((c
&0x3f)|0x80);
301 } else if((uint32_t)(c
)<=0xffff) {
302 *pDest
++=(uint8_t)((c
>>12)|0xe0);
303 *pDest
++=(uint8_t)(((c
>>6)&0x3f)|0x80);
304 *pDest
++=(uint8_t)(((c
)&0x3f)|0x80);
305 } else /* if((uint32_t)(c)<=0x10ffff) */ {
306 *pDest
++=(uint8_t)(((c
)>>18)|0xf0);
307 *pDest
++=(uint8_t)((((c
)>>12)&0x3f)|0x80);
308 *pDest
++=(uint8_t)((((c
)>>6)&0x3f)|0x80);
309 *pDest
++=(uint8_t)(((c
)&0x3f)|0x80);
315 U_CAPI
char* U_EXPORT2
316 u_strToUTF8(char *dest
,
317 int32_t destCapacity
,
318 int32_t *pDestLength
,
321 UErrorCode
*pErrorCode
){
324 const UChar
*pSrcLimit
;
326 uint8_t *pDest
= (uint8_t *)dest
;
327 uint8_t *pDestLimit
= pDest
+ destCapacity
;
331 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)){
335 if((srcLength
< -1) || (destCapacity
<0) || (!dest
&& destCapacity
> 0)){
336 *pErrorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
341 while((ch
=*pSrc
)!=0 && pDest
!=pDestLimit
) {
349 /*need not check for NUL because NUL fails UTF_IS_TRAIL() anyway*/
350 if(UTF_IS_SURROGATE(ch
)) {
351 if(UTF_IS_SURROGATE_FIRST(ch
) && UTF_IS_TRAIL(ch2
=*pSrc
)) {
353 ch
=UTF16_GET_PAIR_VALUE(ch
, ch2
);
355 /* Unicode 3.2 forbids surrogate code points in UTF-8 */
356 *pErrorCode
= U_INVALID_CHAR_FOUND
;
360 reqLength
+= UTF8_CHAR_LENGTH(ch
);
361 /* do we have enough room in destination? */
362 if(destCapacity
< reqLength
){
365 /* convert and append*/
366 pDest
=_appendUTF8(pDest
, ch
);
368 while((ch
=*pSrc
++)!=0) {
371 } else if(ch
<=0x7ff) {
373 } else if(!UTF_IS_SURROGATE(ch
)) {
375 } else if(UTF_IS_SURROGATE_FIRST(ch
) && UTF_IS_TRAIL(ch2
=*pSrc
)) {
379 /* Unicode 3.2 forbids surrogate code points in UTF-8 */
380 *pErrorCode
= U_INVALID_CHAR_FOUND
;
385 pSrcLimit
= pSrc
+srcLength
;
386 while(pSrc
<pSrcLimit
&& pDest
<pDestLimit
) {
394 if(UTF_IS_SURROGATE(ch
)) {
395 if(UTF_IS_SURROGATE_FIRST(ch
) && pSrc
<pSrcLimit
&& UTF_IS_TRAIL(ch2
=*pSrc
)) {
397 ch
=UTF16_GET_PAIR_VALUE(ch
, ch2
);
399 /* Unicode 3.2 forbids surrogate code points in UTF-8 */
400 *pErrorCode
= U_INVALID_CHAR_FOUND
;
404 reqLength
+= UTF8_CHAR_LENGTH(ch
);
405 /* do we have enough room in destination? */
406 if(destCapacity
< reqLength
){
409 /* convert and append*/
410 pDest
=_appendUTF8(pDest
, ch
);
412 while(pSrc
<pSrcLimit
) {
416 } else if(ch
<=0x7ff) {
418 } else if(!UTF_IS_SURROGATE(ch
)) {
420 } else if(UTF_IS_SURROGATE_FIRST(ch
) && pSrc
<pSrcLimit
&& UTF_IS_TRAIL(ch2
=*pSrc
)) {
424 /* Unicode 3.2 forbids surrogate code points in UTF-8 */
425 *pErrorCode
= U_INVALID_CHAR_FOUND
;
432 *pDestLength
= reqLength
;
435 /* Terminate the buffer */
436 u_terminateChars((char*)dest
,destCapacity
,reqLength
,pErrorCode
);
441 #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
442 /* helper function */
444 _strToWCS(wchar_t *dest
,
445 int32_t destCapacity
,
446 int32_t *pDestLength
,
449 UErrorCode
*pErrorCode
){
451 char stackBuffer
[_STACK_BUFFER_CAPACITY
];
452 char* tempBuf
= stackBuffer
;
453 int32_t tempBufCapacity
= _STACK_BUFFER_CAPACITY
;
454 char* tempBufLimit
= stackBuffer
+ tempBufCapacity
;
455 UConverter
* conv
= NULL
;
456 char* saveBuf
= tempBuf
;
457 wchar_t* intTarget
=NULL
;
458 int32_t intTargetCapacity
=0;
459 int count
=0,retVal
=0;
461 const UChar
*pSrcLimit
=NULL
;
462 const UChar
*pSrc
= src
;
464 conv
= u_getDefaultConverter(pErrorCode
);
466 if(U_FAILURE(*pErrorCode
)){
471 srcLength
= u_strlen(pSrc
);
474 pSrcLimit
= pSrc
+ srcLength
;
477 /* reset the error state */
478 *pErrorCode
= U_ZERO_ERROR
;
480 /* convert to chars using default converter */
481 ucnv_fromUnicode(conv
,&tempBuf
,tempBufLimit
,&pSrc
,pSrcLimit
,NULL
,(UBool
)(pSrc
==pSrcLimit
),pErrorCode
);
482 count
=(tempBuf
- saveBuf
);
484 /* This should rarely occur */
485 if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
){
488 /* we dont have enough room on the stack grow the buffer */
489 if(!u_growAnyBufferFromStatic(stackBuffer
,(void**) &tempBuf
, &tempBufCapacity
,
490 (2*(pSrcLimit
-pSrc
)+100), count
,sizeof(char))){
495 tempBufLimit
= tempBuf
+ tempBufCapacity
;
496 tempBuf
= tempBuf
+ count
;
503 if(U_FAILURE(*pErrorCode
)){
507 /* done with conversion null terminate the char buffer */
508 if(count
>=tempBufCapacity
){
510 /* we dont have enough room on the stack grow the buffer */
511 if(!u_growAnyBufferFromStatic(stackBuffer
,(void**) &tempBuf
, &tempBufCapacity
,
512 tempBufCapacity
-count
+1, count
,sizeof(char))){
521 /* allocate more space than required
522 * here we assume that every char requires
523 * no more than 2 wchar_ts
525 intTargetCapacity
= (count
*2+1) /*for null termination */;
526 intTarget
= (wchar_t*)uprv_malloc( intTargetCapacity
* sizeof(wchar_t) );
531 int32_t remaining
= intTargetCapacity
;
532 wchar_t* pIntTarget
=intTarget
;
535 /* now convert the mbs to wcs */
538 /* we can call the system API since we are sure that
539 * there is atleast 1 null in the input
541 retVal
= uprv_mbstowcs(pIntTarget
,(tempBuf
+nulLen
),remaining
);
544 *pErrorCode
= U_INVALID_CHAR_FOUND
;
546 }else if(retVal
== remaining
){/* should never occur */
547 int numWritten
= (pIntTarget
-intTarget
);
548 u_growAnyBufferFromStatic(NULL
,(void**) &intTarget
,
553 pIntTarget
= intTarget
;
554 remaining
=intTargetCapacity
;
556 if(nulLen
!=count
){ /*there are embedded nulls*/
557 pIntTarget
+=numWritten
;
558 remaining
-=numWritten
;
563 /* we donot check for limit since tempBuf is null terminated */
564 while(tempBuf
[nulLen
++] != 0){
566 pIntTarget
= pIntTarget
+ retVal
+1;
567 remaining
-=(retVal
+1);
569 /* check if we have reached the source limit*/
575 count
= (int32_t)(pIntTarget
-intTarget
);
577 if(0 < count
&& count
<= destCapacity
){
578 uprv_memcpy(dest
,intTarget
,count
*sizeof(wchar_t));
582 *pDestLength
= count
;
585 /* free the allocated memory */
586 uprv_free(intTarget
);
589 *pErrorCode
= U_MEMORY_ALLOCATION_ERROR
;
592 /* are we still using stack buffer */
593 if(stackBuffer
!= saveBuf
){
596 u_terminateWChars(dest
,destCapacity
,count
,pErrorCode
);
598 u_releaseDefaultConverter(conv
);
604 U_CAPI
wchar_t* U_EXPORT2
605 u_strToWCS(wchar_t *dest
,
606 int32_t destCapacity
,
607 int32_t *pDestLength
,
610 UErrorCode
*pErrorCode
){
613 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)){
617 if((srcLength
< -1) || (destCapacity
<0) || (!dest
&& destCapacity
> 0)){
618 *pErrorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
622 #ifdef U_WCHAR_IS_UTF16
623 /* wchar_t is UTF-16 just do a memcpy */
625 srcLength
= u_strlen(src
);
627 if(0 < srcLength
&& srcLength
<= destCapacity
){
628 uprv_memcpy(dest
,src
,srcLength
*U_SIZEOF_UCHAR
);
631 *pDestLength
= srcLength
;
634 u_terminateUChars(dest
,destCapacity
,srcLength
,pErrorCode
);
638 #elif defined U_WCHAR_IS_UTF32
640 return (wchar_t*)u_strToUTF32((UChar32
*)dest
, destCapacity
, pDestLength
,
641 src
, srcLength
, pErrorCode
);
645 return _strToWCS(dest
,destCapacity
,pDestLength
,src
,srcLength
, pErrorCode
);
650 #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
651 /* helper function */
653 _strFromWCS( UChar
*dest
,
654 int32_t destCapacity
,
655 int32_t *pDestLength
,
658 UErrorCode
*pErrorCode
){
660 int32_t retVal
=0, count
=0 ;
661 UConverter
* conv
= NULL
;
662 UChar
* pTarget
= NULL
;
663 UChar
* pTargetLimit
= NULL
;
664 UChar
* target
= NULL
;
666 UChar uStack
[_STACK_BUFFER_CAPACITY
];
668 wchar_t wStack
[_STACK_BUFFER_CAPACITY
];
669 wchar_t* pWStack
= wStack
;
672 char cStack
[_STACK_BUFFER_CAPACITY
];
673 int32_t cStackCap
= _STACK_BUFFER_CAPACITY
;
676 char* pCSrcLimit
=NULL
;
678 const wchar_t* pSrc
= src
;
679 const wchar_t* pSrcLimit
= NULL
;
682 /* if the wchar_t source is null terminated we can safely
683 * assume that there are no embedded nulls, this is a fast
684 * path for null terminated strings.
687 /* convert wchars to chars */
688 retVal
= uprv_wcstombs(pCSrc
,src
, cStackCap
);
691 *pErrorCode
= U_ILLEGAL_CHAR_FOUND
;
693 }else if(retVal
== cStackCap
){
694 /* Should rarely occur */
695 u_growAnyBufferFromStatic(cStack
,(void**)&pCSrc
,&cStackCap
,
696 cStackCap
*2,0,sizeof(char));
699 /* converted every thing */
700 pCSrc
= pCSrc
+retVal
;
706 /* here the source is not null terminated
707 * so it may have nulls embeded and we need to
708 * do some extra processing
710 int32_t remaining
=cStackCap
;
712 pSrcLimit
= src
+ srcLength
;
715 register int32_t nulLen
= 0;
717 /* find nulls in the string */
718 while(nulLen
<srcLength
&& pSrc
[nulLen
++]!=0){
721 if((pSrc
+nulLen
) < pSrcLimit
){
722 /* check if we have enough room in pCSrc */
723 if(remaining
< (nulLen
* MB_CUR_MAX
)){
724 /* should rarely occur */
725 int32_t len
= (pCSrc
-pCSave
);
727 /* we do not have enough room so grow the buffer*/
728 u_growAnyBufferFromStatic(cStack
,(void**)&pCSrc
,&cStackCap
,
729 2*cStackCap
+(nulLen
*MB_CUR_MAX
),len
,sizeof(char));
733 remaining
= cStackCap
-(pCSrc
- pCSave
);
736 /* we have found a null so convert the
737 * chunk from begining of non-null char to null
739 retVal
= uprv_wcstombs(pCSrc
,pSrc
,remaining
);
742 /* an error occurred bail out */
743 *pErrorCode
= U_ILLEGAL_CHAR_FOUND
;
747 pCSrc
+= retVal
+1 /* already null terminated */;
749 pSrc
+= nulLen
; /* skip past the null */
750 srcLength
-=nulLen
; /* decrement the srcLength */
751 remaining
-= (pCSrc
-pCSave
);
755 /* the source is not null terminated and we are
756 * end of source so we copy the source to a temp buffer
757 * null terminate it and convert wchar_ts to chars
759 if(nulLen
> _STACK_BUFFER_CAPACITY
){
760 /* Should rarely occcur */
761 /* allocate new buffer buffer */
762 pWStack
=(wchar_t*) uprv_malloc(sizeof(wchar_t) * nulLen
);
764 *pErrorCode
= U_MEMORY_ALLOCATION_ERROR
;
769 /* copy the contents to tempStack */
770 uprv_memcpy(pWStack
,pSrc
,nulLen
*sizeof(wchar_t));
773 /* null terminate the tempBuffer */
776 if(remaining
< (nulLen
* MB_CUR_MAX
)){
777 /* Should rarely occur */
778 int32_t len
= (pCSrc
-pCSave
);
780 /* we do not have enough room so grow the buffer*/
781 u_growAnyBufferFromStatic(cStack
,(void**)&pCSrc
,&cStackCap
,
782 cStackCap
+(nulLen
*MB_CUR_MAX
),len
,sizeof(char));
786 remaining
= cStackCap
-(pCSrc
- pCSave
);
788 /* convert to chars */
789 retVal
= uprv_wcstombs(pCSrc
,pWStack
,remaining
);
793 srcLength
-=nulLen
; /* decrement the srcLength */
799 /* OK..now we have converted from wchar_ts to chars now
800 * convert chars to UChars
804 pTarget
= target
= dest
;
805 pTargetLimit
= dest
+ destCapacity
;
807 conv
= u_getDefaultConverter(pErrorCode
);
809 if(U_FAILURE(*pErrorCode
)|| conv
==NULL
){
815 *pErrorCode
= U_ZERO_ERROR
;
817 /* convert to stack buffer*/
818 ucnv_toUnicode(conv
,&pTarget
,pTargetLimit
,(const char**)&pCSrc
,pCSrcLimit
,NULL
,(UBool
)(pCSrc
==pCSrcLimit
),pErrorCode
);
820 /* increment count to number written to stack */
821 count
+= pTarget
- target
;
823 if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
){
826 pTargetLimit
= uStack
+ _STACK_BUFFER_CAPACITY
;
837 u_terminateUChars(dest
,destCapacity
,count
,pErrorCode
);
841 if(cStack
!= pCSave
){
845 if(wStack
!= pWStack
){
849 u_releaseDefaultConverter(conv
);
855 U_CAPI UChar
* U_EXPORT2
856 u_strFromWCS(UChar
*dest
,
857 int32_t destCapacity
,
858 int32_t *pDestLength
,
861 UErrorCode
*pErrorCode
)
865 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)){
869 if((srcLength
< -1) || (destCapacity
<0) || (!dest
&& destCapacity
> 0)){
870 *pErrorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
874 #ifdef U_WCHAR_IS_UTF16
875 /* wchar_t is UTF-16 just do a memcpy */
877 srcLength
= u_strlen(src
);
879 if(0 < srcLength
&& srcLength
<= destCapacity
){
880 uprv_memcpy(dest
,src
,srcLength
*U_SIZEOF_UCHAR
);
883 *pDestLength
= srcLength
;
886 u_terminateUChars(dest
,destCapacity
,srcLength
,pErrorCode
);
890 #elif defined U_WCHAR_IS_UTF32
892 return u_strFromUTF32(dest
, destCapacity
, pDestLength
,
893 (UChar32
*)src
, srcLength
, pErrorCode
);
897 return _strFromWCS(dest
,destCapacity
,pDestLength
,src
,srcLength
,pErrorCode
);