/*
*******************************************************************************
*   Copyright (C) 2001-2010, International Business Machines
*   Corporation and others.  All Rights Reserved.
*******************************************************************************
*   file name:  ustr_wcs.c
*   tab size:   8 (not used)
*   created on: 2004sep07
*   created by: Markus W. Scherer
*
*   u_strToWCS() and u_strFromWCS() functions
*   moved here from ustrtrns.c for better modularization.
*/
20 #include "unicode/utypes.h"
21 #include "unicode/ustring.h"
28 #if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION
30 #define _STACK_BUFFER_CAPACITY 1000
31 #define _BUFFER_CAPACITY_MULTIPLIER 2
33 #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
35 u_growAnyBufferFromStatic(void *context
,
36 void **pBuffer
, int32_t *pCapacity
, int32_t reqCapacity
,
37 int32_t length
, int32_t size
) {
39 void *newBuffer
=uprv_malloc(reqCapacity
*size
);
42 uprv_memcpy(newBuffer
, *pBuffer
, length
*size
);
44 *pCapacity
=reqCapacity
;
49 /* release the old pBuffer if it was not statically allocated */
50 if(*pBuffer
!=(void *)context
) {
55 return (UBool
)(newBuffer
!=NULL
);
60 _strToWCS(wchar_t *dest
,
65 UErrorCode
*pErrorCode
){
67 char stackBuffer
[_STACK_BUFFER_CAPACITY
];
68 char* tempBuf
= stackBuffer
;
69 int32_t tempBufCapacity
= _STACK_BUFFER_CAPACITY
;
70 char* tempBufLimit
= stackBuffer
+ tempBufCapacity
;
71 UConverter
* conv
= NULL
;
72 char* saveBuf
= tempBuf
;
73 wchar_t* intTarget
=NULL
;
74 int32_t intTargetCapacity
=0;
77 const UChar
*pSrcLimit
=NULL
;
78 const UChar
*pSrc
= src
;
80 conv
= u_getDefaultConverter(pErrorCode
);
82 if(U_FAILURE(*pErrorCode
)){
87 srcLength
= u_strlen(pSrc
);
90 pSrcLimit
= pSrc
+ srcLength
;
93 /* reset the error state */
94 *pErrorCode
= U_ZERO_ERROR
;
96 /* convert to chars using default converter */
97 ucnv_fromUnicode(conv
,&tempBuf
,tempBufLimit
,&pSrc
,pSrcLimit
,NULL
,(UBool
)(pSrc
==pSrcLimit
),pErrorCode
);
98 count
=(tempBuf
- saveBuf
);
100 /* This should rarely occur */
101 if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
){
104 /* we dont have enough room on the stack grow the buffer */
105 if(!u_growAnyBufferFromStatic(stackBuffer
,(void**) &tempBuf
, &tempBufCapacity
,
106 (_BUFFER_CAPACITY_MULTIPLIER
* (srcLength
)), count
,sizeof(char))){
111 tempBufLimit
= tempBuf
+ tempBufCapacity
;
112 tempBuf
= tempBuf
+ count
;
119 if(U_FAILURE(*pErrorCode
)){
123 /* done with conversion null terminate the char buffer */
124 if(count
>=tempBufCapacity
){
126 /* we dont have enough room on the stack grow the buffer */
127 if(!u_growAnyBufferFromStatic(stackBuffer
,(void**) &tempBuf
, &tempBufCapacity
,
128 tempBufCapacity
-count
+1, count
,sizeof(char))){
137 /* allocate more space than required
138 * here we assume that every char requires
139 * no more than 2 wchar_ts
141 intTargetCapacity
= (count
* _BUFFER_CAPACITY_MULTIPLIER
+ 1) /*for null termination */;
142 intTarget
= (wchar_t*)uprv_malloc( intTargetCapacity
* sizeof(wchar_t) );
147 int32_t remaining
= intTargetCapacity
;
148 wchar_t* pIntTarget
=intTarget
;
151 /* now convert the mbs to wcs */
154 /* we can call the system API since we are sure that
155 * there is atleast 1 null in the input
157 retVal
= uprv_mbstowcs(pIntTarget
,(tempBuf
+nulLen
),remaining
);
160 *pErrorCode
= U_INVALID_CHAR_FOUND
;
162 }else if(retVal
== remaining
){/* should never occur */
163 int numWritten
= (pIntTarget
-intTarget
);
164 u_growAnyBufferFromStatic(NULL
,(void**) &intTarget
,
166 intTargetCapacity
* _BUFFER_CAPACITY_MULTIPLIER
,
169 pIntTarget
= intTarget
;
170 remaining
=intTargetCapacity
;
172 if(nulLen
!=count
){ /*there are embedded nulls*/
173 pIntTarget
+=numWritten
;
174 remaining
-=numWritten
;
180 /* we donot check for limit since tempBuf is null terminated */
181 while(tempBuf
[nulLen
++] != 0){
183 nulVal
= (nulLen
< srcLength
) ? 1 : 0;
184 pIntTarget
= pIntTarget
+ retVal
+nulVal
;
185 remaining
-=(retVal
+nulVal
);
187 /* check if we have reached the source limit*/
193 count
= (int32_t)(pIntTarget
-intTarget
);
195 if(0 < count
&& count
<= destCapacity
){
196 uprv_memcpy(dest
,intTarget
,count
*sizeof(wchar_t));
200 *pDestLength
= count
;
203 /* free the allocated memory */
204 uprv_free(intTarget
);
207 *pErrorCode
= U_MEMORY_ALLOCATION_ERROR
;
210 /* are we still using stack buffer */
211 if(stackBuffer
!= saveBuf
){
214 u_terminateWChars(dest
,destCapacity
,count
,pErrorCode
);
216 u_releaseDefaultConverter(conv
);
222 U_CAPI
wchar_t* U_EXPORT2
223 u_strToWCS(wchar_t *dest
,
224 int32_t destCapacity
,
225 int32_t *pDestLength
,
228 UErrorCode
*pErrorCode
){
231 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)){
235 if( (src
==NULL
&& srcLength
!=0) || srcLength
< -1 ||
236 (destCapacity
<0) || (dest
== NULL
&& destCapacity
> 0)
238 *pErrorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
242 #ifdef U_WCHAR_IS_UTF16
243 /* wchar_t is UTF-16 just do a memcpy */
245 srcLength
= u_strlen(src
);
247 if(0 < srcLength
&& srcLength
<= destCapacity
){
248 uprv_memcpy(dest
,src
,srcLength
*U_SIZEOF_UCHAR
);
251 *pDestLength
= srcLength
;
254 u_terminateUChars(dest
,destCapacity
,srcLength
,pErrorCode
);
258 #elif defined U_WCHAR_IS_UTF32
260 return (wchar_t*)u_strToUTF32((UChar32
*)dest
, destCapacity
, pDestLength
,
261 src
, srcLength
, pErrorCode
);
265 return _strToWCS(dest
,destCapacity
,pDestLength
,src
,srcLength
, pErrorCode
);
271 #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
272 /* helper function */
274 _strFromWCS( UChar
*dest
,
275 int32_t destCapacity
,
276 int32_t *pDestLength
,
279 UErrorCode
*pErrorCode
)
281 int32_t retVal
=0, count
=0 ;
282 UConverter
* conv
= NULL
;
283 UChar
* pTarget
= NULL
;
284 UChar
* pTargetLimit
= NULL
;
285 UChar
* target
= NULL
;
287 UChar uStack
[_STACK_BUFFER_CAPACITY
];
289 wchar_t wStack
[_STACK_BUFFER_CAPACITY
];
290 wchar_t* pWStack
= wStack
;
293 char cStack
[_STACK_BUFFER_CAPACITY
];
294 int32_t cStackCap
= _STACK_BUFFER_CAPACITY
;
297 char* pCSrcLimit
=NULL
;
299 const wchar_t* pSrc
= src
;
300 const wchar_t* pSrcLimit
= NULL
;
303 /* if the wchar_t source is null terminated we can safely
304 * assume that there are no embedded nulls, this is a fast
305 * path for null terminated strings.
308 /* convert wchars to chars */
309 retVal
= uprv_wcstombs(pCSrc
,src
, cStackCap
);
312 *pErrorCode
= U_ILLEGAL_CHAR_FOUND
;
314 }else if(retVal
>= (cStackCap
-1)){
315 /* Should rarely occur */
316 u_growAnyBufferFromStatic(cStack
,(void**)&pCSrc
,&cStackCap
,
317 cStackCap
* _BUFFER_CAPACITY_MULTIPLIER
, 0, sizeof(char));
320 /* converted every thing */
321 pCSrc
= pCSrc
+retVal
;
327 /* here the source is not null terminated
328 * so it may have nulls embeded and we need to
329 * do some extra processing
331 int32_t remaining
=cStackCap
;
333 pSrcLimit
= src
+ srcLength
;
336 register int32_t nulLen
= 0;
338 /* find nulls in the string */
339 while(nulLen
<srcLength
&& pSrc
[nulLen
++]!=0){
342 if((pSrc
+nulLen
) < pSrcLimit
){
343 /* check if we have enough room in pCSrc */
344 if(remaining
< (nulLen
* MB_CUR_MAX
)){
345 /* should rarely occur */
346 int32_t len
= (pCSrc
-pCSave
);
348 /* we do not have enough room so grow the buffer*/
349 u_growAnyBufferFromStatic(cStack
,(void**)&pCSrc
,&cStackCap
,
350 _BUFFER_CAPACITY_MULTIPLIER
*cStackCap
+(nulLen
*MB_CUR_MAX
),len
,sizeof(char));
354 remaining
= cStackCap
-(pCSrc
- pCSave
);
357 /* we have found a null so convert the
358 * chunk from begining of non-null char to null
360 retVal
= uprv_wcstombs(pCSrc
,pSrc
,remaining
);
363 /* an error occurred bail out */
364 *pErrorCode
= U_ILLEGAL_CHAR_FOUND
;
368 pCSrc
+= retVal
+1 /* already null terminated */;
370 pSrc
+= nulLen
; /* skip past the null */
371 srcLength
-=nulLen
; /* decrement the srcLength */
372 remaining
-= (pCSrc
-pCSave
);
376 /* the source is not null terminated and we are
377 * end of source so we copy the source to a temp buffer
378 * null terminate it and convert wchar_ts to chars
380 if(nulLen
>= _STACK_BUFFER_CAPACITY
){
381 /* Should rarely occcur */
382 /* allocate new buffer buffer */
383 pWStack
=(wchar_t*) uprv_malloc(sizeof(wchar_t) * (nulLen
+ 1));
385 *pErrorCode
= U_MEMORY_ALLOCATION_ERROR
;
390 /* copy the contents to tempStack */
391 uprv_memcpy(pWStack
,pSrc
,nulLen
*sizeof(wchar_t));
394 /* null terminate the tempBuffer */
397 if(remaining
< (nulLen
* MB_CUR_MAX
)){
398 /* Should rarely occur */
399 int32_t len
= (pCSrc
-pCSave
);
401 /* we do not have enough room so grow the buffer*/
402 u_growAnyBufferFromStatic(cStack
,(void**)&pCSrc
,&cStackCap
,
403 cStackCap
+(nulLen
*MB_CUR_MAX
),len
,sizeof(char));
407 remaining
= cStackCap
-(pCSrc
- pCSave
);
409 /* convert to chars */
410 retVal
= uprv_wcstombs(pCSrc
,pWStack
,remaining
);
414 srcLength
-=nulLen
; /* decrement the srcLength */
420 /* OK..now we have converted from wchar_ts to chars now
421 * convert chars to UChars
425 pTarget
= target
= dest
;
426 pTargetLimit
= dest
+ destCapacity
;
428 conv
= u_getDefaultConverter(pErrorCode
);
430 if(U_FAILURE(*pErrorCode
)|| conv
==NULL
){
436 *pErrorCode
= U_ZERO_ERROR
;
438 /* convert to stack buffer*/
439 ucnv_toUnicode(conv
,&pTarget
,pTargetLimit
,(const char**)&pCSrc
,pCSrcLimit
,NULL
,(UBool
)(pCSrc
==pCSrcLimit
),pErrorCode
);
441 /* increment count to number written to stack */
442 count
+= pTarget
- target
;
444 if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
){
447 pTargetLimit
= uStack
+ _STACK_BUFFER_CAPACITY
;
458 u_terminateUChars(dest
,destCapacity
,count
,pErrorCode
);
462 if(cStack
!= pCSave
){
466 if(wStack
!= pWStack
){
470 u_releaseDefaultConverter(conv
);
476 U_CAPI UChar
* U_EXPORT2
477 u_strFromWCS(UChar
*dest
,
478 int32_t destCapacity
,
479 int32_t *pDestLength
,
482 UErrorCode
*pErrorCode
)
486 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)){
490 if( (src
==NULL
&& srcLength
!=0) || srcLength
< -1 ||
491 (destCapacity
<0) || (dest
== NULL
&& destCapacity
> 0)
493 *pErrorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
497 #ifdef U_WCHAR_IS_UTF16
498 /* wchar_t is UTF-16 just do a memcpy */
500 srcLength
= u_strlen(src
);
502 if(0 < srcLength
&& srcLength
<= destCapacity
){
503 uprv_memcpy(dest
,src
,srcLength
*U_SIZEOF_UCHAR
);
506 *pDestLength
= srcLength
;
509 u_terminateUChars(dest
,destCapacity
,srcLength
,pErrorCode
);
513 #elif defined U_WCHAR_IS_UTF32
515 return u_strFromUTF32(dest
, destCapacity
, pDestLength
,
516 (UChar32
*)src
, srcLength
, pErrorCode
);
520 return _strFromWCS(dest
,destCapacity
,pDestLength
,src
,srcLength
,pErrorCode
);
#endif /* #if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION */