/*
*******************************************************************************
*
*   Copyright (C) 2001-2012, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  ustr_wcs.cpp
*   tab size:   8 (not used)
*
*   created on: 2004sep07
*   created by: Markus W. Scherer
*
*   u_strToWCS() and u_strFromWCS() functions
*   moved here from ustrtrns.c for better modularization.
*/
20 #include "unicode/utypes.h"
21 #include "unicode/ustring.h"
28 #if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION
30 #define _STACK_BUFFER_CAPACITY 1000
31 #define _BUFFER_CAPACITY_MULTIPLIER 2
33 #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
34 // TODO: We should use CharString for char buffers and UnicodeString for UChar buffers.
35 // Then we could change this to work only with wchar_t buffers.
/*
 * u_growAnyBufferFromStatic: allocate a replacement buffer of
 * reqCapacity*size bytes with uprv_malloc() and copy length*size bytes of
 * the current *pBuffer contents into it.
 *
 *   context     - compared with *pBuffer to decide whether the old buffer
 *                 was statically allocated (see the release comment below).
 *   pBuffer     - address of the caller's buffer pointer; *pBuffer is the
 *                 source of the copy.
 *   pCapacity   - receives reqCapacity.
 *   reqCapacity - number of elements requested for the new buffer.
 *   length      - number of elements to carry over from the old buffer.
 *   size        - size of one element in bytes.
 *
 * Returns a UBool that is non-zero exactly when newBuffer is not NULL.
 *
 * NOTE(review): the embedded line numbers skip (42 -> 45 -> 47 -> 52 ->
 * 53 -> 58), so some statements of this function are not visible in this
 * text (the return-type line, and whatever follows the pointer comparison).
 * Confirm the complete body against the full file before relying on this
 * description.
 */
37 u_growAnyBufferFromStatic(void *context
,
38 void **pBuffer
, int32_t *pCapacity
, int32_t reqCapacity
,
39 int32_t length
, int32_t size
) {
40 // Use char* not void* to avoid the compiler's strict-aliasing assumptions
41 // and related warnings.
42 char *newBuffer
=(char *)uprv_malloc(reqCapacity
*size
);
// Carry the first length elements of the old buffer over to the new one.
45 uprv_memcpy(newBuffer
, *pBuffer
, length
*size
);
// Report the new capacity back to the caller.
47 *pCapacity
=reqCapacity
;
52 /* release the old pBuffer if it was not statically allocated */
53 if(*pBuffer
!=(char *)context
) {
// Non-zero exactly when uprv_malloc() returned a buffer.
58 return (UBool
)(newBuffer
!=NULL
);
/*
 * _strToWCS: helper that converts UChar text (src) to wchar_t text (dest)
 * by way of the default converter and uprv_mbstowcs().
 *
 * Steps visible in this block:
 *   1. u_getDefaultConverter() supplies the converter; srcLength can be
 *      recomputed with u_strlen(pSrc).
 *   2. ucnv_fromUnicode() writes chars through tempBuf, which starts at the
 *      _STACK_BUFFER_CAPACITY-byte stackBuffer. On U_BUFFER_OVERFLOW_ERROR
 *      the buffer is grown with u_growAnyBufferFromStatic() to 2*srcLength,
 *      or to _BUFFER_CAPACITY_MULTIPLIER*tempBufCapacity when 2*srcLength
 *      is not larger than the current capacity.
 *   3. When count >= tempBufCapacity the buffer is grown to count+1 so
 *      that the chars can be NUL-terminated.
 *   4. A wchar_t buffer of count*_BUFFER_CAPACITY_MULTIPLIER+1 units is
 *      allocated and filled by uprv_mbstowcs(); nulLen is advanced past
 *      each NUL found in tempBuf so embedded NULs are handled chunk by
 *      chunk.
 *   5. When 0 < count <= destCapacity the result is copied into dest;
 *      *pDestLength receives count; u_terminateWChars() finishes dest and
 *      u_releaseDefaultConverter() returns the converter.
 *
 * Error codes assigned in this block: U_INVALID_CHAR_FOUND and
 * U_MEMORY_ALLOCATION_ERROR.
 *
 * NOTE(review): the embedded line numbers skip throughout, so several
 * parameters (destCapacity, pDestLength, src, srcLength are used but not
 * declared in the visible text), the declarations of count, retVal, nulLen
 * and nulVal, the loop headers, the branch conditions around the error
 * assignments, and the return statements are not visible. Some block
 * comments also lack their closing delimiter in this text. Confirm against
 * the full file.
 */
63 _strToWCS(wchar_t *dest
,
68 UErrorCode
*pErrorCode
){
// Converter output goes to a stack buffer first; tempBuf is the write cursor.
70 char stackBuffer
[_STACK_BUFFER_CAPACITY
];
71 char* tempBuf
= stackBuffer
;
72 int32_t tempBufCapacity
= _STACK_BUFFER_CAPACITY
;
73 char* tempBufLimit
= stackBuffer
+ tempBufCapacity
;
74 UConverter
* conv
= NULL
;
// saveBuf starts out pointing at the beginning of the char buffer.
75 char* saveBuf
= tempBuf
;
76 wchar_t* intTarget
=NULL
;
77 int32_t intTargetCapacity
=0;
80 const UChar
*pSrcLimit
=NULL
;
81 const UChar
*pSrc
= src
;
83 conv
= u_getDefaultConverter(pErrorCode
);
85 if(U_FAILURE(*pErrorCode
)){
90 srcLength
= u_strlen(pSrc
);
93 pSrcLimit
= pSrc
+ srcLength
;
96 /* reset the error state */
97 *pErrorCode
= U_ZERO_ERROR
;
99 /* convert to chars using default converter */
100 ucnv_fromUnicode(conv
,&tempBuf
,tempBufLimit
,&pSrc
,pSrcLimit
,NULL
,(UBool
)(pSrc
==pSrcLimit
),pErrorCode
);
// Number of chars between the saved start and the write cursor.
101 count
=(tempBuf
- saveBuf
);
103 /* This should rarely occur */
104 if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
){
107 /* we do not have enough room on the stack, so grow the buffer */
108 int32_t newCapacity
= 2 * srcLength
;
109 if(newCapacity
<= tempBufCapacity
) {
110 newCapacity
= _BUFFER_CAPACITY_MULTIPLIER
* tempBufCapacity
;
112 if(!u_growAnyBufferFromStatic(stackBuffer
,(void**) &tempBuf
, &tempBufCapacity
,
113 newCapacity
, count
, 1)) {
// Re-derive the limit and the write cursor from the grown buffer.
118 tempBufLimit
= tempBuf
+ tempBufCapacity
;
119 tempBuf
= tempBuf
+ count
;
126 if(U_FAILURE(*pErrorCode
)){
130 /* done with conversion; null-terminate the char buffer */
131 if(count
>=tempBufCapacity
){
133 /* we do not have enough room on the stack, so grow the buffer */
134 if(!u_growAnyBufferFromStatic(stackBuffer
,(void**) &tempBuf
, &tempBufCapacity
,
135 count
+1, count
, 1)) {
144 /* allocate more space than required
145 * here we assume that every char requires
146 * no more than 2 wchar_ts
148 intTargetCapacity
= (count
* _BUFFER_CAPACITY_MULTIPLIER
+ 1) /*for null termination */;
149 intTarget
= (wchar_t*)uprv_malloc( intTargetCapacity
* sizeof(wchar_t) );
154 int32_t remaining
= intTargetCapacity
;
155 wchar_t* pIntTarget
=intTarget
;
158 /* now convert the mbs to wcs */
161 /* we can call the system API since we are sure that
162 * there is atleast 1 null in the input
164 retVal
= uprv_mbstowcs(pIntTarget
,(tempBuf
+nulLen
),remaining
);
167 *pErrorCode
= U_INVALID_CHAR_FOUND
;
169 }else if(retVal
== remaining
){/* should never occur */
170 int numWritten
= (pIntTarget
-intTarget
);
// NOTE(review): unlike the two grow calls above, this call is a plain
// statement, so its result is not tested here -- confirm that a failed
// allocation is handled before intTarget is used again.
171 u_growAnyBufferFromStatic(NULL
,(void**) &intTarget
,
173 intTargetCapacity
* _BUFFER_CAPACITY_MULTIPLIER
,
176 pIntTarget
= intTarget
;
177 remaining
=intTargetCapacity
;
179 if(nulLen
!=count
){ /*there are embedded nulls*/
180 pIntTarget
+=numWritten
;
181 remaining
-=numWritten
;
187 /* we do not check for limit since tempBuf is null terminated */
188 while(tempBuf
[nulLen
++] != 0){
// nulVal is 1 while the NUL just passed lies before the end of the source.
190 nulVal
= (nulLen
< srcLength
) ? 1 : 0;
191 pIntTarget
= pIntTarget
+ retVal
+nulVal
;
192 remaining
-=(retVal
+nulVal
);
194 /* check if we have reached the source limit*/
// count is reused: it now holds the number of wchar_t units written.
200 count
= (int32_t)(pIntTarget
-intTarget
);
202 if(0 < count
&& count
<= destCapacity
){
203 uprv_memcpy(dest
,intTarget
,count
*sizeof(wchar_t));
207 *pDestLength
= count
;
210 /* free the allocated memory */
211 uprv_free(intTarget
);
214 *pErrorCode
= U_MEMORY_ALLOCATION_ERROR
;
217 /* are we still using stack buffer */
218 if(stackBuffer
!= saveBuf
){
221 u_terminateWChars(dest
,destCapacity
,count
,pErrorCode
);
223 u_releaseDefaultConverter(conv
);
/*
 * u_strToWCS: exported entry point that converts UChar text (src) to
 * wchar_t text (dest).
 *
 *   dest         - output buffer; must not be NULL when destCapacity > 0.
 *   destCapacity - capacity of dest; must not be negative.
 *   pDestLength  - receives the output length.
 *   src          - input text; may be NULL only when srcLength is 0.
 *   srcLength    - input length; values below -1 are rejected.
 *   pErrorCode   - in/out error code; the first test is on a NULL pointer
 *                  or an already failing code, and invalid arguments set
 *                  U_ILLEGAL_ARGUMENT_ERROR.
 *
 * Three compile-time variants are visible:
 *   - U_WCHAR_IS_UTF16: srcLength*U_SIZEOF_UCHAR bytes are copied with
 *     uprv_memcpy() when 0 < srcLength <= destCapacity, *pDestLength is set
 *     to srcLength and u_terminateUChars() finishes the buffer.
 *   - U_WCHAR_IS_UTF32: the call is forwarded to u_strToUTF32().
 *   - otherwise: the call is forwarded to _strToWCS().
 *
 * NOTE(review): the src and srcLength parameter lines, the bodies of the
 * argument-check branches, the condition guarding u_strlen(), and the
 * remaining preprocessor branch lines are not visible in this text (the
 * embedded line numbers skip). Presumably srcLength == -1 selects the
 * u_strlen() path -- confirm against the full file.
 */
229 U_CAPI
wchar_t* U_EXPORT2
230 u_strToWCS(wchar_t *dest
,
231 int32_t destCapacity
,
232 int32_t *pDestLength
,
235 UErrorCode
*pErrorCode
){
// Argument checks: error-code state first, then pointer/length sanity.
238 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)){
242 if( (src
==NULL
&& srcLength
!=0) || srcLength
< -1 ||
243 (destCapacity
<0) || (dest
== NULL
&& destCapacity
> 0)
245 *pErrorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
249 #ifdef U_WCHAR_IS_UTF16
250 /* wchar_t is UTF-16, so just do a memcpy */
252 srcLength
= u_strlen(src
);
// Copy only when there is something to copy and it fits into dest.
254 if(0 < srcLength
&& srcLength
<= destCapacity
){
255 uprv_memcpy(dest
,src
,srcLength
*U_SIZEOF_UCHAR
);
258 *pDestLength
= srcLength
;
261 u_terminateUChars(dest
,destCapacity
,srcLength
,pErrorCode
);
265 #elif defined U_WCHAR_IS_UTF32
267 return (wchar_t*)u_strToUTF32((UChar32
*)dest
, destCapacity
, pDestLength
,
268 src
, srcLength
, pErrorCode
);
// Generic path: go through the default converter.
272 return _strToWCS(dest
,destCapacity
,pDestLength
,src
,srcLength
, pErrorCode
);
278 #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
279 /* helper function */
/*
 * _strFromWCS: helper that converts wchar_t text (src) to UChar text (dest)
 * by way of uprv_wcstombs() and the default converter.
 *
 * Steps visible in this block:
 *   1. Fast path (described by the comment as the null-terminated case):
 *      uprv_wcstombs() converts src into the char buffer addressed by
 *      pCSrc; when the result is >= cStackCap-1 the buffer is grown by
 *      _BUFFER_CAPACITY_MULTIPLIER with u_growAnyBufferFromStatic().
 *   2. Otherwise the source is scanned for NULs with nulLen. A chunk that
 *      ends in a NUL before pSrcLimit is converted in place; the final
 *      chunk is first copied to pWStack (heap-allocated when nulLen >=
 *      _STACK_BUFFER_CAPACITY) and then converted. Before each conversion
 *      the char buffer is grown when remaining < nulLen*MB_CUR_MAX.
 *   3. ucnv_toUnicode() converts the chars to UChars starting at dest;
 *      count accumulates the number of UChars written, and on
 *      U_BUFFER_OVERFLOW_ERROR the target limit is moved to the end of
 *      uStack.
 *   4. u_terminateUChars() finishes dest; the comparisons of cStack with
 *      pCSave and of wStack with pWStack precede the release of the
 *      converter.
 *
 * Error codes assigned in this block: U_ILLEGAL_CHAR_FOUND and
 * U_MEMORY_ALLOCATION_ERROR.
 *
 * NOTE(review): the embedded line numbers skip throughout, so the src and
 * srcLength parameters, the declarations of pCSrc and pCSave, the loop
 * headers, the branch conditions around the error assignments, and the
 * return statements are not visible in this text. Some block comments also
 * lack their closing delimiter here. Confirm against the full file.
 */
281 _strFromWCS( UChar
*dest
,
282 int32_t destCapacity
,
283 int32_t *pDestLength
,
286 UErrorCode
*pErrorCode
)
288 int32_t retVal
=0, count
=0 ;
289 UConverter
* conv
= NULL
;
290 UChar
* pTarget
= NULL
;
291 UChar
* pTargetLimit
= NULL
;
292 UChar
* target
= NULL
;
// Scratch stack buffers: uStack (UChars), wStack (wchar_ts), cStack (chars).
294 UChar uStack
[_STACK_BUFFER_CAPACITY
];
296 wchar_t wStack
[_STACK_BUFFER_CAPACITY
];
297 wchar_t* pWStack
= wStack
;
300 char cStack
[_STACK_BUFFER_CAPACITY
];
301 int32_t cStackCap
= _STACK_BUFFER_CAPACITY
;
304 char* pCSrcLimit
=NULL
;
306 const wchar_t* pSrc
= src
;
307 const wchar_t* pSrcLimit
= NULL
;
310 /* if the wchar_t source is null terminated we can safely
311 * assume that there are no embedded nulls, this is a fast
312 * path for null terminated strings.
315 /* convert wchars to chars */
316 retVal
= uprv_wcstombs(pCSrc
,src
, cStackCap
);
319 *pErrorCode
= U_ILLEGAL_CHAR_FOUND
;
321 }else if(retVal
>= (cStackCap
-1)){
322 /* Should rarely occur */
// NOTE(review): this grow call is a plain statement, so its result is not
// tested here -- confirm that a failed allocation is handled.
323 u_growAnyBufferFromStatic(cStack
,(void**)&pCSrc
,&cStackCap
,
324 cStackCap
* _BUFFER_CAPACITY_MULTIPLIER
, 0, sizeof(char));
327 /* converted everything */
328 pCSrc
= pCSrc
+retVal
;
334 /* here the source is not null terminated
335 * so it may have nulls embeded and we need to
336 * do some extra processing
338 int32_t remaining
=cStackCap
;
340 pSrcLimit
= src
+ srcLength
;
343 register int32_t nulLen
= 0;
345 /* find nulls in the string */
346 while(nulLen
<srcLength
&& pSrc
[nulLen
++]!=0){
349 if((pSrc
+nulLen
) < pSrcLimit
){
350 /* check if we have enough room in pCSrc */
351 if(remaining
< (nulLen
* MB_CUR_MAX
)){
352 /* should rarely occur */
353 int32_t len
= (pCSrc
-pCSave
);
355 /* we do not have enough room so grow the buffer*/
// NOTE(review): the result of this grow call is not tested here either.
356 u_growAnyBufferFromStatic(cStack
,(void**)&pCSrc
,&cStackCap
,
357 _BUFFER_CAPACITY_MULTIPLIER
*cStackCap
+(nulLen
*MB_CUR_MAX
),len
,sizeof(char));
361 remaining
= cStackCap
-(pCSrc
- pCSave
);
364 /* we have found a null so convert the
365 * chunk from begining of non-null char to null
367 retVal
= uprv_wcstombs(pCSrc
,pSrc
,remaining
);
370 /* an error occurred bail out */
371 *pErrorCode
= U_ILLEGAL_CHAR_FOUND
;
375 pCSrc
+= retVal
+1 /* already null terminated */;
377 pSrc
+= nulLen
; /* skip past the null */
378 srcLength
-=nulLen
; /* decrement the srcLength */
// NOTE(review): this subtracts the whole offset (pCSrc-pCSave) from the
// running value of remaining, whereas the grow path above recomputes it as
// cStackCap-(pCSrc-pCSave). After more than one chunk the two disagree --
// confirm which accounting is intended.
379 remaining
-= (pCSrc
-pCSave
);
383 /* the source is not null terminated and we are
384 * end of source so we copy the source to a temp buffer
385 * null terminate it and convert wchar_ts to chars
387 if(nulLen
>= _STACK_BUFFER_CAPACITY
){
388 /* Should rarely occcur */
389 /* allocate new buffer */
390 pWStack
=(wchar_t*) uprv_malloc(sizeof(wchar_t) * (nulLen
+ 1));
392 *pErrorCode
= U_MEMORY_ALLOCATION_ERROR
;
397 /* copy the contents to tempStack */
398 uprv_memcpy(pWStack
,pSrc
,nulLen
*sizeof(wchar_t));
401 /* null terminate the tempBuffer */
404 if(remaining
< (nulLen
* MB_CUR_MAX
)){
405 /* Should rarely occur */
406 int32_t len
= (pCSrc
-pCSave
);
408 /* we do not have enough room so grow the buffer*/
// NOTE(review): the result of this grow call is not tested here either.
409 u_growAnyBufferFromStatic(cStack
,(void**)&pCSrc
,&cStackCap
,
410 cStackCap
+(nulLen
*MB_CUR_MAX
),len
,sizeof(char));
414 remaining
= cStackCap
-(pCSrc
- pCSave
);
416 /* convert to chars */
// NOTE(review): no U_ILLEGAL_CHAR_FOUND handling is visible after this
// call, unlike the two earlier uprv_wcstombs() calls -- confirm whether a
// failing conversion of the final chunk is detected.
417 retVal
= uprv_wcstombs(pCSrc
,pWStack
,remaining
);
421 srcLength
-=nulLen
; /* decrement the srcLength */
427 /* OK..now we have converted from wchar_ts to chars now
428 * convert chars to UChars
432 pTarget
= target
= dest
;
433 pTargetLimit
= dest
+ destCapacity
;
435 conv
= u_getDefaultConverter(pErrorCode
);
437 if(U_FAILURE(*pErrorCode
)|| conv
==NULL
){
443 *pErrorCode
= U_ZERO_ERROR
;
445 /* convert to stack buffer*/
446 ucnv_toUnicode(conv
,&pTarget
,pTargetLimit
,(const char**)&pCSrc
,pCSrcLimit
,NULL
,(UBool
)(pCSrc
==pCSrcLimit
),pErrorCode
);
448 /* increment count to number written to stack */
449 count
+= pTarget
- target
;
451 if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
){
// Further output is limited by the end of the uStack scratch buffer.
454 pTargetLimit
= uStack
+ _STACK_BUFFER_CAPACITY
;
465 u_terminateUChars(dest
,destCapacity
,count
,pErrorCode
);
// The two comparisons below test whether the char and wchar_t work buffers
// still are the stack arrays.
469 if(cStack
!= pCSave
){
473 if(wStack
!= pWStack
){
477 u_releaseDefaultConverter(conv
);
/*
 * u_strFromWCS: exported entry point that converts wchar_t text (src) to
 * UChar text (dest).
 *
 *   dest         - output buffer; must not be NULL when destCapacity > 0.
 *   destCapacity - capacity of dest; must not be negative.
 *   pDestLength  - receives the output length.
 *   src          - input text; may be NULL only when srcLength is 0.
 *   srcLength    - input length; values below -1 are rejected.
 *   pErrorCode   - in/out error code; the first test is on a NULL pointer
 *                  or an already failing code, and invalid arguments set
 *                  U_ILLEGAL_ARGUMENT_ERROR.
 *
 * Three compile-time variants are visible:
 *   - U_WCHAR_IS_UTF16: srcLength*U_SIZEOF_UCHAR bytes are copied with
 *     uprv_memcpy() when 0 < srcLength <= destCapacity, *pDestLength is set
 *     to srcLength and u_terminateUChars() finishes the buffer.
 *   - U_WCHAR_IS_UTF32: the call is forwarded to u_strFromUTF32().
 *   - otherwise: the call is forwarded to _strFromWCS().
 *
 * NOTE(review): the src and srcLength parameter lines, the bodies of the
 * argument-check branches, the condition guarding u_strlen(), and the
 * remaining preprocessor branch lines are not visible in this text (the
 * embedded line numbers skip). Presumably srcLength == -1 selects the
 * u_strlen() path -- confirm against the full file.
 */
483 U_CAPI UChar
* U_EXPORT2
484 u_strFromWCS(UChar
*dest
,
485 int32_t destCapacity
,
486 int32_t *pDestLength
,
489 UErrorCode
*pErrorCode
)
// Argument checks: error-code state first, then pointer/length sanity.
493 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)){
497 if( (src
==NULL
&& srcLength
!=0) || srcLength
< -1 ||
498 (destCapacity
<0) || (dest
== NULL
&& destCapacity
> 0)
500 *pErrorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
504 #ifdef U_WCHAR_IS_UTF16
505 /* wchar_t is UTF-16, so just do a memcpy */
507 srcLength
= u_strlen(src
);
// Copy only when there is something to copy and it fits into dest.
509 if(0 < srcLength
&& srcLength
<= destCapacity
){
510 uprv_memcpy(dest
,src
,srcLength
*U_SIZEOF_UCHAR
);
513 *pDestLength
= srcLength
;
516 u_terminateUChars(dest
,destCapacity
,srcLength
,pErrorCode
);
520 #elif defined U_WCHAR_IS_UTF32
522 return u_strFromUTF32(dest
, destCapacity
, pDestLength
,
523 (UChar32
*)src
, srcLength
, pErrorCode
);
// Generic path: go through the default converter.
527 return _strFromWCS(dest
,destCapacity
,pDestLength
,src
,srcLength
,pErrorCode
);
533 #endif /* #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32) && !UCONFIG_NO_CONVERSION */