1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 *******************************************************************************
6 * Copyright (C) 2001-2012, International Business Machines
7 * Corporation and others. All Rights Reserved.
9 *******************************************************************************
10 * file name: ustr_wcs.cpp
12 * tab size: 8 (not used)
15 * created on: 2004sep07
16 * created by: Markus W. Scherer
18 * u_strToWCS() and u_strFromWCS() functions
19 * moved here from ustrtrns.c for better modularization.
22 #include "unicode/utypes.h"
23 #include "unicode/ustring.h"
30 #if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION
32 #define _STACK_BUFFER_CAPACITY 1000
33 #define _BUFFER_CAPACITY_MULTIPLIER 2
35 #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
36 // TODO: We should use CharString for char buffers and UnicodeString for UChar buffers.
37 // Then we could change this to work only with wchar_t buffers.
// Allocates a new buffer of reqCapacity elements of `size` bytes each with
// uprv_malloc() and copies the first `length` elements of *pBuffer into it.
//
// context     - compared against *pBuffer to tell whether the old buffer is
//               the caller's statically allocated one or a block that has to
//               be released.
// pBuffer     - pointer to the buffer pointer; *pBuffer is the copy source.
// pCapacity   - receives reqCapacity.
// reqCapacity - requested capacity, counted in elements.
// length      - number of elements to carry over from the old buffer.
// size        - size of one element in bytes.
//
// Returns a true UBool when the allocation produced a non-NULL buffer.
//
// NOTE(review): the embedded original line numbers skip in several places
// (38, 45-46, 48, 50-53, 56-59 and 61-64 are absent), so the return-type
// line, any guards around the copy, the release of the old buffer, the
// store back into *pBuffer and the closing brace are not visible in this
// listing - confirm against the complete file before relying on this text.
39 u_growAnyBufferFromStatic(void *context
,
40 void **pBuffer
, int32_t *pCapacity
, int32_t reqCapacity
,
41 int32_t length
, int32_t size
) {
42 // Use char* not void* to avoid the compiler's strict-aliasing assumptions
43 // and related warnings.
44 char *newBuffer
=(char *)uprv_malloc(reqCapacity
*size
);
// The byte count is length*size; the cast to size_t is applied to length
// before the multiplication.
47 uprv_memcpy(newBuffer
, *pBuffer
, (size_t)length
*size
);
49 *pCapacity
=reqCapacity
;
54 /* release the old pBuffer if it was not statically allocated */
55 if(*pBuffer
!=(char *)context
) {
// Success is reported purely as "the new allocation is non-NULL".
60 return (UBool
)(newBuffer
!=NULL
);
// Helper used by u_strToWCS() when wchar_t is neither UTF-16 nor UTF-32
// (this section is guarded by the corresponding preprocessor test above).
// Converts a UChar string to wchar_t in two stages:
//   1. UChars -> chars with the default converter (ucnv_fromUnicode), into a
//      stack buffer that is grown with u_growAnyBufferFromStatic() on
//      U_BUFFER_OVERFLOW_ERROR;
//   2. chars -> wchar_t with uprv_mbstowcs(), advancing through the char
//      buffer one NUL-terminated run at a time.
// The result is copied to dest only when it is non-empty and fits in
// destCapacity; the length is stored through pDestLength and dest is passed
// to u_terminateWChars().
//
// Error codes set in the visible code: U_INVALID_CHAR_FOUND (stage 2) and
// U_MEMORY_ALLOCATION_ERROR (the wchar_t work buffer could not be allocated).
//
// NOTE(review): this listing is incomplete. The embedded original line
// numbers skip, most of the parameter list (destCapacity, pDestLength, src,
// srcLength are used below but not declared here), the declarations of
// count, retVal, nulLen and nulVal, the loop headers, the else/break/goto
// lines, the cleanup label, the return statement and many closing braces
// are absent. Several multi-line block comments have also lost their
// closing line. Confirm every statement below against the complete file.
65 _strToWCS(wchar_t *dest
,
70 UErrorCode
*pErrorCode
){
// Scratch space for the converter output; it starts on the stack and is
// replaced through u_growAnyBufferFromStatic() when it overflows.
72 char stackBuffer
[_STACK_BUFFER_CAPACITY
];
73 char* tempBuf
= stackBuffer
;
74 int32_t tempBufCapacity
= _STACK_BUFFER_CAPACITY
;
75 char* tempBufLimit
= stackBuffer
+ tempBufCapacity
;
76 UConverter
* conv
= NULL
;
// saveBuf keeps the start of the char buffer; tempBuf is advanced by the
// converter, and the difference between the two is the byte count.
77 char* saveBuf
= tempBuf
;
78 wchar_t* intTarget
=NULL
;
79 int32_t intTargetCapacity
=0;
82 const UChar
*pSrcLimit
=NULL
;
83 const UChar
*pSrc
= src
;
85 conv
= u_getDefaultConverter(pErrorCode
);
87 if(U_FAILURE(*pErrorCode
)){
92 srcLength
= u_strlen(pSrc
);
95 pSrcLimit
= pSrc
+ srcLength
;
98 /* reset the error state */
99 *pErrorCode
= U_ZERO_ERROR
;
101 /* convert to chars using default converter */
// The flush argument is true only when pSrc already equals pSrcLimit.
102 ucnv_fromUnicode(conv
,&tempBuf
,tempBufLimit
,&pSrc
,pSrcLimit
,NULL
,(UBool
)(pSrc
==pSrcLimit
),pErrorCode
);
103 count
=(tempBuf
- saveBuf
);
105 /* This should rarely occur */
106 if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
){
109 /* we do not have enough room on the stack, so grow the buffer */
// Ask for twice the source length, or twice the current capacity when
// that first guess is not larger than what is already available.
110 int32_t newCapacity
= 2 * srcLength
;
111 if(newCapacity
<= tempBufCapacity
) {
112 newCapacity
= _BUFFER_CAPACITY_MULTIPLIER
* tempBufCapacity
;
// Element size 1: the buffer holds chars; count bytes are carried over.
114 if(!u_growAnyBufferFromStatic(stackBuffer
,(void**) &tempBuf
, &tempBufCapacity
,
115 newCapacity
, count
, 1)) {
// Recompute the write limit for the new capacity and move the write cursor
// past the count bytes already produced.
120 tempBufLimit
= tempBuf
+ tempBufCapacity
;
121 tempBuf
= tempBuf
+ count
;
128 if(U_FAILURE(*pErrorCode
)){
132 /* done with conversion; null-terminate the char buffer */
// A completely full buffer has no room for the terminator, so it is grown
// to count+1 bytes first.
133 if(count
>=tempBufCapacity
){
135 /* we do not have enough room on the stack, so grow the buffer */
136 if(!u_growAnyBufferFromStatic(stackBuffer
,(void**) &tempBuf
, &tempBufCapacity
,
137 count
+1, count
, 1)) {
146 /* allocate more space than required
147 * here we assume that every char requires
148 * no more than 2 wchar_ts
150 intTargetCapacity
= (count
* _BUFFER_CAPACITY_MULTIPLIER
+ 1) /*for null termination */;
151 intTarget
= (wchar_t*)uprv_malloc( intTargetCapacity
* sizeof(wchar_t) );
156 int32_t remaining
= intTargetCapacity
;
157 wchar_t* pIntTarget
=intTarget
;
160 /* now convert the mbs to wcs */
163 /* we can call the system API since we are sure that
164 * there is atleast 1 null in the input
166 retVal
= uprv_mbstowcs(pIntTarget
,(tempBuf
+nulLen
),remaining
);
169 *pErrorCode
= U_INVALID_CHAR_FOUND
;
171 }else if(retVal
== remaining
){/* should never occur */
// The target was filled completely: record how many wchar_t units precede
// the current chunk, then double the work buffer.
// NOTE(review): the result of this grow call is not checked in the visible
// code, unlike the two grow calls earlier in this function - confirm.
172 int numWritten
= (pIntTarget
-intTarget
);
173 u_growAnyBufferFromStatic(NULL
,(void**) &intTarget
,
175 intTargetCapacity
* _BUFFER_CAPACITY_MULTIPLIER
,
178 pIntTarget
= intTarget
;
179 remaining
=intTargetCapacity
;
// When earlier chunks were already converted, resume after them instead of
// at the start of the new buffer.
181 if(nulLen
!=count
){ /*there are embedded nulls*/
182 pIntTarget
+=numWritten
;
183 remaining
-=numWritten
;
189 /* we do not check for a limit since tempBuf is null terminated */
// Advance nulLen to just past the next NUL byte in the char buffer.
190 while(tempBuf
[nulLen
++] != 0){
// nulVal is 1 while nulLen is still below srcLength and 0 otherwise; it is
// added to the advance together with retVal (presumably to keep an embedded
// NUL in the output - confirm).
192 nulVal
= (nulLen
< srcLength
) ? 1 : 0;
193 pIntTarget
= pIntTarget
+ retVal
+nulVal
;
194 remaining
-=(retVal
+nulVal
);
196 /* check if we have reached the source limit*/
// From here on count is the number of wchar_t units produced.
202 count
= (int32_t)(pIntTarget
-intTarget
);
// Copy out only when the result is non-empty and fits in dest.
204 if(0 < count
&& count
<= destCapacity
){
205 uprv_memcpy(dest
, intTarget
, (size_t)count
*sizeof(wchar_t));
209 *pDestLength
= count
;
212 /* free the allocated memory */
213 uprv_free(intTarget
);
216 *pErrorCode
= U_MEMORY_ALLOCATION_ERROR
;
219 /* are we still using stack buffer */
220 if(stackBuffer
!= saveBuf
){
223 u_terminateWChars(dest
,destCapacity
,count
,pErrorCode
);
225 u_releaseDefaultConverter(conv
);
// Public entry point: converts a UChar string to a wchar_t string.
//
// dest         - output buffer; may be NULL only when destCapacity is 0.
// destCapacity - capacity of dest in wchar_t units; must not be negative.
// pDestLength  - receives the output length.
// pErrorCode   - in/out ICU error code; the first test below looks at a
//                NULL pointer or an already failed code.
// The src and srcLength parameters are used below, but their declaration
// lines are missing from this listing.
//
// Arguments are rejected with U_ILLEGAL_ARGUMENT_ERROR when src is NULL with
// a non-zero srcLength, srcLength is below -1, destCapacity is negative, or
// dest is NULL with a positive destCapacity.
//
// Three compile-time paths are visible:
//   U_WCHAR_IS_UTF16 - the code units are copied with u_memcpy() and dest is
//                      passed to u_terminateUChars();
//   U_WCHAR_IS_UTF32 - delegates to u_strToUTF32();
//   remaining branch - delegates to _strToWCS().
//
// NOTE(review): the embedded original line numbers skip, so the early
// returns, the srcLength == -1 guard that presumably precedes u_strlen(),
// the NULL check on pDestLength, the else/endif lines and the closing
// braces are not visible here - confirm against the complete file.
231 U_CAPI
wchar_t* U_EXPORT2
232 u_strToWCS(wchar_t *dest
,
233 int32_t destCapacity
,
234 int32_t *pDestLength
,
237 UErrorCode
*pErrorCode
){
240 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)){
244 if( (src
==NULL
&& srcLength
!=0) || srcLength
< -1 ||
245 (destCapacity
<0) || (dest
== NULL
&& destCapacity
> 0)
247 *pErrorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
251 #ifdef U_WCHAR_IS_UTF16
252 /* wchar_t is UTF-16 just do a memcpy */
254 srcLength
= u_strlen(src
);
// Copy only when there is something to copy and it fits in dest.
256 if(0 < srcLength
&& srcLength
<= destCapacity
){
257 u_memcpy((UChar
*)dest
, src
, srcLength
);
260 *pDestLength
= srcLength
;
263 u_terminateUChars((UChar
*)dest
,destCapacity
,srcLength
,pErrorCode
);
267 #elif defined U_WCHAR_IS_UTF32
269 return (wchar_t*)u_strToUTF32((UChar32
*)dest
, destCapacity
, pDestLength
,
270 src
, srcLength
, pErrorCode
);
274 return _strToWCS(dest
,destCapacity
,pDestLength
,src
,srcLength
, pErrorCode
);
280 #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
281 /* helper function */
// Helper used by u_strFromWCS() when wchar_t is neither UTF-16 nor UTF-32
// (this section is guarded by the corresponding preprocessor test above).
// Converts a wchar_t string to UChars in two stages:
//   1. wchar_t -> chars with uprv_wcstombs(), into cStack or a grown copy.
//      A NUL-terminated source is converted in one call; otherwise the
//      source is walked one NUL-delimited chunk at a time, and the final
//      unterminated chunk is first copied into wStack (or a heap block) so
//      that it can be NUL-terminated;
//   2. chars -> UChars with the default converter (ucnv_toUnicode) into
//      dest, followed by u_terminateUChars().
//
// Error codes set in the visible code: U_ILLEGAL_CHAR_FOUND (stage 1) and
// U_MEMORY_ALLOCATION_ERROR (the temporary wchar_t block).
//
// NOTE(review): this listing is incomplete. The embedded original line
// numbers skip, the src and srcLength parameters, the declarations of
// pCSrc, pCSave and nulLen, the branch on srcLength, the loop headers, the
// else/break/goto lines, the cleanup label, the return statement and many
// closing braces are absent. Several multi-line block comments have also
// lost their closing line. Confirm every statement below against the
// complete file.
283 _strFromWCS( UChar
*dest
,
284 int32_t destCapacity
,
285 int32_t *pDestLength
,
288 UErrorCode
*pErrorCode
)
290 int32_t retVal
=0, count
=0 ;
291 UConverter
* conv
= NULL
;
292 UChar
* pTarget
= NULL
;
293 UChar
* pTargetLimit
= NULL
;
294 UChar
* target
= NULL
;
// Three fixed-size stack buffers: uStack for UChars, wStack for a
// NUL-terminated copy of a wchar_t chunk, cStack for the chars.
296 UChar uStack
[_STACK_BUFFER_CAPACITY
];
298 wchar_t wStack
[_STACK_BUFFER_CAPACITY
];
299 wchar_t* pWStack
= wStack
;
302 char cStack
[_STACK_BUFFER_CAPACITY
];
303 int32_t cStackCap
= _STACK_BUFFER_CAPACITY
;
306 char* pCSrcLimit
=NULL
;
308 const wchar_t* pSrc
= src
;
309 const wchar_t* pSrcLimit
= NULL
;
312 /* if the wchar_t source is null terminated we can safely
313 * assume that there are no embedded nulls, this is a fast
314 * path for null terminated strings.
317 /* convert wchars to chars */
318 retVal
= uprv_wcstombs(pCSrc
,src
, cStackCap
);
321 *pErrorCode
= U_ILLEGAL_CHAR_FOUND
;
// A result within one byte of the capacity triggers a grow to twice the
// capacity; nothing is carried over (length argument 0).
// NOTE(review): the result of this grow call is not checked in the visible
// code - confirm.
323 }else if(retVal
>= (cStackCap
-1)){
324 /* Should rarely occur */
325 u_growAnyBufferFromStatic(cStack
,(void**)&pCSrc
,&cStackCap
,
326 cStackCap
* _BUFFER_CAPACITY_MULTIPLIER
, 0, sizeof(char));
329 /* converted every thing */
330 pCSrc
= pCSrc
+retVal
;
336 /* here the source is not null terminated
337 * so it may have nulls embeded and we need to
338 * do some extra processing
340 int32_t remaining
=cStackCap
;
342 pSrcLimit
= src
+ srcLength
;
347 /* find nulls in the string */
// nulLen ends up one past the NUL that stopped the scan, or equal to
// srcLength when no NUL was found.
348 while(nulLen
<srcLength
&& pSrc
[nulLen
++]!=0){
// The scan stopped before the end of the source.
351 if((pSrc
+nulLen
) < pSrcLimit
){
352 /* check if we have enough room in pCSrc */
// The room check budgets MB_CUR_MAX bytes for every wchar_t of the chunk.
353 if(remaining
< (nulLen
* MB_CUR_MAX
)){
354 /* should rarely occur */
355 int32_t len
= (pCSrc
-pCSave
);
357 /* we do not have enough room so grow the buffer*/
// NOTE(review): the result of this grow call is not checked in the visible
// code - confirm.
358 u_growAnyBufferFromStatic(cStack
,(void**)&pCSrc
,&cStackCap
,
359 _BUFFER_CAPACITY_MULTIPLIER
*cStackCap
+(nulLen
*MB_CUR_MAX
),len
,sizeof(char));
363 remaining
= cStackCap
-(pCSrc
- pCSave
);
366 /* we have found a null so convert the
367 * chunk from begining of non-null char to null
369 retVal
= uprv_wcstombs(pCSrc
,pSrc
,remaining
);
372 /* an error occurred bail out */
373 *pErrorCode
= U_ILLEGAL_CHAR_FOUND
;
377 pCSrc
+= retVal
+1 /* already null terminated */;
379 pSrc
+= nulLen
; /* skip past the null */
380 srcLength
-=nulLen
; /* decrement the srcLength */
// NOTE(review): this subtracts the total offset (pCSrc - pCSave) from
// remaining, whereas the post-grow code above recomputes remaining as
// cStackCap minus that offset - confirm which bookkeeping is intended.
381 remaining
-= (pCSrc
-pCSave
);
385 /* the source is not null terminated and we are
386 * end of source so we copy the source to a temp buffer
387 * null terminate it and convert wchar_ts to chars
// wStack holds _STACK_BUFFER_CAPACITY units, so a chunk of that length or
// longer gets a heap block with one extra unit for the terminator.
389 if(nulLen
>= _STACK_BUFFER_CAPACITY
){
390 /* Should rarely occur */
391 /* allocate a new buffer */
392 pWStack
=(wchar_t*) uprv_malloc(sizeof(wchar_t) * (nulLen
+ 1));
394 *pErrorCode
= U_MEMORY_ALLOCATION_ERROR
;
399 /* copy the contents to tempStack */
400 uprv_memcpy(pWStack
, pSrc
, (size_t)nulLen
*sizeof(wchar_t));
403 /* null terminate the tempBuffer */
// Same MB_CUR_MAX-per-unit room check as for the embedded-NUL chunks.
406 if(remaining
< (nulLen
* MB_CUR_MAX
)){
407 /* Should rarely occur */
408 int32_t len
= (pCSrc
-pCSave
);
410 /* we do not have enough room so grow the buffer*/
// NOTE(review): the result of this grow call is not checked in the visible
// code - confirm.
411 u_growAnyBufferFromStatic(cStack
,(void**)&pCSrc
,&cStackCap
,
412 cStackCap
+(nulLen
*MB_CUR_MAX
),len
,sizeof(char));
416 remaining
= cStackCap
-(pCSrc
- pCSave
);
418 /* convert to chars */
419 retVal
= uprv_wcstombs(pCSrc
,pWStack
,remaining
);
423 srcLength
-=nulLen
; /* decrement the srcLength */
429 /* OK..now we have converted from wchar_ts to chars now
430 * convert chars to UChars
434 pTarget
= target
= dest
;
435 pTargetLimit
= dest
+ destCapacity
;
437 conv
= u_getDefaultConverter(pErrorCode
);
439 if(U_FAILURE(*pErrorCode
)|| conv
==NULL
){
445 *pErrorCode
= U_ZERO_ERROR
;
447 /* convert to stack buffer*/
// The flush argument is true only when pCSrc already equals pCSrcLimit.
448 ucnv_toUnicode(conv
,&pTarget
,pTargetLimit
,(const char**)&pCSrc
,pCSrcLimit
,NULL
,(UBool
)(pCSrc
==pCSrcLimit
),pErrorCode
);
450 /* increment count to number written to stack */
451 count
+= pTarget
- target
;
// On overflow the write limit is moved to the end of uStack (presumably so
// that conversion can continue there and the total length still be
// counted - confirm).
453 if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
){
456 pTargetLimit
= uStack
+ _STACK_BUFFER_CAPACITY
;
467 u_terminateUChars(dest
,destCapacity
,count
,pErrorCode
);
// These two tests detect that a heap block replaced the char buffer or the
// wchar_t buffer; their bodies are not visible in this listing.
471 if(cStack
!= pCSave
){
475 if(wStack
!= pWStack
){
479 u_releaseDefaultConverter(conv
);
// Public entry point: converts a wchar_t string to a UChar string.
//
// dest         - output buffer; may be NULL only when destCapacity is 0.
// destCapacity - capacity of dest in UChars; must not be negative.
// pDestLength  - receives the output length.
// pErrorCode   - in/out ICU error code; the first test below looks at a
//                NULL pointer or an already failed code.
// The src and srcLength parameters are used below, but their declaration
// lines are missing from this listing.
//
// Arguments are rejected with U_ILLEGAL_ARGUMENT_ERROR when src is NULL with
// a non-zero srcLength, srcLength is below -1, destCapacity is negative, or
// dest is NULL with a positive destCapacity.
//
// Three compile-time paths are visible:
//   U_WCHAR_IS_UTF16 - src is reinterpreted as UChars, copied with
//                      u_memcpy(), and dest is passed to u_terminateUChars();
//   U_WCHAR_IS_UTF32 - delegates to u_strFromUTF32();
//   remaining branch - delegates to _strFromWCS().
//
// NOTE(review): the embedded original line numbers skip, so the opening
// brace, the early returns, the srcLength == -1 guard that presumably
// precedes u_strlen(), the NULL check on pDestLength, the else/endif lines
// and the closing braces are not visible here - confirm against the
// complete file.
485 U_CAPI UChar
* U_EXPORT2
486 u_strFromWCS(UChar
*dest
,
487 int32_t destCapacity
,
488 int32_t *pDestLength
,
491 UErrorCode
*pErrorCode
)
495 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)){
499 if( (src
==NULL
&& srcLength
!=0) || srcLength
< -1 ||
500 (destCapacity
<0) || (dest
== NULL
&& destCapacity
> 0)
502 *pErrorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
506 #ifdef U_WCHAR_IS_UTF16
507 /* wchar_t is UTF-16 just do a memcpy */
509 srcLength
= u_strlen((const UChar
*)src
);
// Copy only when there is something to copy and it fits in dest.
511 if(0 < srcLength
&& srcLength
<= destCapacity
){
512 u_memcpy(dest
, (const UChar
*)src
, srcLength
);
515 *pDestLength
= srcLength
;
518 u_terminateUChars(dest
,destCapacity
,srcLength
,pErrorCode
);
522 #elif defined U_WCHAR_IS_UTF32
524 return u_strFromUTF32(dest
, destCapacity
, pDestLength
,
525 (UChar32
*)src
, srcLength
, pErrorCode
);
529 return _strFromWCS(dest
,destCapacity
,pDestLength
,src
,srcLength
,pErrorCode
);
535 #endif /* #if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION */