]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/ustr_wcs.cpp
ICU-491.11.1.tar.gz
[apple/icu.git] / icuSources / common / ustr_wcs.cpp
1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 2001-2012, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: ustr_wcs.cpp
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2004sep07
14 * created by: Markus W. Scherer
15 *
16 * u_strToWCS() and u_strFromWCS() functions
17 * moved here from ustrtrns.c for better modularization.
18 */
19
20 #include "unicode/utypes.h"
21 #include "unicode/ustring.h"
22 #include "cstring.h"
23 #include "cwchar.h"
24 #include "cmemory.h"
25 #include "ustr_imp.h"
26 #include "ustr_cnv.h"
27
28 #if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION
29
30 #define _STACK_BUFFER_CAPACITY 1000
31 #define _BUFFER_CAPACITY_MULTIPLIER 2
32
33 #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
34 // TODO: We should use CharString for char buffers and UnicodeString for UChar buffers.
35 // Then we could change this to work only with wchar_t buffers.
36 static inline UBool
37 u_growAnyBufferFromStatic(void *context,
38 void **pBuffer, int32_t *pCapacity, int32_t reqCapacity,
39 int32_t length, int32_t size) {
40 // Use char* not void* to avoid the compiler's strict-aliasing assumptions
41 // and related warnings.
42 char *newBuffer=(char *)uprv_malloc(reqCapacity*size);
43 if(newBuffer!=NULL) {
44 if(length>0) {
45 uprv_memcpy(newBuffer, *pBuffer, length*size);
46 }
47 *pCapacity=reqCapacity;
48 } else {
49 *pCapacity=0;
50 }
51
52 /* release the old pBuffer if it was not statically allocated */
53 if(*pBuffer!=(char *)context) {
54 uprv_free(*pBuffer);
55 }
56
57 *pBuffer=newBuffer;
58 return (UBool)(newBuffer!=NULL);
59 }
60
61 /* helper function */
62 static wchar_t*
63 _strToWCS(wchar_t *dest,
64 int32_t destCapacity,
65 int32_t *pDestLength,
66 const UChar *src,
67 int32_t srcLength,
68 UErrorCode *pErrorCode){
69
70 char stackBuffer [_STACK_BUFFER_CAPACITY];
71 char* tempBuf = stackBuffer;
72 int32_t tempBufCapacity = _STACK_BUFFER_CAPACITY;
73 char* tempBufLimit = stackBuffer + tempBufCapacity;
74 UConverter* conv = NULL;
75 char* saveBuf = tempBuf;
76 wchar_t* intTarget=NULL;
77 int32_t intTargetCapacity=0;
78 int count=0,retVal=0;
79
80 const UChar *pSrcLimit =NULL;
81 const UChar *pSrc = src;
82
83 conv = u_getDefaultConverter(pErrorCode);
84
85 if(U_FAILURE(*pErrorCode)){
86 return NULL;
87 }
88
89 if(srcLength == -1){
90 srcLength = u_strlen(pSrc);
91 }
92
93 pSrcLimit = pSrc + srcLength;
94
95 for(;;) {
96 /* reset the error state */
97 *pErrorCode = U_ZERO_ERROR;
98
99 /* convert to chars using default converter */
100 ucnv_fromUnicode(conv,&tempBuf,tempBufLimit,&pSrc,pSrcLimit,NULL,(UBool)(pSrc==pSrcLimit),pErrorCode);
101 count =(tempBuf - saveBuf);
102
103 /* This should rarely occur */
104 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){
105 tempBuf = saveBuf;
106
107 /* we dont have enough room on the stack grow the buffer */
108 int32_t newCapacity = 2 * srcLength;
109 if(newCapacity <= tempBufCapacity) {
110 newCapacity = _BUFFER_CAPACITY_MULTIPLIER * tempBufCapacity;
111 }
112 if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity,
113 newCapacity, count, 1)) {
114 goto cleanup;
115 }
116
117 saveBuf = tempBuf;
118 tempBufLimit = tempBuf + tempBufCapacity;
119 tempBuf = tempBuf + count;
120
121 } else {
122 break;
123 }
124 }
125
126 if(U_FAILURE(*pErrorCode)){
127 goto cleanup;
128 }
129
130 /* done with conversion null terminate the char buffer */
131 if(count>=tempBufCapacity){
132 tempBuf = saveBuf;
133 /* we dont have enough room on the stack grow the buffer */
134 if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity,
135 count+1, count, 1)) {
136 goto cleanup;
137 }
138 saveBuf = tempBuf;
139 }
140
141 saveBuf[count]=0;
142
143
144 /* allocate more space than required
145 * here we assume that every char requires
146 * no more than 2 wchar_ts
147 */
148 intTargetCapacity = (count * _BUFFER_CAPACITY_MULTIPLIER + 1) /*for null termination */;
149 intTarget = (wchar_t*)uprv_malloc( intTargetCapacity * sizeof(wchar_t) );
150
151 if(intTarget){
152
153 int32_t nulLen = 0;
154 int32_t remaining = intTargetCapacity;
155 wchar_t* pIntTarget=intTarget;
156 tempBuf = saveBuf;
157
158 /* now convert the mbs to wcs */
159 for(;;){
160
161 /* we can call the system API since we are sure that
162 * there is atleast 1 null in the input
163 */
164 retVal = uprv_mbstowcs(pIntTarget,(tempBuf+nulLen),remaining);
165
166 if(retVal==-1){
167 *pErrorCode = U_INVALID_CHAR_FOUND;
168 break;
169 }else if(retVal== remaining){/* should never occur */
170 int numWritten = (pIntTarget-intTarget);
171 u_growAnyBufferFromStatic(NULL,(void**) &intTarget,
172 &intTargetCapacity,
173 intTargetCapacity * _BUFFER_CAPACITY_MULTIPLIER,
174 numWritten,
175 sizeof(wchar_t));
176 pIntTarget = intTarget;
177 remaining=intTargetCapacity;
178
179 if(nulLen!=count){ /*there are embedded nulls*/
180 pIntTarget+=numWritten;
181 remaining-=numWritten;
182 }
183
184 }else{
185 int32_t nulVal;
186 /*scan for nulls */
187 /* we donot check for limit since tempBuf is null terminated */
188 while(tempBuf[nulLen++] != 0){
189 }
190 nulVal = (nulLen < srcLength) ? 1 : 0;
191 pIntTarget = pIntTarget + retVal+nulVal;
192 remaining -=(retVal+nulVal);
193
194 /* check if we have reached the source limit*/
195 if(nulLen>=(count)){
196 break;
197 }
198 }
199 }
200 count = (int32_t)(pIntTarget-intTarget);
201
202 if(0 < count && count <= destCapacity){
203 uprv_memcpy(dest,intTarget,count*sizeof(wchar_t));
204 }
205
206 if(pDestLength){
207 *pDestLength = count;
208 }
209
210 /* free the allocated memory */
211 uprv_free(intTarget);
212
213 }else{
214 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
215 }
216 cleanup:
217 /* are we still using stack buffer */
218 if(stackBuffer != saveBuf){
219 uprv_free(saveBuf);
220 }
221 u_terminateWChars(dest,destCapacity,count,pErrorCode);
222
223 u_releaseDefaultConverter(conv);
224
225 return dest;
226 }
227 #endif
228
229 U_CAPI wchar_t* U_EXPORT2
230 u_strToWCS(wchar_t *dest,
231 int32_t destCapacity,
232 int32_t *pDestLength,
233 const UChar *src,
234 int32_t srcLength,
235 UErrorCode *pErrorCode){
236
237 /* args check */
238 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
239 return NULL;
240 }
241
242 if( (src==NULL && srcLength!=0) || srcLength < -1 ||
243 (destCapacity<0) || (dest == NULL && destCapacity > 0)
244 ) {
245 *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
246 return NULL;
247 }
248
249 #ifdef U_WCHAR_IS_UTF16
250 /* wchar_t is UTF-16 just do a memcpy */
251 if(srcLength == -1){
252 srcLength = u_strlen(src);
253 }
254 if(0 < srcLength && srcLength <= destCapacity){
255 uprv_memcpy(dest,src,srcLength*U_SIZEOF_UCHAR);
256 }
257 if(pDestLength){
258 *pDestLength = srcLength;
259 }
260
261 u_terminateUChars(dest,destCapacity,srcLength,pErrorCode);
262
263 return dest;
264
265 #elif defined U_WCHAR_IS_UTF32
266
267 return (wchar_t*)u_strToUTF32((UChar32*)dest, destCapacity, pDestLength,
268 src, srcLength, pErrorCode);
269
270 #else
271
272 return _strToWCS(dest,destCapacity,pDestLength,src,srcLength, pErrorCode);
273
274 #endif
275
276 }
277
278 #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
279 /* helper function */
280 static UChar*
281 _strFromWCS( UChar *dest,
282 int32_t destCapacity,
283 int32_t *pDestLength,
284 const wchar_t *src,
285 int32_t srcLength,
286 UErrorCode *pErrorCode)
287 {
288 int32_t retVal =0, count =0 ;
289 UConverter* conv = NULL;
290 UChar* pTarget = NULL;
291 UChar* pTargetLimit = NULL;
292 UChar* target = NULL;
293
294 UChar uStack [_STACK_BUFFER_CAPACITY];
295
296 wchar_t wStack[_STACK_BUFFER_CAPACITY];
297 wchar_t* pWStack = wStack;
298
299
300 char cStack[_STACK_BUFFER_CAPACITY];
301 int32_t cStackCap = _STACK_BUFFER_CAPACITY;
302 char* pCSrc=cStack;
303 char* pCSave=pCSrc;
304 char* pCSrcLimit=NULL;
305
306 const wchar_t* pSrc = src;
307 const wchar_t* pSrcLimit = NULL;
308
309 if(srcLength ==-1){
310 /* if the wchar_t source is null terminated we can safely
311 * assume that there are no embedded nulls, this is a fast
312 * path for null terminated strings.
313 */
314 for(;;){
315 /* convert wchars to chars */
316 retVal = uprv_wcstombs(pCSrc,src, cStackCap);
317
318 if(retVal == -1){
319 *pErrorCode = U_ILLEGAL_CHAR_FOUND;
320 goto cleanup;
321 }else if(retVal >= (cStackCap-1)){
322 /* Should rarely occur */
323 u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap,
324 cStackCap * _BUFFER_CAPACITY_MULTIPLIER, 0, sizeof(char));
325 pCSave = pCSrc;
326 }else{
327 /* converted every thing */
328 pCSrc = pCSrc+retVal;
329 break;
330 }
331 }
332
333 }else{
334 /* here the source is not null terminated
335 * so it may have nulls embeded and we need to
336 * do some extra processing
337 */
338 int32_t remaining =cStackCap;
339
340 pSrcLimit = src + srcLength;
341
342 for(;;){
343 register int32_t nulLen = 0;
344
345 /* find nulls in the string */
346 while(nulLen<srcLength && pSrc[nulLen++]!=0){
347 }
348
349 if((pSrc+nulLen) < pSrcLimit){
350 /* check if we have enough room in pCSrc */
351 if(remaining < (nulLen * MB_CUR_MAX)){
352 /* should rarely occur */
353 int32_t len = (pCSrc-pCSave);
354 pCSrc = pCSave;
355 /* we do not have enough room so grow the buffer*/
356 u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap,
357 _BUFFER_CAPACITY_MULTIPLIER*cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char));
358
359 pCSave = pCSrc;
360 pCSrc = pCSave+len;
361 remaining = cStackCap-(pCSrc - pCSave);
362 }
363
364 /* we have found a null so convert the
365 * chunk from begining of non-null char to null
366 */
367 retVal = uprv_wcstombs(pCSrc,pSrc,remaining);
368
369 if(retVal==-1){
370 /* an error occurred bail out */
371 *pErrorCode = U_ILLEGAL_CHAR_FOUND;
372 goto cleanup;
373 }
374
375 pCSrc += retVal+1 /* already null terminated */;
376
377 pSrc += nulLen; /* skip past the null */
378 srcLength-=nulLen; /* decrement the srcLength */
379 remaining -= (pCSrc-pCSave);
380
381
382 }else{
383 /* the source is not null terminated and we are
384 * end of source so we copy the source to a temp buffer
385 * null terminate it and convert wchar_ts to chars
386 */
387 if(nulLen >= _STACK_BUFFER_CAPACITY){
388 /* Should rarely occcur */
389 /* allocate new buffer buffer */
390 pWStack =(wchar_t*) uprv_malloc(sizeof(wchar_t) * (nulLen + 1));
391 if(pWStack==NULL){
392 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
393 goto cleanup;
394 }
395 }
396 if(nulLen>0){
397 /* copy the contents to tempStack */
398 uprv_memcpy(pWStack,pSrc,nulLen*sizeof(wchar_t));
399 }
400
401 /* null terminate the tempBuffer */
402 pWStack[nulLen] =0 ;
403
404 if(remaining < (nulLen * MB_CUR_MAX)){
405 /* Should rarely occur */
406 int32_t len = (pCSrc-pCSave);
407 pCSrc = pCSave;
408 /* we do not have enough room so grow the buffer*/
409 u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap,
410 cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char));
411
412 pCSave = pCSrc;
413 pCSrc = pCSave+len;
414 remaining = cStackCap-(pCSrc - pCSave);
415 }
416 /* convert to chars */
417 retVal = uprv_wcstombs(pCSrc,pWStack,remaining);
418
419 pCSrc += retVal;
420 pSrc += nulLen;
421 srcLength-=nulLen; /* decrement the srcLength */
422 break;
423 }
424 }
425 }
426
427 /* OK..now we have converted from wchar_ts to chars now
428 * convert chars to UChars
429 */
430 pCSrcLimit = pCSrc;
431 pCSrc = pCSave;
432 pTarget = target= dest;
433 pTargetLimit = dest + destCapacity;
434
435 conv= u_getDefaultConverter(pErrorCode);
436
437 if(U_FAILURE(*pErrorCode)|| conv==NULL){
438 goto cleanup;
439 }
440
441 for(;;) {
442
443 *pErrorCode = U_ZERO_ERROR;
444
445 /* convert to stack buffer*/
446 ucnv_toUnicode(conv,&pTarget,pTargetLimit,(const char**)&pCSrc,pCSrcLimit,NULL,(UBool)(pCSrc==pCSrcLimit),pErrorCode);
447
448 /* increment count to number written to stack */
449 count+= pTarget - target;
450
451 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){
452 target = uStack;
453 pTarget = uStack;
454 pTargetLimit = uStack + _STACK_BUFFER_CAPACITY;
455 } else {
456 break;
457 }
458
459 }
460
461 if(pDestLength){
462 *pDestLength =count;
463 }
464
465 u_terminateUChars(dest,destCapacity,count,pErrorCode);
466
467 cleanup:
468
469 if(cStack != pCSave){
470 uprv_free(pCSave);
471 }
472
473 if(wStack != pWStack){
474 uprv_free(pWStack);
475 }
476
477 u_releaseDefaultConverter(conv);
478
479 return dest;
480 }
481 #endif
482
483 U_CAPI UChar* U_EXPORT2
484 u_strFromWCS(UChar *dest,
485 int32_t destCapacity,
486 int32_t *pDestLength,
487 const wchar_t *src,
488 int32_t srcLength,
489 UErrorCode *pErrorCode)
490 {
491
492 /* args check */
493 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
494 return NULL;
495 }
496
497 if( (src==NULL && srcLength!=0) || srcLength < -1 ||
498 (destCapacity<0) || (dest == NULL && destCapacity > 0)
499 ) {
500 *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
501 return NULL;
502 }
503
504 #ifdef U_WCHAR_IS_UTF16
505 /* wchar_t is UTF-16 just do a memcpy */
506 if(srcLength == -1){
507 srcLength = u_strlen(src);
508 }
509 if(0 < srcLength && srcLength <= destCapacity){
510 uprv_memcpy(dest,src,srcLength*U_SIZEOF_UCHAR);
511 }
512 if(pDestLength){
513 *pDestLength = srcLength;
514 }
515
516 u_terminateUChars(dest,destCapacity,srcLength,pErrorCode);
517
518 return dest;
519
520 #elif defined U_WCHAR_IS_UTF32
521
522 return u_strFromUTF32(dest, destCapacity, pDestLength,
523 (UChar32*)src, srcLength, pErrorCode);
524
525 #else
526
527 return _strFromWCS(dest,destCapacity,pDestLength,src,srcLength,pErrorCode);
528
529 #endif
530
531 }
532
#endif /* #if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION */