]>
git.saurik.com Git - apple/icu.git/blob - icuSources/common/ustrtrns.c
2 ******************************************************************************
4 * Copyright (C) 2001-2004, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 ******************************************************************************
11 * Modification History:
13 * Date Name Description
14 * 9/10/2001 Ram Creation.
15 ******************************************************************************
18 /*******************************************************************************
20 * u_strTo* and u_strFrom* APIs
21 * WCS functions moved to ustr_wcs.c for better modularization
23 *******************************************************************************
27 #include "unicode/putil.h"
28 #include "unicode/ustring.h"
33 U_CAPI UChar
* U_EXPORT2
34 u_strFromUTF32(UChar
*dest
,
39 UErrorCode
*pErrorCode
)
41 int32_t reqLength
= 0;
43 UChar
*pDestLimit
=dest
+destCapacity
;
45 const uint32_t *pSrc
= (const uint32_t *)src
;
48 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)){
52 if((src
==NULL
) || (srcLength
< -1) || (destCapacity
<0) || (!dest
&& destCapacity
> 0)){
53 *pErrorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
57 /* Check if the source is null terminated */
59 while(((ch
=*pSrc
)!=0) && (pDest
< pDestLimit
)){
63 }else if(ch
<=0x10ffff){
64 *(pDest
++)=UTF16_LEAD(ch
);
66 *(pDest
++)=UTF16_TRAIL(ch
);
72 *pErrorCode
= U_INVALID_CHAR_FOUND
;
76 while((ch
=*pSrc
++) != 0){
77 reqLength
+=UTF_CHAR_LENGTH(ch
);
80 const uint32_t* pSrcLimit
= ((const uint32_t*)pSrc
) + srcLength
;
81 while((pSrc
< pSrcLimit
) && (pDest
< pDestLimit
)){
85 }else if(ch
<=0x10FFFF){
86 *(pDest
++)=UTF16_LEAD(ch
);
88 *(pDest
++)=UTF16_TRAIL(ch
);
94 *pErrorCode
= U_INVALID_CHAR_FOUND
;
98 while(pSrc
<pSrcLimit
){
100 reqLength
+=UTF_CHAR_LENGTH(ch
);
104 reqLength
+= pDest
- dest
;
106 *pDestLength
= reqLength
;
109 /* Terminate the buffer */
110 u_terminateUChars(dest
,destCapacity
,reqLength
,pErrorCode
);
116 U_CAPI UChar32
* U_EXPORT2
117 u_strToUTF32(UChar32
*dest
,
118 int32_t destCapacity
,
119 int32_t *pDestLength
,
122 UErrorCode
*pErrorCode
)
124 const UChar
* pSrc
= src
;
125 const UChar
* pSrcLimit
;
128 uint32_t *pDest
= (uint32_t *)dest
;
129 uint32_t *pDestLimit
= pDest
+ destCapacity
;
133 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)){
138 if((src
==NULL
) || (srcLength
< -1) || (destCapacity
<0) || (!dest
&& destCapacity
> 0)){
139 *pErrorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
144 while((ch
=*pSrc
)!=0 && pDest
!=pDestLimit
) {
146 /*need not check for NUL because NUL fails UTF_IS_TRAIL() anyway*/
147 if(UTF_IS_LEAD(ch
) && UTF_IS_TRAIL(ch2
=*pSrc
)) {
149 ch
=UTF16_GET_PAIR_VALUE(ch
, ch2
);
153 while((ch
=*pSrc
++)!=0) {
154 if(UTF_IS_LEAD(ch
) && UTF_IS_TRAIL(ch2
=*pSrc
)) {
160 pSrcLimit
= pSrc
+srcLength
;
161 while(pSrc
<pSrcLimit
&& pDest
<pDestLimit
) {
163 if(UTF_IS_LEAD(ch
) && pSrc
<pSrcLimit
&& UTF_IS_TRAIL(ch2
=*pSrc
)) {
165 ch
=UTF16_GET_PAIR_VALUE(ch
, ch2
);
169 while(pSrc
!=pSrcLimit
) {
171 if(UTF_IS_LEAD(ch
) && pSrc
<pSrcLimit
&& UTF_IS_TRAIL(ch2
=*pSrc
)) {
178 reqLength
+=(pDest
- (uint32_t *)dest
);
180 *pDestLength
= reqLength
;
183 /* Terminate the buffer */
184 u_terminateUChar32s(dest
,destCapacity
,reqLength
,pErrorCode
);
189 U_CAPI UChar
* U_EXPORT2
190 u_strFromUTF8(UChar
*dest
,
191 int32_t destCapacity
,
192 int32_t *pDestLength
,
195 UErrorCode
*pErrorCode
){
198 UChar
*pDestLimit
= dest
+destCapacity
;
201 int32_t reqLength
= 0;
202 uint8_t* pSrc
= (uint8_t*) src
;
205 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)){
209 if((src
==NULL
) || (srcLength
< -1) || (destCapacity
<0) || (!dest
&& destCapacity
> 0)){
210 *pErrorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
215 srcLength
= uprv_strlen((char*)pSrc
);
218 while((index
< srcLength
)&&(pDest
<pDestLimit
)){
223 ch
=utf8_nextCharSafeBody(pSrc
, &index
, srcLength
, ch
, -1);
225 *pErrorCode
= U_INVALID_CHAR_FOUND
;
227 }else if(ch
<=0xFFFF){
228 *(pDest
++)=(UChar
)ch
;
230 *(pDest
++)=UTF16_LEAD(ch
);
231 if(pDest
<pDestLimit
){
232 *(pDest
++)=UTF16_TRAIL(ch
);
240 /* donot fill the dest buffer just count the UChars needed */
241 while(index
< srcLength
){
246 ch
=utf8_nextCharSafeBody(pSrc
, &index
, srcLength
, ch
, -1);
248 *pErrorCode
= U_INVALID_CHAR_FOUND
;
251 reqLength
+=UTF_CHAR_LENGTH(ch
);
255 reqLength
+=(pDest
- dest
);
258 *pDestLength
= reqLength
;
261 /* Terminate the buffer */
262 u_terminateUChars(dest
,destCapacity
,reqLength
,pErrorCode
);
267 static U_INLINE
uint8_t *
268 _appendUTF8(uint8_t *pDest
, UChar32 c
) {
269 /* c<=0x7f is handled by the caller, here it is 0x80<=c<=0x10ffff */
271 *pDest
++=(uint8_t)((c
>>6)|0xc0);
272 *pDest
++=(uint8_t)((c
&0x3f)|0x80);
273 } else if((uint32_t)(c
)<=0xffff) {
274 *pDest
++=(uint8_t)((c
>>12)|0xe0);
275 *pDest
++=(uint8_t)(((c
>>6)&0x3f)|0x80);
276 *pDest
++=(uint8_t)(((c
)&0x3f)|0x80);
277 } else /* if((uint32_t)(c)<=0x10ffff) */ {
278 *pDest
++=(uint8_t)(((c
)>>18)|0xf0);
279 *pDest
++=(uint8_t)((((c
)>>12)&0x3f)|0x80);
280 *pDest
++=(uint8_t)((((c
)>>6)&0x3f)|0x80);
281 *pDest
++=(uint8_t)(((c
)&0x3f)|0x80);
287 U_CAPI
char* U_EXPORT2
288 u_strToUTF8(char *dest
,
289 int32_t destCapacity
,
290 int32_t *pDestLength
,
293 UErrorCode
*pErrorCode
){
296 const UChar
*pSrcLimit
;
298 uint8_t *pDest
= (uint8_t *)dest
;
299 uint8_t *pDestLimit
= pDest
+ destCapacity
;
303 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)){
307 if((pSrc
==NULL
) || (srcLength
< -1) || (destCapacity
<0) || (!dest
&& destCapacity
> 0)){
308 *pErrorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
313 while((ch
=*pSrc
)!=0 && pDest
!=pDestLimit
) {
321 /*need not check for NUL because NUL fails UTF_IS_TRAIL() anyway*/
322 if(UTF_IS_SURROGATE(ch
)) {
323 if(UTF_IS_SURROGATE_FIRST(ch
) && UTF_IS_TRAIL(ch2
=*pSrc
)) {
325 ch
=UTF16_GET_PAIR_VALUE(ch
, ch2
);
327 /* Unicode 3.2 forbids surrogate code points in UTF-8 */
328 *pErrorCode
= U_INVALID_CHAR_FOUND
;
332 reqLength
+= UTF8_CHAR_LENGTH(ch
);
333 /* do we have enough room in destination? */
334 if(destCapacity
< reqLength
){
337 /* convert and append*/
338 pDest
=_appendUTF8(pDest
, ch
);
340 while((ch
=*pSrc
++)!=0) {
343 } else if(ch
<=0x7ff) {
345 } else if(!UTF_IS_SURROGATE(ch
)) {
347 } else if(UTF_IS_SURROGATE_FIRST(ch
) && UTF_IS_TRAIL(ch2
=*pSrc
)) {
351 /* Unicode 3.2 forbids surrogate code points in UTF-8 */
352 *pErrorCode
= U_INVALID_CHAR_FOUND
;
357 pSrcLimit
= pSrc
+srcLength
;
358 while(pSrc
<pSrcLimit
&& pDest
<pDestLimit
) {
366 if(UTF_IS_SURROGATE(ch
)) {
367 if(UTF_IS_SURROGATE_FIRST(ch
) && pSrc
<pSrcLimit
&& UTF_IS_TRAIL(ch2
=*pSrc
)) {
369 ch
=UTF16_GET_PAIR_VALUE(ch
, ch2
);
371 /* Unicode 3.2 forbids surrogate code points in UTF-8 */
372 *pErrorCode
= U_INVALID_CHAR_FOUND
;
376 reqLength
+= UTF8_CHAR_LENGTH(ch
);
377 /* do we have enough room in destination? */
378 if(destCapacity
< reqLength
){
381 /* convert and append*/
382 pDest
=_appendUTF8(pDest
, ch
);
384 while(pSrc
<pSrcLimit
) {
388 } else if(ch
<=0x7ff) {
390 } else if(!UTF_IS_SURROGATE(ch
)) {
392 } else if(UTF_IS_SURROGATE_FIRST(ch
) && pSrc
<pSrcLimit
&& UTF_IS_TRAIL(ch2
=*pSrc
)) {
396 /* Unicode 3.2 forbids surrogate code points in UTF-8 */
397 *pErrorCode
= U_INVALID_CHAR_FOUND
;
404 *pDestLength
= reqLength
;
407 /* Terminate the buffer */
408 u_terminateChars((char*)dest
,destCapacity
,reqLength
,pErrorCode
);