]>
Commit | Line | Data |
---|---|---|
b75a7d8f A |
1 | /* |
2 | ****************************************************************************** | |
3 | * | |
374ca955 | 4 | * Copyright (C) 2001-2004, International Business Machines |
b75a7d8f A |
5 | * Corporation and others. All Rights Reserved. |
6 | * | |
7 | ****************************************************************************** | |
8 | * | |
9 | * File ustrtrns.c | |
10 | * | |
11 | * Modification History: | |
12 | * | |
13 | * Date Name Description | |
14 | * 9/10/2001 Ram Creation. | |
15 | ****************************************************************************** | |
16 | */ | |
17 | ||
18 | /******************************************************************************* | |
19 | * | |
20 | * u_strTo* and u_strFrom* APIs | |
374ca955 | 21 | * WCS functions moved to ustr_wcs.c for better modularization |
b75a7d8f A |
22 | * |
23 | ******************************************************************************* | |
24 | */ | |
25 | ||
26 | ||
27 | #include "unicode/putil.h" | |
b75a7d8f A |
28 | #include "unicode/ustring.h" |
29 | #include "cstring.h" | |
b75a7d8f A |
30 | #include "cmemory.h" |
31 | #include "ustr_imp.h" | |
32 | ||
b75a7d8f A |
33 | U_CAPI UChar* U_EXPORT2 |
34 | u_strFromUTF32(UChar *dest, | |
35 | int32_t destCapacity, | |
36 | int32_t *pDestLength, | |
37 | const UChar32 *src, | |
38 | int32_t srcLength, | |
39 | UErrorCode *pErrorCode) | |
40 | { | |
41 | int32_t reqLength = 0; | |
42 | uint32_t ch =0; | |
43 | UChar *pDestLimit =dest+destCapacity; | |
44 | UChar *pDest = dest; | |
45 | const uint32_t *pSrc = (const uint32_t *)src; | |
46 | ||
47 | /* args check */ | |
48 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){ | |
49 | return NULL; | |
50 | } | |
51 | ||
374ca955 | 52 | if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ |
b75a7d8f A |
53 | *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; |
54 | return NULL; | |
55 | } | |
56 | ||
57 | /* Check if the source is null terminated */ | |
58 | if(srcLength == -1 ){ | |
59 | while(((ch=*pSrc)!=0) && (pDest < pDestLimit)){ | |
60 | ++pSrc; | |
61 | if(ch<=0xFFFF){ | |
62 | *(pDest++)=(UChar)ch; | |
63 | }else if(ch<=0x10ffff){ | |
64 | *(pDest++)=UTF16_LEAD(ch); | |
65 | if(pDest<pDestLimit){ | |
66 | *(pDest++)=UTF16_TRAIL(ch); | |
67 | }else{ | |
68 | reqLength++; | |
69 | break; | |
70 | } | |
71 | }else{ | |
72 | *pErrorCode = U_INVALID_CHAR_FOUND; | |
73 | return NULL; | |
74 | } | |
75 | } | |
76 | while((ch=*pSrc++) != 0){ | |
77 | reqLength+=UTF_CHAR_LENGTH(ch); | |
78 | } | |
79 | }else{ | |
80 | const uint32_t* pSrcLimit = ((const uint32_t*)pSrc) + srcLength; | |
81 | while((pSrc < pSrcLimit) && (pDest < pDestLimit)){ | |
82 | ch = *pSrc++; | |
83 | if(ch<=0xFFFF){ | |
84 | *(pDest++)=(UChar)ch; | |
85 | }else if(ch<=0x10FFFF){ | |
86 | *(pDest++)=UTF16_LEAD(ch); | |
87 | if(pDest<pDestLimit){ | |
88 | *(pDest++)=UTF16_TRAIL(ch); | |
89 | }else{ | |
90 | reqLength++; | |
91 | break; | |
92 | } | |
93 | }else{ | |
94 | *pErrorCode = U_INVALID_CHAR_FOUND; | |
95 | return NULL; | |
96 | } | |
97 | } | |
98 | while(pSrc <pSrcLimit){ | |
99 | ch = *pSrc++; | |
100 | reqLength+=UTF_CHAR_LENGTH(ch); | |
101 | } | |
102 | } | |
103 | ||
104 | reqLength += pDest - dest; | |
105 | if(pDestLength){ | |
106 | *pDestLength = reqLength; | |
107 | } | |
108 | ||
109 | /* Terminate the buffer */ | |
110 | u_terminateUChars(dest,destCapacity,reqLength,pErrorCode); | |
111 | ||
112 | return dest; | |
113 | } | |
114 | ||
115 | ||
116 | U_CAPI UChar32* U_EXPORT2 | |
117 | u_strToUTF32(UChar32 *dest, | |
118 | int32_t destCapacity, | |
119 | int32_t *pDestLength, | |
120 | const UChar *src, | |
121 | int32_t srcLength, | |
122 | UErrorCode *pErrorCode) | |
123 | { | |
124 | const UChar* pSrc = src; | |
125 | const UChar* pSrcLimit; | |
126 | int32_t reqLength=0; | |
127 | uint32_t ch=0; | |
128 | uint32_t *pDest = (uint32_t *)dest; | |
129 | uint32_t *pDestLimit = pDest + destCapacity; | |
130 | UChar ch2=0; | |
131 | ||
132 | /* args check */ | |
133 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){ | |
134 | return NULL; | |
135 | } | |
136 | ||
137 | ||
374ca955 | 138 | if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ |
b75a7d8f A |
139 | *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; |
140 | return NULL; | |
141 | } | |
142 | ||
143 | if(srcLength==-1) { | |
144 | while((ch=*pSrc)!=0 && pDest!=pDestLimit) { | |
145 | ++pSrc; | |
146 | /*need not check for NUL because NUL fails UTF_IS_TRAIL() anyway*/ | |
147 | if(UTF_IS_LEAD(ch) && UTF_IS_TRAIL(ch2=*pSrc)) { | |
148 | ++pSrc; | |
149 | ch=UTF16_GET_PAIR_VALUE(ch, ch2); | |
150 | } | |
151 | *(pDest++)= ch; | |
152 | } | |
153 | while((ch=*pSrc++)!=0) { | |
154 | if(UTF_IS_LEAD(ch) && UTF_IS_TRAIL(ch2=*pSrc)) { | |
155 | ++pSrc; | |
156 | } | |
157 | ++reqLength; | |
158 | } | |
159 | } else { | |
160 | pSrcLimit = pSrc+srcLength; | |
161 | while(pSrc<pSrcLimit && pDest<pDestLimit) { | |
162 | ch=*pSrc++; | |
163 | if(UTF_IS_LEAD(ch) && pSrc<pSrcLimit && UTF_IS_TRAIL(ch2=*pSrc)) { | |
164 | ++pSrc; | |
165 | ch=UTF16_GET_PAIR_VALUE(ch, ch2); | |
166 | } | |
167 | *(pDest++)= ch; | |
168 | } | |
169 | while(pSrc!=pSrcLimit) { | |
170 | ch=*pSrc++; | |
171 | if(UTF_IS_LEAD(ch) && pSrc<pSrcLimit && UTF_IS_TRAIL(ch2=*pSrc)) { | |
172 | ++pSrc; | |
173 | } | |
174 | ++reqLength; | |
175 | } | |
176 | } | |
177 | ||
178 | reqLength+=(pDest - (uint32_t *)dest); | |
179 | if(pDestLength){ | |
180 | *pDestLength = reqLength; | |
181 | } | |
182 | ||
183 | /* Terminate the buffer */ | |
184 | u_terminateUChar32s(dest,destCapacity,reqLength,pErrorCode); | |
185 | ||
186 | return dest; | |
187 | } | |
188 | ||
189 | U_CAPI UChar* U_EXPORT2 | |
190 | u_strFromUTF8(UChar *dest, | |
191 | int32_t destCapacity, | |
192 | int32_t *pDestLength, | |
193 | const char* src, | |
194 | int32_t srcLength, | |
195 | UErrorCode *pErrorCode){ | |
196 | ||
197 | UChar *pDest = dest; | |
198 | UChar *pDestLimit = dest+destCapacity; | |
199 | UChar32 ch=0; | |
200 | int32_t index = 0; | |
201 | int32_t reqLength = 0; | |
202 | uint8_t* pSrc = (uint8_t*) src; | |
203 | ||
204 | /* args check */ | |
205 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){ | |
206 | return NULL; | |
207 | } | |
208 | ||
374ca955 | 209 | if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ |
b75a7d8f A |
210 | *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; |
211 | return NULL; | |
212 | } | |
213 | ||
214 | if(srcLength == -1){ | |
215 | srcLength = uprv_strlen((char*)pSrc); | |
216 | } | |
217 | ||
218 | while((index < srcLength)&&(pDest<pDestLimit)){ | |
219 | ch = pSrc[index++]; | |
220 | if(ch <=0x7f){ | |
221 | *pDest++=(UChar)ch; | |
222 | }else{ | |
223 | ch=utf8_nextCharSafeBody(pSrc, &index, srcLength, ch, -1); | |
224 | if(ch<0){ | |
225 | *pErrorCode = U_INVALID_CHAR_FOUND; | |
226 | return NULL; | |
227 | }else if(ch<=0xFFFF){ | |
228 | *(pDest++)=(UChar)ch; | |
229 | }else{ | |
230 | *(pDest++)=UTF16_LEAD(ch); | |
231 | if(pDest<pDestLimit){ | |
232 | *(pDest++)=UTF16_TRAIL(ch); | |
233 | }else{ | |
234 | reqLength++; | |
235 | break; | |
236 | } | |
237 | } | |
238 | } | |
239 | } | |
240 | /* donot fill the dest buffer just count the UChars needed */ | |
241 | while(index < srcLength){ | |
242 | ch = pSrc[index++]; | |
243 | if(ch <= 0x7f){ | |
244 | reqLength++; | |
245 | }else{ | |
246 | ch=utf8_nextCharSafeBody(pSrc, &index, srcLength, ch, -1); | |
247 | if(ch<0){ | |
248 | *pErrorCode = U_INVALID_CHAR_FOUND; | |
249 | return NULL; | |
250 | } | |
251 | reqLength+=UTF_CHAR_LENGTH(ch); | |
252 | } | |
253 | } | |
254 | ||
255 | reqLength+=(pDest - dest); | |
256 | ||
257 | if(pDestLength){ | |
258 | *pDestLength = reqLength; | |
259 | } | |
260 | ||
261 | /* Terminate the buffer */ | |
262 | u_terminateUChars(dest,destCapacity,reqLength,pErrorCode); | |
263 | ||
264 | return dest; | |
265 | } | |
266 | ||
267 | static U_INLINE uint8_t * | |
268 | _appendUTF8(uint8_t *pDest, UChar32 c) { | |
269 | /* c<=0x7f is handled by the caller, here it is 0x80<=c<=0x10ffff */ | |
270 | if((c)<=0x7ff) { | |
271 | *pDest++=(uint8_t)((c>>6)|0xc0); | |
272 | *pDest++=(uint8_t)((c&0x3f)|0x80); | |
273 | } else if((uint32_t)(c)<=0xffff) { | |
274 | *pDest++=(uint8_t)((c>>12)|0xe0); | |
275 | *pDest++=(uint8_t)(((c>>6)&0x3f)|0x80); | |
276 | *pDest++=(uint8_t)(((c)&0x3f)|0x80); | |
277 | } else /* if((uint32_t)(c)<=0x10ffff) */ { | |
278 | *pDest++=(uint8_t)(((c)>>18)|0xf0); | |
279 | *pDest++=(uint8_t)((((c)>>12)&0x3f)|0x80); | |
280 | *pDest++=(uint8_t)((((c)>>6)&0x3f)|0x80); | |
281 | *pDest++=(uint8_t)(((c)&0x3f)|0x80); | |
282 | } | |
283 | return pDest; | |
284 | } | |
285 | ||
286 | ||
287 | U_CAPI char* U_EXPORT2 | |
288 | u_strToUTF8(char *dest, | |
289 | int32_t destCapacity, | |
290 | int32_t *pDestLength, | |
291 | const UChar *pSrc, | |
292 | int32_t srcLength, | |
293 | UErrorCode *pErrorCode){ | |
294 | ||
295 | int32_t reqLength=0; | |
296 | const UChar *pSrcLimit; | |
297 | uint32_t ch=0,ch2=0; | |
298 | uint8_t *pDest = (uint8_t *)dest; | |
299 | uint8_t *pDestLimit = pDest + destCapacity; | |
300 | ||
301 | ||
302 | /* args check */ | |
303 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){ | |
304 | return NULL; | |
305 | } | |
306 | ||
374ca955 | 307 | if((pSrc==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ |
b75a7d8f A |
308 | *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; |
309 | return NULL; | |
310 | } | |
311 | ||
312 | if(srcLength==-1) { | |
313 | while((ch=*pSrc)!=0 && pDest!=pDestLimit) { | |
314 | ++pSrc; | |
315 | if(ch <= 0x7f) { | |
316 | *pDest++ = (char)ch; | |
317 | ++reqLength; | |
318 | continue; | |
319 | } | |
320 | ||
321 | /*need not check for NUL because NUL fails UTF_IS_TRAIL() anyway*/ | |
322 | if(UTF_IS_SURROGATE(ch)) { | |
323 | if(UTF_IS_SURROGATE_FIRST(ch) && UTF_IS_TRAIL(ch2=*pSrc)) { | |
324 | ++pSrc; | |
325 | ch=UTF16_GET_PAIR_VALUE(ch, ch2); | |
326 | } else { | |
327 | /* Unicode 3.2 forbids surrogate code points in UTF-8 */ | |
328 | *pErrorCode = U_INVALID_CHAR_FOUND; | |
329 | return NULL; | |
330 | } | |
331 | } | |
332 | reqLength += UTF8_CHAR_LENGTH(ch); | |
333 | /* do we have enough room in destination? */ | |
334 | if(destCapacity< reqLength){ | |
335 | break; | |
336 | } | |
337 | /* convert and append*/ | |
338 | pDest=_appendUTF8(pDest, ch); | |
339 | } | |
340 | while((ch=*pSrc++)!=0) { | |
341 | if(ch<=0x7f) { | |
342 | ++reqLength; | |
343 | } else if(ch<=0x7ff) { | |
344 | reqLength+=2; | |
345 | } else if(!UTF_IS_SURROGATE(ch)) { | |
346 | reqLength+=3; | |
347 | } else if(UTF_IS_SURROGATE_FIRST(ch) && UTF_IS_TRAIL(ch2=*pSrc)) { | |
348 | ++pSrc; | |
349 | reqLength+=4; | |
350 | } else { | |
351 | /* Unicode 3.2 forbids surrogate code points in UTF-8 */ | |
352 | *pErrorCode = U_INVALID_CHAR_FOUND; | |
353 | return NULL; | |
354 | } | |
355 | } | |
356 | } else { | |
357 | pSrcLimit = pSrc+srcLength; | |
358 | while(pSrc<pSrcLimit && pDest<pDestLimit) { | |
359 | ch=*pSrc++; | |
360 | if(ch <= 0x7f) { | |
361 | *pDest++ = (char)ch; | |
362 | ++reqLength; | |
363 | continue; | |
364 | } | |
365 | ||
366 | if(UTF_IS_SURROGATE(ch)) { | |
367 | if(UTF_IS_SURROGATE_FIRST(ch) && pSrc<pSrcLimit && UTF_IS_TRAIL(ch2=*pSrc)) { | |
368 | ++pSrc; | |
369 | ch=UTF16_GET_PAIR_VALUE(ch, ch2); | |
370 | } else { | |
371 | /* Unicode 3.2 forbids surrogate code points in UTF-8 */ | |
372 | *pErrorCode = U_INVALID_CHAR_FOUND; | |
373 | return NULL; | |
374 | } | |
375 | } | |
376 | reqLength += UTF8_CHAR_LENGTH(ch); | |
377 | /* do we have enough room in destination? */ | |
378 | if(destCapacity< reqLength){ | |
379 | break; | |
380 | } | |
381 | /* convert and append*/ | |
382 | pDest=_appendUTF8(pDest, ch); | |
383 | } | |
384 | while(pSrc<pSrcLimit) { | |
385 | ch=*pSrc++; | |
386 | if(ch<=0x7f) { | |
387 | ++reqLength; | |
388 | } else if(ch<=0x7ff) { | |
389 | reqLength+=2; | |
390 | } else if(!UTF_IS_SURROGATE(ch)) { | |
391 | reqLength+=3; | |
392 | } else if(UTF_IS_SURROGATE_FIRST(ch) && pSrc<pSrcLimit && UTF_IS_TRAIL(ch2=*pSrc)) { | |
393 | ++pSrc; | |
394 | reqLength+=4; | |
395 | } else { | |
396 | /* Unicode 3.2 forbids surrogate code points in UTF-8 */ | |
397 | *pErrorCode = U_INVALID_CHAR_FOUND; | |
398 | return NULL; | |
399 | } | |
400 | } | |
401 | } | |
402 | ||
403 | if(pDestLength){ | |
404 | *pDestLength = reqLength; | |
405 | } | |
406 | ||
407 | /* Terminate the buffer */ | |
408 | u_terminateChars((char*)dest,destCapacity,reqLength,pErrorCode); | |
409 | ||
410 | return (char*)dest; | |
411 | } |