]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/ustrtrns.c
ICU-6.2.4.tar.gz
[apple/icu.git] / icuSources / common / ustrtrns.c
CommitLineData
b75a7d8f
A
1/*
2******************************************************************************
3*
374ca955 4* Copyright (C) 2001-2004, International Business Machines
b75a7d8f
A
5* Corporation and others. All Rights Reserved.
6*
7******************************************************************************
8*
9* File ustrtrns.c
10*
11* Modification History:
12*
13* Date Name Description
14* 9/10/2001 Ram Creation.
15******************************************************************************
16*/
17
18/*******************************************************************************
19 *
20 * u_strTo* and u_strFrom* APIs
374ca955 21 * WCS functions moved to ustr_wcs.c for better modularization
b75a7d8f
A
22 *
23 *******************************************************************************
24 */
25
26
27#include "unicode/putil.h"
b75a7d8f
A
28#include "unicode/ustring.h"
29#include "cstring.h"
b75a7d8f
A
30#include "cmemory.h"
31#include "ustr_imp.h"
32
b75a7d8f
A
33U_CAPI UChar* U_EXPORT2
34u_strFromUTF32(UChar *dest,
35 int32_t destCapacity,
36 int32_t *pDestLength,
37 const UChar32 *src,
38 int32_t srcLength,
39 UErrorCode *pErrorCode)
40{
41 int32_t reqLength = 0;
42 uint32_t ch =0;
43 UChar *pDestLimit =dest+destCapacity;
44 UChar *pDest = dest;
45 const uint32_t *pSrc = (const uint32_t *)src;
46
47 /* args check */
48 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
49 return NULL;
50 }
51
374ca955 52 if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
b75a7d8f
A
53 *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
54 return NULL;
55 }
56
57 /* Check if the source is null terminated */
58 if(srcLength == -1 ){
59 while(((ch=*pSrc)!=0) && (pDest < pDestLimit)){
60 ++pSrc;
61 if(ch<=0xFFFF){
62 *(pDest++)=(UChar)ch;
63 }else if(ch<=0x10ffff){
64 *(pDest++)=UTF16_LEAD(ch);
65 if(pDest<pDestLimit){
66 *(pDest++)=UTF16_TRAIL(ch);
67 }else{
68 reqLength++;
69 break;
70 }
71 }else{
72 *pErrorCode = U_INVALID_CHAR_FOUND;
73 return NULL;
74 }
75 }
76 while((ch=*pSrc++) != 0){
77 reqLength+=UTF_CHAR_LENGTH(ch);
78 }
79 }else{
80 const uint32_t* pSrcLimit = ((const uint32_t*)pSrc) + srcLength;
81 while((pSrc < pSrcLimit) && (pDest < pDestLimit)){
82 ch = *pSrc++;
83 if(ch<=0xFFFF){
84 *(pDest++)=(UChar)ch;
85 }else if(ch<=0x10FFFF){
86 *(pDest++)=UTF16_LEAD(ch);
87 if(pDest<pDestLimit){
88 *(pDest++)=UTF16_TRAIL(ch);
89 }else{
90 reqLength++;
91 break;
92 }
93 }else{
94 *pErrorCode = U_INVALID_CHAR_FOUND;
95 return NULL;
96 }
97 }
98 while(pSrc <pSrcLimit){
99 ch = *pSrc++;
100 reqLength+=UTF_CHAR_LENGTH(ch);
101 }
102 }
103
104 reqLength += pDest - dest;
105 if(pDestLength){
106 *pDestLength = reqLength;
107 }
108
109 /* Terminate the buffer */
110 u_terminateUChars(dest,destCapacity,reqLength,pErrorCode);
111
112 return dest;
113}
114
115
116U_CAPI UChar32* U_EXPORT2
117u_strToUTF32(UChar32 *dest,
118 int32_t destCapacity,
119 int32_t *pDestLength,
120 const UChar *src,
121 int32_t srcLength,
122 UErrorCode *pErrorCode)
123{
124 const UChar* pSrc = src;
125 const UChar* pSrcLimit;
126 int32_t reqLength=0;
127 uint32_t ch=0;
128 uint32_t *pDest = (uint32_t *)dest;
129 uint32_t *pDestLimit = pDest + destCapacity;
130 UChar ch2=0;
131
132 /* args check */
133 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
134 return NULL;
135 }
136
137
374ca955 138 if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
b75a7d8f
A
139 *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
140 return NULL;
141 }
142
143 if(srcLength==-1) {
144 while((ch=*pSrc)!=0 && pDest!=pDestLimit) {
145 ++pSrc;
146 /*need not check for NUL because NUL fails UTF_IS_TRAIL() anyway*/
147 if(UTF_IS_LEAD(ch) && UTF_IS_TRAIL(ch2=*pSrc)) {
148 ++pSrc;
149 ch=UTF16_GET_PAIR_VALUE(ch, ch2);
150 }
151 *(pDest++)= ch;
152 }
153 while((ch=*pSrc++)!=0) {
154 if(UTF_IS_LEAD(ch) && UTF_IS_TRAIL(ch2=*pSrc)) {
155 ++pSrc;
156 }
157 ++reqLength;
158 }
159 } else {
160 pSrcLimit = pSrc+srcLength;
161 while(pSrc<pSrcLimit && pDest<pDestLimit) {
162 ch=*pSrc++;
163 if(UTF_IS_LEAD(ch) && pSrc<pSrcLimit && UTF_IS_TRAIL(ch2=*pSrc)) {
164 ++pSrc;
165 ch=UTF16_GET_PAIR_VALUE(ch, ch2);
166 }
167 *(pDest++)= ch;
168 }
169 while(pSrc!=pSrcLimit) {
170 ch=*pSrc++;
171 if(UTF_IS_LEAD(ch) && pSrc<pSrcLimit && UTF_IS_TRAIL(ch2=*pSrc)) {
172 ++pSrc;
173 }
174 ++reqLength;
175 }
176 }
177
178 reqLength+=(pDest - (uint32_t *)dest);
179 if(pDestLength){
180 *pDestLength = reqLength;
181 }
182
183 /* Terminate the buffer */
184 u_terminateUChar32s(dest,destCapacity,reqLength,pErrorCode);
185
186 return dest;
187}
188
189U_CAPI UChar* U_EXPORT2
190u_strFromUTF8(UChar *dest,
191 int32_t destCapacity,
192 int32_t *pDestLength,
193 const char* src,
194 int32_t srcLength,
195 UErrorCode *pErrorCode){
196
197 UChar *pDest = dest;
198 UChar *pDestLimit = dest+destCapacity;
199 UChar32 ch=0;
200 int32_t index = 0;
201 int32_t reqLength = 0;
202 uint8_t* pSrc = (uint8_t*) src;
203
204 /* args check */
205 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
206 return NULL;
207 }
208
374ca955 209 if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
b75a7d8f
A
210 *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
211 return NULL;
212 }
213
214 if(srcLength == -1){
215 srcLength = uprv_strlen((char*)pSrc);
216 }
217
218 while((index < srcLength)&&(pDest<pDestLimit)){
219 ch = pSrc[index++];
220 if(ch <=0x7f){
221 *pDest++=(UChar)ch;
222 }else{
223 ch=utf8_nextCharSafeBody(pSrc, &index, srcLength, ch, -1);
224 if(ch<0){
225 *pErrorCode = U_INVALID_CHAR_FOUND;
226 return NULL;
227 }else if(ch<=0xFFFF){
228 *(pDest++)=(UChar)ch;
229 }else{
230 *(pDest++)=UTF16_LEAD(ch);
231 if(pDest<pDestLimit){
232 *(pDest++)=UTF16_TRAIL(ch);
233 }else{
234 reqLength++;
235 break;
236 }
237 }
238 }
239 }
240 /* donot fill the dest buffer just count the UChars needed */
241 while(index < srcLength){
242 ch = pSrc[index++];
243 if(ch <= 0x7f){
244 reqLength++;
245 }else{
246 ch=utf8_nextCharSafeBody(pSrc, &index, srcLength, ch, -1);
247 if(ch<0){
248 *pErrorCode = U_INVALID_CHAR_FOUND;
249 return NULL;
250 }
251 reqLength+=UTF_CHAR_LENGTH(ch);
252 }
253 }
254
255 reqLength+=(pDest - dest);
256
257 if(pDestLength){
258 *pDestLength = reqLength;
259 }
260
261 /* Terminate the buffer */
262 u_terminateUChars(dest,destCapacity,reqLength,pErrorCode);
263
264 return dest;
265}
266
267static U_INLINE uint8_t *
268_appendUTF8(uint8_t *pDest, UChar32 c) {
269 /* c<=0x7f is handled by the caller, here it is 0x80<=c<=0x10ffff */
270 if((c)<=0x7ff) {
271 *pDest++=(uint8_t)((c>>6)|0xc0);
272 *pDest++=(uint8_t)((c&0x3f)|0x80);
273 } else if((uint32_t)(c)<=0xffff) {
274 *pDest++=(uint8_t)((c>>12)|0xe0);
275 *pDest++=(uint8_t)(((c>>6)&0x3f)|0x80);
276 *pDest++=(uint8_t)(((c)&0x3f)|0x80);
277 } else /* if((uint32_t)(c)<=0x10ffff) */ {
278 *pDest++=(uint8_t)(((c)>>18)|0xf0);
279 *pDest++=(uint8_t)((((c)>>12)&0x3f)|0x80);
280 *pDest++=(uint8_t)((((c)>>6)&0x3f)|0x80);
281 *pDest++=(uint8_t)(((c)&0x3f)|0x80);
282 }
283 return pDest;
284}
285
286
287U_CAPI char* U_EXPORT2
288u_strToUTF8(char *dest,
289 int32_t destCapacity,
290 int32_t *pDestLength,
291 const UChar *pSrc,
292 int32_t srcLength,
293 UErrorCode *pErrorCode){
294
295 int32_t reqLength=0;
296 const UChar *pSrcLimit;
297 uint32_t ch=0,ch2=0;
298 uint8_t *pDest = (uint8_t *)dest;
299 uint8_t *pDestLimit = pDest + destCapacity;
300
301
302 /* args check */
303 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
304 return NULL;
305 }
306
374ca955 307 if((pSrc==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
b75a7d8f
A
308 *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
309 return NULL;
310 }
311
312 if(srcLength==-1) {
313 while((ch=*pSrc)!=0 && pDest!=pDestLimit) {
314 ++pSrc;
315 if(ch <= 0x7f) {
316 *pDest++ = (char)ch;
317 ++reqLength;
318 continue;
319 }
320
321 /*need not check for NUL because NUL fails UTF_IS_TRAIL() anyway*/
322 if(UTF_IS_SURROGATE(ch)) {
323 if(UTF_IS_SURROGATE_FIRST(ch) && UTF_IS_TRAIL(ch2=*pSrc)) {
324 ++pSrc;
325 ch=UTF16_GET_PAIR_VALUE(ch, ch2);
326 } else {
327 /* Unicode 3.2 forbids surrogate code points in UTF-8 */
328 *pErrorCode = U_INVALID_CHAR_FOUND;
329 return NULL;
330 }
331 }
332 reqLength += UTF8_CHAR_LENGTH(ch);
333 /* do we have enough room in destination? */
334 if(destCapacity< reqLength){
335 break;
336 }
337 /* convert and append*/
338 pDest=_appendUTF8(pDest, ch);
339 }
340 while((ch=*pSrc++)!=0) {
341 if(ch<=0x7f) {
342 ++reqLength;
343 } else if(ch<=0x7ff) {
344 reqLength+=2;
345 } else if(!UTF_IS_SURROGATE(ch)) {
346 reqLength+=3;
347 } else if(UTF_IS_SURROGATE_FIRST(ch) && UTF_IS_TRAIL(ch2=*pSrc)) {
348 ++pSrc;
349 reqLength+=4;
350 } else {
351 /* Unicode 3.2 forbids surrogate code points in UTF-8 */
352 *pErrorCode = U_INVALID_CHAR_FOUND;
353 return NULL;
354 }
355 }
356 } else {
357 pSrcLimit = pSrc+srcLength;
358 while(pSrc<pSrcLimit && pDest<pDestLimit) {
359 ch=*pSrc++;
360 if(ch <= 0x7f) {
361 *pDest++ = (char)ch;
362 ++reqLength;
363 continue;
364 }
365
366 if(UTF_IS_SURROGATE(ch)) {
367 if(UTF_IS_SURROGATE_FIRST(ch) && pSrc<pSrcLimit && UTF_IS_TRAIL(ch2=*pSrc)) {
368 ++pSrc;
369 ch=UTF16_GET_PAIR_VALUE(ch, ch2);
370 } else {
371 /* Unicode 3.2 forbids surrogate code points in UTF-8 */
372 *pErrorCode = U_INVALID_CHAR_FOUND;
373 return NULL;
374 }
375 }
376 reqLength += UTF8_CHAR_LENGTH(ch);
377 /* do we have enough room in destination? */
378 if(destCapacity< reqLength){
379 break;
380 }
381 /* convert and append*/
382 pDest=_appendUTF8(pDest, ch);
383 }
384 while(pSrc<pSrcLimit) {
385 ch=*pSrc++;
386 if(ch<=0x7f) {
387 ++reqLength;
388 } else if(ch<=0x7ff) {
389 reqLength+=2;
390 } else if(!UTF_IS_SURROGATE(ch)) {
391 reqLength+=3;
392 } else if(UTF_IS_SURROGATE_FIRST(ch) && pSrc<pSrcLimit && UTF_IS_TRAIL(ch2=*pSrc)) {
393 ++pSrc;
394 reqLength+=4;
395 } else {
396 /* Unicode 3.2 forbids surrogate code points in UTF-8 */
397 *pErrorCode = U_INVALID_CHAR_FOUND;
398 return NULL;
399 }
400 }
401 }
402
403 if(pDestLength){
404 *pDestLength = reqLength;
405 }
406
407 /* Terminate the buffer */
408 u_terminateChars((char*)dest,destCapacity,reqLength,pErrorCode);
409
410 return (char*)dest;
411}