]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/unistr_cnv.cpp
ICU-400.42.tar.gz
[apple/icu.git] / icuSources / common / unistr_cnv.cpp
1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 1999-2007, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: unistr_cnv.cpp
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:2
12 *
13 * created on: 2004aug19
14 * created by: Markus W. Scherer
15 *
16 * Character conversion functions moved here from unistr.cpp
17 */
18
19 #include "unicode/utypes.h"
20
21 #if !UCONFIG_NO_CONVERSION
22
23 #include "unicode/putil.h"
24 #include "cstring.h"
25 #include "cmemory.h"
26 #include "unicode/ustring.h"
27 #include "unicode/unistr.h"
28 #include "unicode/ucnv.h"
29 #include "putilimp.h"
30 #include "ustr_cnv.h"
31 #include "ustr_imp.h"
32
33 U_NAMESPACE_BEGIN
34
35 //========================================
36 // Constructors
37 //========================================
38
39 UnicodeString::UnicodeString(const char *codepageData,
40 const char *codepage)
41 : fShortLength(0),
42 fFlags(kShortString)
43 {
44 if(codepageData != 0) {
45 doCodepageCreate(codepageData, (int32_t)uprv_strlen(codepageData), codepage);
46 }
47 }
48
49
50 UnicodeString::UnicodeString(const char *codepageData,
51 int32_t dataLength,
52 const char *codepage)
53 : fShortLength(0),
54 fFlags(kShortString)
55 {
56 if(codepageData != 0) {
57 doCodepageCreate(codepageData, dataLength, codepage);
58 }
59 }
60
61 UnicodeString::UnicodeString(const char *src, int32_t srcLength,
62 UConverter *cnv,
63 UErrorCode &errorCode)
64 : fShortLength(0),
65 fFlags(kShortString)
66 {
67 if(U_SUCCESS(errorCode)) {
68 // check arguments
69 if(src==NULL) {
70 // treat as an empty string, do nothing more
71 } else if(srcLength<-1) {
72 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
73 } else {
74 // get input length
75 if(srcLength==-1) {
76 srcLength=(int32_t)uprv_strlen(src);
77 }
78 if(srcLength>0) {
79 if(cnv!=0) {
80 // use the provided converter
81 ucnv_resetToUnicode(cnv);
82 doCodepageCreate(src, srcLength, cnv, errorCode);
83 } else {
84 // use the default converter
85 cnv=u_getDefaultConverter(&errorCode);
86 doCodepageCreate(src, srcLength, cnv, errorCode);
87 u_releaseDefaultConverter(cnv);
88 }
89 }
90 }
91
92 if(U_FAILURE(errorCode)) {
93 setToBogus();
94 }
95 }
96 }
97
98 //========================================
99 // Codeset conversion
100 //========================================
101 int32_t
102 UnicodeString::extract(int32_t start,
103 int32_t length,
104 char *target,
105 uint32_t dstSize,
106 const char *codepage) const
107 {
108 // if the arguments are illegal, then do nothing
109 if(/*dstSize < 0 || */(dstSize > 0 && target == 0)) {
110 return 0;
111 }
112
113 // pin the indices to legal values
114 pinIndices(start, length);
115
116 // create the converter
117 UConverter *converter;
118 UErrorCode status = U_ZERO_ERROR;
119
120 // just write the NUL if the string length is 0
121 if(length == 0) {
122 if(dstSize >= 0x80000000) {
123 // careful: dstSize is unsigned! (0xffffffff means "unlimited")
124 // make sure that the NUL-termination works (takes int32_t)
125 dstSize=0x7fffffff;
126 }
127 return u_terminateChars(target, dstSize, 0, &status);
128 }
129
130 // if the codepage is the default, use our cache
131 // if it is an empty string, then use the "invariant character" conversion
132 if (codepage == 0) {
133 converter = u_getDefaultConverter(&status);
134 } else if (*codepage == 0) {
135 // use the "invariant characters" conversion
136 int32_t destLength;
137 // careful: dstSize is unsigned! (0xffffffff means "unlimited")
138 if(dstSize >= 0x80000000) {
139 destLength = length;
140 // make sure that the NUL-termination works (takes int32_t)
141 dstSize=0x7fffffff;
142 } else if(length <= (int32_t)dstSize) {
143 destLength = length;
144 } else {
145 destLength = (int32_t)dstSize;
146 }
147 u_UCharsToChars(getArrayStart() + start, target, destLength);
148 return u_terminateChars(target, (int32_t)dstSize, length, &status);
149 } else {
150 converter = ucnv_open(codepage, &status);
151 }
152
153 length = doExtract(start, length, target, (int32_t)dstSize, converter, status);
154
155 // close the converter
156 if (codepage == 0) {
157 u_releaseDefaultConverter(converter);
158 } else {
159 ucnv_close(converter);
160 }
161
162 return length;
163 }
164
165 int32_t
166 UnicodeString::extract(char *dest, int32_t destCapacity,
167 UConverter *cnv,
168 UErrorCode &errorCode) const
169 {
170 if(U_FAILURE(errorCode)) {
171 return 0;
172 }
173
174 if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) {
175 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
176 return 0;
177 }
178
179 // nothing to do?
180 if(isEmpty()) {
181 return u_terminateChars(dest, destCapacity, 0, &errorCode);
182 }
183
184 // get the converter
185 UBool isDefaultConverter;
186 if(cnv==0) {
187 isDefaultConverter=TRUE;
188 cnv=u_getDefaultConverter(&errorCode);
189 if(U_FAILURE(errorCode)) {
190 return 0;
191 }
192 } else {
193 isDefaultConverter=FALSE;
194 ucnv_resetFromUnicode(cnv);
195 }
196
197 // convert
198 int32_t len=doExtract(0, length(), dest, destCapacity, cnv, errorCode);
199
200 // release the converter
201 if(isDefaultConverter) {
202 u_releaseDefaultConverter(cnv);
203 }
204
205 return len;
206 }
207
208 int32_t
209 UnicodeString::doExtract(int32_t start, int32_t length,
210 char *dest, int32_t destCapacity,
211 UConverter *cnv,
212 UErrorCode &errorCode) const
213 {
214 if(U_FAILURE(errorCode)) {
215 if(destCapacity!=0) {
216 *dest=0;
217 }
218 return 0;
219 }
220
221 const UChar *src=getArrayStart()+start, *srcLimit=src+length;
222 char *originalDest=dest;
223 const char *destLimit;
224
225 if(destCapacity==0) {
226 destLimit=dest=0;
227 } else if(destCapacity==-1) {
228 // Pin the limit to U_MAX_PTR if the "magic" destCapacity is used.
229 destLimit=(char*)U_MAX_PTR(dest);
230 // for NUL-termination, translate into highest int32_t
231 destCapacity=0x7fffffff;
232 } else {
233 destLimit=dest+destCapacity;
234 }
235
236 // perform the conversion
237 ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, &errorCode);
238 length=(int32_t)(dest-originalDest);
239
240 // if an overflow occurs, then get the preflighting length
241 if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
242 char buffer[1024];
243
244 destLimit=buffer+sizeof(buffer);
245 do {
246 dest=buffer;
247 errorCode=U_ZERO_ERROR;
248 ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, &errorCode);
249 length+=(int32_t)(dest-buffer);
250 } while(errorCode==U_BUFFER_OVERFLOW_ERROR);
251 }
252
253 return u_terminateChars(originalDest, destCapacity, length, &errorCode);
254 }
255
256 void
257 UnicodeString::doCodepageCreate(const char *codepageData,
258 int32_t dataLength,
259 const char *codepage)
260 {
261 // if there's nothing to convert, do nothing
262 if(codepageData == 0 || dataLength == 0 || dataLength < -1) {
263 return;
264 }
265 if(dataLength == -1) {
266 dataLength = (int32_t)uprv_strlen(codepageData);
267 }
268
269 UErrorCode status = U_ZERO_ERROR;
270
271 // create the converter
272 // if the codepage is the default, use our cache
273 // if it is an empty string, then use the "invariant character" conversion
274 UConverter *converter = (codepage == 0 ?
275 u_getDefaultConverter(&status) :
276 *codepage == 0 ?
277 0 :
278 ucnv_open(codepage, &status));
279
280 // if we failed, set the appropriate flags and return
281 if(U_FAILURE(status)) {
282 setToBogus();
283 return;
284 }
285
286 // perform the conversion
287 if(converter == 0) {
288 // use the "invariant characters" conversion
289 if(cloneArrayIfNeeded(dataLength, dataLength, FALSE)) {
290 u_charsToUChars(codepageData, getArrayStart(), dataLength);
291 setLength(dataLength);
292 } else {
293 setToBogus();
294 }
295 return;
296 }
297
298 // convert using the real converter
299 doCodepageCreate(codepageData, dataLength, converter, status);
300 if(U_FAILURE(status)) {
301 setToBogus();
302 }
303
304 // close the converter
305 if(codepage == 0) {
306 u_releaseDefaultConverter(converter);
307 } else {
308 ucnv_close(converter);
309 }
310 }
311
312 void
313 UnicodeString::doCodepageCreate(const char *codepageData,
314 int32_t dataLength,
315 UConverter *converter,
316 UErrorCode &status)
317 {
318 if(U_FAILURE(status)) {
319 return;
320 }
321
322 // set up the conversion parameters
323 const char *mySource = codepageData;
324 const char *mySourceEnd = mySource + dataLength;
325 UChar *array, *myTarget;
326
327 // estimate the size needed:
328 int32_t arraySize;
329 if(dataLength <= US_STACKBUF_SIZE) {
330 // try to use the stack buffer
331 arraySize = US_STACKBUF_SIZE;
332 } else {
333 // 1.25 UChar's per source byte should cover most cases
334 arraySize = dataLength + (dataLength >> 2);
335 }
336
337 // we do not care about the current contents
338 UBool doCopyArray = FALSE;
339 for(;;) {
340 if(!cloneArrayIfNeeded(arraySize, arraySize, doCopyArray)) {
341 setToBogus();
342 break;
343 }
344
345 // perform the conversion
346 array = getArrayStart();
347 myTarget = array + length();
348 ucnv_toUnicode(converter, &myTarget, array + getCapacity(),
349 &mySource, mySourceEnd, 0, TRUE, &status);
350
351 // update the conversion parameters
352 setLength((int32_t)(myTarget - array));
353
354 // allocate more space and copy data, if needed
355 if(status == U_BUFFER_OVERFLOW_ERROR) {
356 // reset the error code
357 status = U_ZERO_ERROR;
358
359 // keep the previous conversion results
360 doCopyArray = TRUE;
361
362 // estimate the new size needed, larger than before
363 // try 2 UChar's per remaining source byte
364 arraySize = (int32_t)(length() + 2 * (mySourceEnd - mySource));
365 } else {
366 break;
367 }
368 }
369 }
370
371 U_NAMESPACE_END
372
373 #endif