]>
git.saurik.com Git - apple/icu.git/blob - icuSources/common/unistr_cnv.cpp
2 *******************************************************************************
4 * Copyright (C) 1999-2007, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
8 * file name: unistr_cnv.cpp
10 * tab size: 8 (not used)
13 * created on: 2004aug19
14 * created by: Markus W. Scherer
16 * Character conversion functions moved here from unistr.cpp
19 #include "unicode/utypes.h"
21 #if !UCONFIG_NO_CONVERSION
23 #include "unicode/putil.h"
26 #include "unicode/ustring.h"
27 #include "unicode/unistr.h"
28 #include "unicode/ucnv.h"
35 //========================================
37 //========================================
39 UnicodeString::UnicodeString(const char *codepageData
,
44 if(codepageData
!= 0) {
45 doCodepageCreate(codepageData
, (int32_t)uprv_strlen(codepageData
), codepage
);
50 UnicodeString::UnicodeString(const char *codepageData
,
56 if(codepageData
!= 0) {
57 doCodepageCreate(codepageData
, dataLength
, codepage
);
61 UnicodeString::UnicodeString(const char *src
, int32_t srcLength
,
63 UErrorCode
&errorCode
)
67 if(U_SUCCESS(errorCode
)) {
70 // treat as an empty string, do nothing more
71 } else if(srcLength
<-1) {
72 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
76 srcLength
=(int32_t)uprv_strlen(src
);
80 // use the provided converter
81 ucnv_resetToUnicode(cnv
);
82 doCodepageCreate(src
, srcLength
, cnv
, errorCode
);
84 // use the default converter
85 cnv
=u_getDefaultConverter(&errorCode
);
86 doCodepageCreate(src
, srcLength
, cnv
, errorCode
);
87 u_releaseDefaultConverter(cnv
);
92 if(U_FAILURE(errorCode
)) {
98 //========================================
100 //========================================
102 UnicodeString::extract(int32_t start
,
106 const char *codepage
) const
108 // if the arguments are illegal, then do nothing
109 if(/*dstSize < 0 || */(dstSize
> 0 && target
== 0)) {
113 // pin the indices to legal values
114 pinIndices(start
, length
);
116 // create the converter
117 UConverter
*converter
;
118 UErrorCode status
= U_ZERO_ERROR
;
120 // just write the NUL if the string length is 0
122 if(dstSize
>= 0x80000000) {
123 // careful: dstSize is unsigned! (0xffffffff means "unlimited")
124 // make sure that the NUL-termination works (takes int32_t)
127 return u_terminateChars(target
, dstSize
, 0, &status
);
130 // if the codepage is the default, use our cache
131 // if it is an empty string, then use the "invariant character" conversion
133 converter
= u_getDefaultConverter(&status
);
134 } else if (*codepage
== 0) {
135 // use the "invariant characters" conversion
137 // careful: dstSize is unsigned! (0xffffffff means "unlimited")
138 if(dstSize
>= 0x80000000) {
140 // make sure that the NUL-termination works (takes int32_t)
142 } else if(length
<= (int32_t)dstSize
) {
145 destLength
= (int32_t)dstSize
;
147 u_UCharsToChars(getArrayStart() + start
, target
, destLength
);
148 return u_terminateChars(target
, (int32_t)dstSize
, length
, &status
);
150 converter
= ucnv_open(codepage
, &status
);
153 length
= doExtract(start
, length
, target
, (int32_t)dstSize
, converter
, status
);
155 // close the converter
157 u_releaseDefaultConverter(converter
);
159 ucnv_close(converter
);
166 UnicodeString::extract(char *dest
, int32_t destCapacity
,
168 UErrorCode
&errorCode
) const
170 if(U_FAILURE(errorCode
)) {
174 if(isBogus() || destCapacity
<0 || (destCapacity
>0 && dest
==0)) {
175 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
181 return u_terminateChars(dest
, destCapacity
, 0, &errorCode
);
185 UBool isDefaultConverter
;
187 isDefaultConverter
=TRUE
;
188 cnv
=u_getDefaultConverter(&errorCode
);
189 if(U_FAILURE(errorCode
)) {
193 isDefaultConverter
=FALSE
;
194 ucnv_resetFromUnicode(cnv
);
198 int32_t len
=doExtract(0, length(), dest
, destCapacity
, cnv
, errorCode
);
200 // release the converter
201 if(isDefaultConverter
) {
202 u_releaseDefaultConverter(cnv
);
209 UnicodeString::doExtract(int32_t start
, int32_t length
,
210 char *dest
, int32_t destCapacity
,
212 UErrorCode
&errorCode
) const
214 if(U_FAILURE(errorCode
)) {
215 if(destCapacity
!=0) {
221 const UChar
*src
=getArrayStart()+start
, *srcLimit
=src
+length
;
222 char *originalDest
=dest
;
223 const char *destLimit
;
225 if(destCapacity
==0) {
227 } else if(destCapacity
==-1) {
228 // Pin the limit to U_MAX_PTR if the "magic" destCapacity is used.
229 destLimit
=(char*)U_MAX_PTR(dest
);
230 // for NUL-termination, translate into highest int32_t
231 destCapacity
=0x7fffffff;
233 destLimit
=dest
+destCapacity
;
236 // perform the conversion
237 ucnv_fromUnicode(cnv
, &dest
, destLimit
, &src
, srcLimit
, 0, TRUE
, &errorCode
);
238 length
=(int32_t)(dest
-originalDest
);
240 // if an overflow occurs, then get the preflighting length
241 if(errorCode
==U_BUFFER_OVERFLOW_ERROR
) {
244 destLimit
=buffer
+sizeof(buffer
);
247 errorCode
=U_ZERO_ERROR
;
248 ucnv_fromUnicode(cnv
, &dest
, destLimit
, &src
, srcLimit
, 0, TRUE
, &errorCode
);
249 length
+=(int32_t)(dest
-buffer
);
250 } while(errorCode
==U_BUFFER_OVERFLOW_ERROR
);
253 return u_terminateChars(originalDest
, destCapacity
, length
, &errorCode
);
257 UnicodeString::doCodepageCreate(const char *codepageData
,
259 const char *codepage
)
261 // if there's nothing to convert, do nothing
262 if(codepageData
== 0 || dataLength
== 0 || dataLength
< -1) {
265 if(dataLength
== -1) {
266 dataLength
= (int32_t)uprv_strlen(codepageData
);
269 UErrorCode status
= U_ZERO_ERROR
;
271 // create the converter
272 // if the codepage is the default, use our cache
273 // if it is an empty string, then use the "invariant character" conversion
274 UConverter
*converter
= (codepage
== 0 ?
275 u_getDefaultConverter(&status
) :
278 ucnv_open(codepage
, &status
));
280 // if we failed, set the appropriate flags and return
281 if(U_FAILURE(status
)) {
286 // perform the conversion
288 // use the "invariant characters" conversion
289 if(cloneArrayIfNeeded(dataLength
, dataLength
, FALSE
)) {
290 u_charsToUChars(codepageData
, getArrayStart(), dataLength
);
291 setLength(dataLength
);
298 // convert using the real converter
299 doCodepageCreate(codepageData
, dataLength
, converter
, status
);
300 if(U_FAILURE(status
)) {
304 // close the converter
306 u_releaseDefaultConverter(converter
);
308 ucnv_close(converter
);
313 UnicodeString::doCodepageCreate(const char *codepageData
,
315 UConverter
*converter
,
318 if(U_FAILURE(status
)) {
322 // set up the conversion parameters
323 const char *mySource
= codepageData
;
324 const char *mySourceEnd
= mySource
+ dataLength
;
325 UChar
*array
, *myTarget
;
327 // estimate the size needed:
329 if(dataLength
<= US_STACKBUF_SIZE
) {
330 // try to use the stack buffer
331 arraySize
= US_STACKBUF_SIZE
;
333 // 1.25 UChar's per source byte should cover most cases
334 arraySize
= dataLength
+ (dataLength
>> 2);
337 // we do not care about the current contents
338 UBool doCopyArray
= FALSE
;
340 if(!cloneArrayIfNeeded(arraySize
, arraySize
, doCopyArray
)) {
345 // perform the conversion
346 array
= getArrayStart();
347 myTarget
= array
+ length();
348 ucnv_toUnicode(converter
, &myTarget
, array
+ getCapacity(),
349 &mySource
, mySourceEnd
, 0, TRUE
, &status
);
351 // update the conversion parameters
352 setLength((int32_t)(myTarget
- array
));
354 // allocate more space and copy data, if needed
355 if(status
== U_BUFFER_OVERFLOW_ERROR
) {
356 // reset the error code
357 status
= U_ZERO_ERROR
;
359 // keep the previous conversion results
362 // estimate the new size needed, larger than before
363 // try 2 UChar's per remaining source byte
364 arraySize
= (int32_t)(length() + 2 * (mySourceEnd
- mySource
));