]>
git.saurik.com Git - apple/icu.git/blob - icuSources/common/unistr_cnv.cpp
2 *******************************************************************************
4 * Copyright (C) 1999-2004, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
8 * file name: unistr_cnv.cpp
10 * tab size: 8 (not used)
13 * created on: 2004aug19
14 * created by: Markus W. Scherer
16 * Character conversion functions moved here from unistr.cpp
19 #include "unicode/utypes.h"
21 #if !UCONFIG_NO_CONVERSION
23 #include "unicode/putil.h"
26 #include "unicode/ustring.h"
27 #include "unicode/unistr.h"
28 #include "unicode/ucnv.h"
35 //========================================
37 //========================================
39 UnicodeString::UnicodeString(const char *codepageData
,
42 fCapacity(US_STACKBUF_SIZE
),
46 if(codepageData
!= 0) {
47 doCodepageCreate(codepageData
, (int32_t)uprv_strlen(codepageData
), codepage
);
52 UnicodeString::UnicodeString(const char *codepageData
,
56 fCapacity(US_STACKBUF_SIZE
),
60 if(codepageData
!= 0) {
61 doCodepageCreate(codepageData
, dataLength
, codepage
);
65 UnicodeString::UnicodeString(const char *src
, int32_t srcLength
,
67 UErrorCode
&errorCode
)
69 fCapacity(US_STACKBUF_SIZE
),
73 if(U_SUCCESS(errorCode
)) {
76 // treat as an empty string, do nothing more
77 } else if(srcLength
<-1) {
78 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
82 srcLength
=(int32_t)uprv_strlen(src
);
86 // use the provided converter
87 ucnv_resetToUnicode(cnv
);
88 doCodepageCreate(src
, srcLength
, cnv
, errorCode
);
90 // use the default converter
91 cnv
=u_getDefaultConverter(&errorCode
);
92 doCodepageCreate(src
, srcLength
, cnv
, errorCode
);
93 u_releaseDefaultConverter(cnv
);
98 if(U_FAILURE(errorCode
)) {
104 //========================================
105 // Codeset conversion
106 //========================================
108 UnicodeString::extract(int32_t start
,
112 const char *codepage
) const
114 // if the arguments are illegal, then do nothing
115 if(/*dstSize < 0 || */(dstSize
> 0 && target
== 0)) {
119 // pin the indices to legal values
120 pinIndices(start
, length
);
122 // create the converter
123 UConverter
*converter
;
124 UErrorCode status
= U_ZERO_ERROR
;
126 // just write the NUL if the string length is 0
128 if(dstSize
>= 0x80000000) {
129 // careful: dstSize is unsigned! (0xffffffff means "unlimited")
130 // make sure that the NUL-termination works (takes int32_t)
133 return u_terminateChars(target
, dstSize
, 0, &status
);
136 // if the codepage is the default, use our cache
137 // if it is an empty string, then use the "invariant character" conversion
139 converter
= u_getDefaultConverter(&status
);
140 } else if (*codepage
== 0) {
141 // use the "invariant characters" conversion
143 // careful: dstSize is unsigned! (0xffffffff means "unlimited")
144 if(dstSize
>= 0x80000000) {
146 // make sure that the NUL-termination works (takes int32_t)
148 } else if(length
<= (int32_t)dstSize
) {
151 destLength
= (int32_t)dstSize
;
153 u_UCharsToChars(getArrayStart() + start
, target
, destLength
);
154 return u_terminateChars(target
, (int32_t)dstSize
, length
, &status
);
156 converter
= ucnv_open(codepage
, &status
);
159 length
= doExtract(start
, length
, target
, (int32_t)dstSize
, converter
, status
);
161 // close the converter
163 u_releaseDefaultConverter(converter
);
165 ucnv_close(converter
);
172 UnicodeString::extract(char *dest
, int32_t destCapacity
,
174 UErrorCode
&errorCode
) const {
175 if(U_FAILURE(errorCode
)) {
179 if(isBogus() || destCapacity
<0 || (destCapacity
>0 && dest
==0)) {
180 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
186 return u_terminateChars(dest
, destCapacity
, 0, &errorCode
);
190 UBool isDefaultConverter
;
192 isDefaultConverter
=TRUE
;
193 cnv
=u_getDefaultConverter(&errorCode
);
194 if(U_FAILURE(errorCode
)) {
198 isDefaultConverter
=FALSE
;
199 ucnv_resetFromUnicode(cnv
);
203 int32_t length
=doExtract(0, fLength
, dest
, destCapacity
, cnv
, errorCode
);
205 // release the converter
206 if(isDefaultConverter
) {
207 u_releaseDefaultConverter(cnv
);
214 UnicodeString::doExtract(int32_t start
, int32_t length
,
215 char *dest
, int32_t destCapacity
,
217 UErrorCode
&errorCode
) const {
218 if(U_FAILURE(errorCode
)) {
219 if(destCapacity
!=0) {
225 const UChar
*src
=fArray
+start
, *srcLimit
=src
+length
;
226 char *originalDest
=dest
;
227 const char *destLimit
;
229 if(destCapacity
==0) {
231 } else if(destCapacity
==-1) {
232 // Pin the limit to U_MAX_PTR if the "magic" destCapacity is used.
233 destLimit
=(char*)U_MAX_PTR(dest
);
234 // for NUL-termination, translate into highest int32_t
235 destCapacity
=0x7fffffff;
237 destLimit
=dest
+destCapacity
;
240 // perform the conversion
241 ucnv_fromUnicode(cnv
, &dest
, destLimit
, &src
, srcLimit
, 0, TRUE
, &errorCode
);
242 length
=(int32_t)(dest
-originalDest
);
244 // if an overflow occurs, then get the preflighting length
245 if(errorCode
==U_BUFFER_OVERFLOW_ERROR
) {
248 destLimit
=buffer
+sizeof(buffer
);
251 errorCode
=U_ZERO_ERROR
;
252 ucnv_fromUnicode(cnv
, &dest
, destLimit
, &src
, srcLimit
, 0, TRUE
, &errorCode
);
253 length
+=(int32_t)(dest
-buffer
);
254 } while(errorCode
==U_BUFFER_OVERFLOW_ERROR
);
257 return u_terminateChars(originalDest
, destCapacity
, length
, &errorCode
);
261 UnicodeString::doCodepageCreate(const char *codepageData
,
263 const char *codepage
)
265 // if there's nothing to convert, do nothing
266 if(codepageData
== 0 || dataLength
== 0 || dataLength
< -1) {
269 if(dataLength
== -1) {
270 dataLength
= uprv_strlen(codepageData
);
273 UErrorCode status
= U_ZERO_ERROR
;
275 // create the converter
276 // if the codepage is the default, use our cache
277 // if it is an empty string, then use the "invariant character" conversion
278 UConverter
*converter
= (codepage
== 0 ?
279 u_getDefaultConverter(&status
) :
282 ucnv_open(codepage
, &status
));
284 // if we failed, set the appropriate flags and return
285 if(U_FAILURE(status
)) {
290 // perform the conversion
292 // use the "invariant characters" conversion
293 if(cloneArrayIfNeeded(dataLength
, dataLength
, FALSE
)) {
294 u_charsToUChars(codepageData
, getArrayStart(), dataLength
);
295 fLength
= dataLength
;
302 // convert using the real converter
303 doCodepageCreate(codepageData
, dataLength
, converter
, status
);
304 if(U_FAILURE(status
)) {
308 // close the converter
310 u_releaseDefaultConverter(converter
);
312 ucnv_close(converter
);
317 UnicodeString::doCodepageCreate(const char *codepageData
,
319 UConverter
*converter
,
320 UErrorCode
&status
) {
321 if(U_FAILURE(status
)) {
325 // set up the conversion parameters
326 const char *mySource
= codepageData
;
327 const char *mySourceEnd
= mySource
+ dataLength
;
330 // estimate the size needed:
331 // 1.25 UChar's per source byte should cover most cases
332 int32_t arraySize
= dataLength
+ (dataLength
>> 2);
334 // we do not care about the current contents
335 UBool doCopyArray
= FALSE
;
337 if(!cloneArrayIfNeeded(arraySize
, arraySize
, doCopyArray
)) {
342 // perform the conversion
343 myTarget
= fArray
+ fLength
;
344 ucnv_toUnicode(converter
, &myTarget
, fArray
+ fCapacity
,
345 &mySource
, mySourceEnd
, 0, TRUE
, &status
);
347 // update the conversion parameters
348 fLength
= (int32_t)(myTarget
- fArray
);
350 // allocate more space and copy data, if needed
351 if(status
== U_BUFFER_OVERFLOW_ERROR
) {
352 // reset the error code
353 status
= U_ZERO_ERROR
;
355 // keep the previous conversion results
358 // estimate the new size needed, larger than before
359 // try 2 UChar's per remaining source byte
360 arraySize
= (int32_t)(fLength
+ 2 * (mySourceEnd
- mySource
));