1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 **********************************************************************
5 * Copyright (C) 2000-2006, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
9 * External APIs for the ICU's codeset conversion library
12 * Modification History:
14 * Date Name Description
15 * 7/28/2000 srl Implementation
19 * @name Character Conversion C API
23 #include "unicode/utypes.h"
25 #if !UCONFIG_NO_CONVERSION
27 #include "unicode/ucnv_cb.h"
32 /* need to update the offsets when the target moves. */
33 /* Note: Recursion may occur in the cb functions; be sure to update the offsets correctly
34 if you don't use the ucnv_cbXXX functions. Make sure you don't use the same callback within
35 the same call stack if such complexity arises. */
37 ucnv_cbFromUWriteBytes (UConverterFromUnicodeArgs
*args
,
50 &args
->target
, args
->targetLimit
,
51 &args
->offsets
, offsetIndex
,
56 ucnv_cbFromUWriteUChars(UConverterFromUnicodeArgs
*args
,
58 const UChar
* sourceLimit
,
63 This is a fun one. Recursion can occur - we're basically going to
64 just retry shoving data through the same converter. Note, if you got
65 here through some kind of invalid sequence, you should probably emit a
66 reset sequence of some kind and/or call ucnv_reset(). Since this
67 IS an actual conversion, take care that you've changed the callback
68 or the data, or you'll get an infinite loop.
70 Please set the err value to something reasonable before calling
81 oldTarget
= args
->target
;
83 ucnv_fromUnicode(args
->converter
,
88 NULL
, /* no offsets */
94 while (args
->target
!= oldTarget
) /* if it moved at all.. */
96 *(args
->offsets
)++ = offsetIndex
;
102 Note, if you did something like use a Stop subcallback, things would get interesting.
103 In fact, here's where we want to return the partially consumed in-source!
105 if(*err
== U_BUFFER_OVERFLOW_ERROR
)
106 /* && (*source < sourceLimit && args->target >= args->targetLimit)
109 /* Overflowed the target. Now, we'll write into the charErrorBuffer.
110 It's a fixed size. If we overflow it... Hmm */
112 const char *newTargetLimit
;
113 UErrorCode err2
= U_ZERO_ERROR
;
117 errBuffLen
= args
->converter
->charErrorBufferLength
;
119 /* start the new target at the first free slot in the errbuff.. */
120 newTarget
= (char *)(args
->converter
->charErrorBuffer
+ errBuffLen
);
122 newTargetLimit
= (char *)(args
->converter
->charErrorBuffer
+
123 sizeof(args
->converter
->charErrorBuffer
));
125 if(newTarget
>= newTargetLimit
)
127 *err
= U_INTERNAL_PROGRAM_ERROR
;
131 /* We're going to tell the converter that the errbuff len is empty.
132 This prevents the existing errbuff from being 'flushed' out onto
133 itself. If the errbuff is needed by the converter this time,
134 we're hosed - we're out of space! */
136 args
->converter
->charErrorBufferLength
= 0;
138 ucnv_fromUnicode(args
->converter
,
147 /* We can go ahead and overwrite the length here. We know just how
148 to recalculate it. */
150 args
->converter
->charErrorBufferLength
= (int8_t)(
151 newTarget
- (char*)args
->converter
->charErrorBuffer
);
153 if((newTarget
>= newTargetLimit
) || (err2
== U_BUFFER_OVERFLOW_ERROR
))
155 /* now we're REALLY in trouble.
156 Internal program error - callback shouldn't have written this much
159 *err
= U_INTERNAL_PROGRAM_ERROR
;
163 /* sub errs could be invalid/truncated/illegal chars or whatever.
164 These might want to be passed on up.. But the problem is, we already
165 need to pass U_BUFFER_OVERFLOW_ERROR. That has to override these
176 U_CAPI
void U_EXPORT2
177 ucnv_cbFromUWriteSub (UConverterFromUnicodeArgs
*args
,
181 UConverter
*converter
;
184 if(U_FAILURE(*err
)) {
187 converter
= args
->converter
;
188 length
= converter
->subCharLen
;
196 * Write/convert the substitution string. Its real length is -length.
197 * Unlike the escape callback, we need not change the converter's
198 * callback function because ucnv_setSubstString() verified that
199 * the string can be converted, so we will not get a conversion error
200 * and will not recurse.
201 * At worst we should get a U_BUFFER_OVERFLOW_ERROR.
203 const UChar
*source
= (const UChar
*)converter
->subChars
;
204 ucnv_cbFromUWriteUChars(args
, &source
, source
- length
, offsetIndex
, err
);
208 if(converter
->sharedData
->impl
->writeSub
!=NULL
) {
209 converter
->sharedData
->impl
->writeSub(args
, offsetIndex
, err
);
211 else if(converter
->subChar1
!=0 && (uint16_t)converter
->invalidUCharBuffer
[0]<=(uint16_t)0xffu
) {
213 TODO: Is this untestable because the MBCS converter has a writeSub function to call
214 and the other converters don't use subChar1?
216 ucnv_cbFromUWriteBytes(args
,
217 (const char *)&converter
->subChar1
, 1,
221 ucnv_cbFromUWriteBytes(args
,
222 (const char *)converter
->subChars
, length
,
227 U_CAPI
void U_EXPORT2
228 ucnv_cbToUWriteUChars (UConverterToUnicodeArgs
*args
,
234 if(U_FAILURE(*err
)) {
241 &args
->target
, args
->targetLimit
,
242 &args
->offsets
, offsetIndex
,
246 U_CAPI
void U_EXPORT2
247 ucnv_cbToUWriteSub (UConverterToUnicodeArgs
*args
,
251 static const UChar kSubstituteChar1
= 0x1A, kSubstituteChar
= 0xFFFD;
253 /* could optimize this case, just one uchar */
254 if(args
->converter
->invalidCharLength
== 1 && args
->converter
->subChar1
!= 0) {
255 ucnv_cbToUWriteUChars(args
, &kSubstituteChar1
, 1, offsetIndex
, err
);
257 ucnv_cbToUWriteUChars(args
, &kSubstituteChar
, 1, offsetIndex
, err
);