]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/ucnv_cb.cpp
ICU-66108.tar.gz
[apple/icu.git] / icuSources / common / ucnv_cb.cpp
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 * Copyright (C) 2000-2006, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
8 * ucnv_cb.c:
9 * External APIs for the ICU's codeset conversion library
10 * Helena Shih
11 *
12 * Modification History:
13 *
14 * Date Name Description
15 * 7/28/2000 srl Implementation
16 */
17
18 /**
19 * @name Character Conversion C API
20 *
21 */
22
23 #include "unicode/utypes.h"
24
25 #if !UCONFIG_NO_CONVERSION
26
27 #include "unicode/ucnv_cb.h"
28 #include "ucnv_bld.h"
29 #include "ucnv_cnv.h"
30 #include "cmemory.h"
31
32 /* need to update the offsets when the target moves. */
33 /* Note: Recursion may occur in the cb functions, be sure to update the offsets correctly
34 if you don't use ucnv_cbXXX functions. Make sure you don't use the same callback within
35 the same call stack if the complexity arises. */
36 U_CAPI void U_EXPORT2
37 ucnv_cbFromUWriteBytes (UConverterFromUnicodeArgs *args,
38 const char* source,
39 int32_t length,
40 int32_t offsetIndex,
41 UErrorCode * err)
42 {
43 if(U_FAILURE(*err)) {
44 return;
45 }
46
47 ucnv_fromUWriteBytes(
48 args->converter,
49 source, length,
50 &args->target, args->targetLimit,
51 &args->offsets, offsetIndex,
52 err);
53 }
54
55 U_CAPI void U_EXPORT2
56 ucnv_cbFromUWriteUChars(UConverterFromUnicodeArgs *args,
57 const UChar** source,
58 const UChar* sourceLimit,
59 int32_t offsetIndex,
60 UErrorCode * err)
61 {
62 /*
63 This is a fun one. Recursion can occur - we're basically going to
64 just retry shoving data through the same converter. Note, if you got
65 here through some kind of invalid sequence, you maybe should emit a
66 reset sequence of some kind and/or call ucnv_reset(). Since this
67 IS an actual conversion, take care that you've changed the callback
68 or the data, or you'll get an infinite loop.
69
70 Please set the err value to something reasonable before calling
71 into this.
72 */
73
74 char *oldTarget;
75
76 if(U_FAILURE(*err))
77 {
78 return;
79 }
80
81 oldTarget = args->target;
82
83 ucnv_fromUnicode(args->converter,
84 &args->target,
85 args->targetLimit,
86 source,
87 sourceLimit,
88 NULL, /* no offsets */
89 FALSE, /* no flush */
90 err);
91
92 if(args->offsets)
93 {
94 while (args->target != oldTarget) /* if it moved at all.. */
95 {
96 *(args->offsets)++ = offsetIndex;
97 oldTarget++;
98 }
99 }
100
101 /*
102 Note, if you did something like used a Stop subcallback, things would get interesting.
103 In fact, here's where we want to return the partially consumed in-source!
104 */
105 if(*err == U_BUFFER_OVERFLOW_ERROR)
106 /* && (*source < sourceLimit && args->target >= args->targetLimit)
107 -- S. Hrcek */
108 {
109 /* Overflowed the target. Now, we'll write into the charErrorBuffer.
110 It's a fixed size. If we overflow it... Hmm */
111 char *newTarget;
112 const char *newTargetLimit;
113 UErrorCode err2 = U_ZERO_ERROR;
114
115 int8_t errBuffLen;
116
117 errBuffLen = args->converter->charErrorBufferLength;
118
119 /* start the new target at the first free slot in the errbuff.. */
120 newTarget = (char *)(args->converter->charErrorBuffer + errBuffLen);
121
122 newTargetLimit = (char *)(args->converter->charErrorBuffer +
123 sizeof(args->converter->charErrorBuffer));
124
125 if(newTarget >= newTargetLimit)
126 {
127 *err = U_INTERNAL_PROGRAM_ERROR;
128 return;
129 }
130
131 /* We're going to tell the converter that the errbuff len is empty.
132 This prevents the existing errbuff from being 'flushed' out onto
133 itself. If the errbuff is needed by the converter this time,
134 we're hosed - we're out of space! */
135
136 args->converter->charErrorBufferLength = 0;
137
138 ucnv_fromUnicode(args->converter,
139 &newTarget,
140 newTargetLimit,
141 source,
142 sourceLimit,
143 NULL,
144 FALSE,
145 &err2);
146
147 /* We can go ahead and overwrite the length here. We know just how
148 to recalculate it. */
149
150 args->converter->charErrorBufferLength = (int8_t)(
151 newTarget - (char*)args->converter->charErrorBuffer);
152
153 if((newTarget >= newTargetLimit) || (err2 == U_BUFFER_OVERFLOW_ERROR))
154 {
155 /* now we're REALLY in trouble.
156 Internal program error - callback shouldn't have written this much
157 data!
158 */
159 *err = U_INTERNAL_PROGRAM_ERROR;
160 return;
161 }
162 /*else {*/
163 /* sub errs could be invalid/truncated/illegal chars or w/e.
164 These might want to be passed on up.. But the problem is, we already
165 need to pass U_BUFFER_OVERFLOW_ERROR. That has to override these
166 other errs.. */
167
168 /*
169 if(U_FAILURE(err2))
170 ??
171 */
172 /*}*/
173 }
174 }
175
176 U_CAPI void U_EXPORT2
177 ucnv_cbFromUWriteSub (UConverterFromUnicodeArgs *args,
178 int32_t offsetIndex,
179 UErrorCode * err)
180 {
181 UConverter *converter;
182 int32_t length;
183
184 if(U_FAILURE(*err)) {
185 return;
186 }
187 converter = args->converter;
188 length = converter->subCharLen;
189
190 if(length == 0) {
191 return;
192 }
193
194 if(length < 0) {
195 /*
196 * Write/convert the substitution string. Its real length is -length.
197 * Unlike the escape callback, we need not change the converter's
198 * callback function because ucnv_setSubstString() verified that
199 * the string can be converted, so we will not get a conversion error
200 * and will not recurse.
201 * At worst we should get a U_BUFFER_OVERFLOW_ERROR.
202 */
203 const UChar *source = (const UChar *)converter->subChars;
204 ucnv_cbFromUWriteUChars(args, &source, source - length, offsetIndex, err);
205 return;
206 }
207
208 if(converter->sharedData->impl->writeSub!=NULL) {
209 converter->sharedData->impl->writeSub(args, offsetIndex, err);
210 }
211 else if(converter->subChar1!=0 && (uint16_t)converter->invalidUCharBuffer[0]<=(uint16_t)0xffu) {
212 /*
213 TODO: Is this untestable because the MBCS converter has a writeSub function to call
214 and the other converters don't use subChar1?
215 */
216 ucnv_cbFromUWriteBytes(args,
217 (const char *)&converter->subChar1, 1,
218 offsetIndex, err);
219 }
220 else {
221 ucnv_cbFromUWriteBytes(args,
222 (const char *)converter->subChars, length,
223 offsetIndex, err);
224 }
225 }
226
227 U_CAPI void U_EXPORT2
228 ucnv_cbToUWriteUChars (UConverterToUnicodeArgs *args,
229 const UChar* source,
230 int32_t length,
231 int32_t offsetIndex,
232 UErrorCode * err)
233 {
234 if(U_FAILURE(*err)) {
235 return;
236 }
237
238 ucnv_toUWriteUChars(
239 args->converter,
240 source, length,
241 &args->target, args->targetLimit,
242 &args->offsets, offsetIndex,
243 err);
244 }
245
246 U_CAPI void U_EXPORT2
247 ucnv_cbToUWriteSub (UConverterToUnicodeArgs *args,
248 int32_t offsetIndex,
249 UErrorCode * err)
250 {
251 static const UChar kSubstituteChar1 = 0x1A, kSubstituteChar = 0xFFFD;
252
253 /* could optimize this case, just one uchar */
254 if(args->converter->invalidCharLength == 1 && args->converter->subChar1 != 0) {
255 ucnv_cbToUWriteUChars(args, &kSubstituteChar1, 1, offsetIndex, err);
256 } else {
257 ucnv_cbToUWriteUChars(args, &kSubstituteChar, 1, offsetIndex, err);
258 }
259 }
260
261 #endif