]>
Commit | Line | Data |
---|---|---|
b75a7d8f A |
1 | /* |
2 | ***************************************************************************** | |
3 | * | |
46f4442e | 4 | * Copyright (C) 1998-2007, International Business Machines |
b75a7d8f A |
5 | * Corporation and others. All Rights Reserved. |
6 | * | |
7 | ***************************************************************************** | |
8 | * | |
9 | * ucnv_err.c | |
10 | * Implements error behaviour functions called by T_UConverter_{from,to}Unicode | |
11 | * | |
12 | * | |
13 | * Change history: | |
14 | * | |
15 | * 06/29/2000 helena Major rewrite of the callback APIs. | |
16 | */ | |
17 | ||
374ca955 A |
18 | #include "unicode/utypes.h" |
19 | ||
20 | #if !UCONFIG_NO_CONVERSION | |
21 | ||
b75a7d8f A |
22 | #include "unicode/ucnv_err.h" |
23 | #include "unicode/ucnv_cb.h" | |
24 | #include "ucnv_cnv.h" | |
25 | #include "cmemory.h" | |
26 | #include "unicode/ucnv.h" | |
27 | #include "ustrfmt.h" | |
28 | ||
29 | #define VALUE_STRING_LENGTH 32 | |
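30 | /* Magic number 32 = 4 (number of UChars in one escaped value string such as "%XNN") * 8 (max number of bytes per char for any converter). Note: the XML-style escapes ("&#255;", "&#xFF;") can take up to 6 UChars per byte. */ | |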
31 | #define UNICODE_PERCENT_SIGN_CODEPOINT 0x0025 | |
32 | #define UNICODE_U_CODEPOINT 0x0055 | |
33 | #define UNICODE_X_CODEPOINT 0x0058 | |
34 | #define UNICODE_RS_CODEPOINT 0x005C | |
35 | #define UNICODE_U_LOW_CODEPOINT 0x0075 | |
36 | #define UNICODE_X_LOW_CODEPOINT 0x0078 | |
37 | #define UNICODE_AMP_CODEPOINT 0x0026 | |
38 | #define UNICODE_HASH_CODEPOINT 0x0023 | |
39 | #define UNICODE_SEMICOLON_CODEPOINT 0x003B | |
40 | #define UNICODE_PLUS_CODEPOINT 0x002B | |
41 | #define UNICODE_LEFT_CURLY_CODEPOINT 0x007B | |
42 | #define UNICODE_RIGHT_CURLY_CODEPOINT 0x007D | |
46f4442e A |
43 | #define UNICODE_SPACE_CODEPOINT 0x0020 |
44 | #define UCNV_PRV_ESCAPE_ICU 0 | |
45 | #define UCNV_PRV_ESCAPE_C 'C' | |
46 | #define UCNV_PRV_ESCAPE_XML_DEC 'D' | |
47 | #define UCNV_PRV_ESCAPE_XML_HEX 'X' | |
48 | #define UCNV_PRV_ESCAPE_JAVA 'J' | |
49 | #define UCNV_PRV_ESCAPE_UNICODE 'U' | |
50 | #define UCNV_PRV_ESCAPE_CSS2 'S' | |
51 | #define UCNV_PRV_STOP_ON_ILLEGAL 'i' | |
b75a7d8f A |
52 | |
53 | /*Function Pointer STOPS at the ILLEGAL_SEQUENCE */ | |
54 | U_CAPI void U_EXPORT2 | |
55 | UCNV_FROM_U_CALLBACK_STOP ( | |
56 | const void *context, | |
57 | UConverterFromUnicodeArgs *fromUArgs, | |
58 | const UChar* codeUnits, | |
59 | int32_t length, | |
60 | UChar32 codePoint, | |
61 | UConverterCallbackReason reason, | |
62 | UErrorCode * err) | |
63 | { | |
46f4442e A |
64 | /* the caller must have set the error code accordingly */ |
65 | return; | |
b75a7d8f A |
66 | } |
67 | ||
68 | ||
69 | /*Function Pointer STOPS at the ILLEGAL_SEQUENCE */ | |
70 | U_CAPI void U_EXPORT2 | |
71 | UCNV_TO_U_CALLBACK_STOP ( | |
72 | const void *context, | |
73 | UConverterToUnicodeArgs *toUArgs, | |
74 | const char* codePoints, | |
75 | int32_t length, | |
76 | UConverterCallbackReason reason, | |
77 | UErrorCode * err) | |
78 | { | |
46f4442e A |
79 | /* the caller must have set the error code accordingly */ |
80 | return; | |
b75a7d8f A |
81 | } |
82 | ||
83 | U_CAPI void U_EXPORT2 | |
84 | UCNV_FROM_U_CALLBACK_SKIP ( | |
85 | const void *context, | |
86 | UConverterFromUnicodeArgs *fromUArgs, | |
87 | const UChar* codeUnits, | |
88 | int32_t length, | |
89 | UChar32 codePoint, | |
90 | UConverterCallbackReason reason, | |
91 | UErrorCode * err) | |
92 | { | |
46f4442e | 93 | if (reason <= UCNV_IRREGULAR) |
b75a7d8f | 94 | { |
46f4442e | 95 | if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED)) |
b75a7d8f A |
96 | { |
97 | *err = U_ZERO_ERROR; | |
b75a7d8f | 98 | } |
46f4442e | 99 | /* else the caller must have set the error code accordingly. */ |
b75a7d8f | 100 | } |
46f4442e | 101 | /* else ignore the reset, close and clone calls. */ |
b75a7d8f A |
102 | } |
103 | ||
104 | U_CAPI void U_EXPORT2 | |
105 | UCNV_FROM_U_CALLBACK_SUBSTITUTE ( | |
106 | const void *context, | |
107 | UConverterFromUnicodeArgs *fromArgs, | |
108 | const UChar* codeUnits, | |
109 | int32_t length, | |
110 | UChar32 codePoint, | |
111 | UConverterCallbackReason reason, | |
112 | UErrorCode * err) | |
113 | { | |
46f4442e | 114 | if (reason <= UCNV_IRREGULAR) |
b75a7d8f | 115 | { |
46f4442e | 116 | if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED)) |
b75a7d8f A |
117 | { |
118 | *err = U_ZERO_ERROR; | |
119 | ucnv_cbFromUWriteSub(fromArgs, 0, err); | |
b75a7d8f | 120 | } |
46f4442e | 121 | /* else the caller must have set the error code accordingly. */ |
b75a7d8f | 122 | } |
46f4442e | 123 | /* else ignore the reset, close and clone calls. */ |
b75a7d8f A |
124 | } |
125 | ||
126 | /*uses uprv_itou to get a unicode escape sequence of the offensive sequence, | |
127 | *uses a clean copy (resetted) of the converter, to convert that unicode | |
128 | *escape sequence to the target codepage (if conversion failure happens then | |
129 | *we revert to substituting with subchar) | |
130 | */ | |
131 | U_CAPI void U_EXPORT2 | |
132 | UCNV_FROM_U_CALLBACK_ESCAPE ( | |
133 | const void *context, | |
134 | UConverterFromUnicodeArgs *fromArgs, | |
135 | const UChar *codeUnits, | |
136 | int32_t length, | |
137 | UChar32 codePoint, | |
138 | UConverterCallbackReason reason, | |
139 | UErrorCode * err) | |
140 | { | |
141 | ||
142 | UChar valueString[VALUE_STRING_LENGTH]; | |
143 | int32_t valueStringLength = 0; | |
144 | int32_t i = 0; | |
145 | ||
146 | const UChar *myValueSource = NULL; | |
147 | UErrorCode err2 = U_ZERO_ERROR; | |
148 | UConverterFromUCallback original = NULL; | |
149 | const void *originalContext; | |
150 | ||
151 | UConverterFromUCallback ignoredCallback = NULL; | |
152 | const void *ignoredContext; | |
153 | ||
154 | if (reason > UCNV_IRREGULAR) | |
155 | { | |
46f4442e | 156 | return; |
b75a7d8f A |
157 | } |
158 | ||
159 | ucnv_setFromUCallBack (fromArgs->converter, | |
160 | (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_SUBSTITUTE, | |
161 | NULL, | |
162 | &original, | |
163 | &originalContext, | |
164 | &err2); | |
165 | ||
166 | if (U_FAILURE (err2)) | |
167 | { | |
168 | *err = err2; | |
169 | return; | |
170 | } | |
171 | if(context==NULL) | |
172 | { | |
173 | while (i < length) | |
174 | { | |
175 | valueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */ | |
176 | valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */ | |
177 | valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4); | |
178 | } | |
179 | } | |
180 | else | |
181 | { | |
182 | switch(*((char*)context)) | |
183 | { | |
46f4442e | 184 | case UCNV_PRV_ESCAPE_JAVA: |
b75a7d8f A |
185 | while (i < length) |
186 | { | |
46f4442e A |
187 | valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */ |
188 | valueString[valueStringLength++] = (UChar) UNICODE_U_LOW_CODEPOINT; /* adding u */ | |
189 | valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4); | |
b75a7d8f A |
190 | } |
191 | break; | |
192 | ||
46f4442e A |
193 | case UCNV_PRV_ESCAPE_C: |
194 | valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */ | |
b75a7d8f | 195 | |
46f4442e A |
196 | if(length==2){ |
197 | valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */ | |
198 | valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 8); | |
199 | ||
200 | } | |
201 | else{ | |
202 | valueString[valueStringLength++] = (UChar) UNICODE_U_LOW_CODEPOINT; /* adding u */ | |
203 | valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 4); | |
204 | } | |
b75a7d8f A |
205 | break; |
206 | ||
46f4442e A |
207 | case UCNV_PRV_ESCAPE_XML_DEC: |
208 | ||
209 | valueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */ | |
210 | valueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */ | |
211 | if(length==2){ | |
212 | valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 10, 0); | |
213 | } | |
214 | else{ | |
215 | valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 10, 0); | |
216 | } | |
217 | valueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */ | |
b75a7d8f A |
218 | break; |
219 | ||
46f4442e | 220 | case UCNV_PRV_ESCAPE_XML_HEX: |
b75a7d8f | 221 | |
46f4442e A |
222 | valueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */ |
223 | valueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */ | |
224 | valueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */ | |
225 | if(length==2){ | |
226 | valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0); | |
227 | } | |
228 | else{ | |
229 | valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 0); | |
230 | } | |
231 | valueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */ | |
b75a7d8f A |
232 | break; |
233 | ||
46f4442e | 234 | case UCNV_PRV_ESCAPE_UNICODE: |
b75a7d8f A |
235 | valueString[valueStringLength++] = (UChar) UNICODE_LEFT_CURLY_CODEPOINT; /* adding { */ |
236 | valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */ | |
237 | valueString[valueStringLength++] = (UChar) UNICODE_PLUS_CODEPOINT; /* adding + */ | |
238 | if (length == 2) { | |
46f4442e | 239 | valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 4); |
b75a7d8f | 240 | } else { |
46f4442e | 241 | valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 4); |
b75a7d8f A |
242 | } |
243 | valueString[valueStringLength++] = (UChar) UNICODE_RIGHT_CURLY_CODEPOINT; /* adding } */ | |
244 | break; | |
245 | ||
46f4442e A |
246 | case UCNV_PRV_ESCAPE_CSS2: |
247 | valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */ | |
248 | valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0); | |
249 | /* Always add space character, becase the next character might be whitespace, | |
250 | which would erroneously be considered the termination of the escape sequence. */ | |
251 | valueString[valueStringLength++] = (UChar) UNICODE_SPACE_CODEPOINT; | |
252 | break; | |
253 | ||
254 | default: | |
b75a7d8f A |
255 | while (i < length) |
256 | { | |
46f4442e A |
257 | valueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */ |
258 | valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */ | |
259 | valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4); | |
b75a7d8f A |
260 | } |
261 | } | |
b75a7d8f A |
262 | } |
263 | myValueSource = valueString; | |
264 | ||
265 | /* reset the error */ | |
266 | *err = U_ZERO_ERROR; | |
267 | ||
268 | ucnv_cbFromUWriteUChars(fromArgs, &myValueSource, myValueSource+valueStringLength, 0, err); | |
269 | ||
270 | ucnv_setFromUCallBack (fromArgs->converter, | |
271 | original, | |
272 | originalContext, | |
273 | &ignoredCallback, | |
274 | &ignoredContext, | |
275 | &err2); | |
276 | if (U_FAILURE (err2)) | |
46f4442e | 277 | { |
b75a7d8f A |
278 | *err = err2; |
279 | return; | |
46f4442e | 280 | } |
b75a7d8f A |
281 | |
282 | return; | |
283 | } | |
284 | ||
285 | ||
286 | ||
287 | U_CAPI void U_EXPORT2 | |
288 | UCNV_TO_U_CALLBACK_SKIP ( | |
289 | const void *context, | |
290 | UConverterToUnicodeArgs *toArgs, | |
291 | const char* codeUnits, | |
292 | int32_t length, | |
293 | UConverterCallbackReason reason, | |
294 | UErrorCode * err) | |
295 | { | |
46f4442e | 296 | if (reason <= UCNV_IRREGULAR) |
b75a7d8f | 297 | { |
46f4442e | 298 | if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED)) |
b75a7d8f A |
299 | { |
300 | *err = U_ZERO_ERROR; | |
b75a7d8f | 301 | } |
46f4442e | 302 | /* else the caller must have set the error code accordingly. */ |
b75a7d8f | 303 | } |
46f4442e | 304 | /* else ignore the reset, close and clone calls. */ |
b75a7d8f A |
305 | } |
306 | ||
307 | U_CAPI void U_EXPORT2 | |
308 | UCNV_TO_U_CALLBACK_SUBSTITUTE ( | |
309 | const void *context, | |
310 | UConverterToUnicodeArgs *toArgs, | |
311 | const char* codeUnits, | |
312 | int32_t length, | |
313 | UConverterCallbackReason reason, | |
314 | UErrorCode * err) | |
315 | { | |
46f4442e | 316 | if (reason <= UCNV_IRREGULAR) |
b75a7d8f | 317 | { |
46f4442e | 318 | if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED)) |
b75a7d8f A |
319 | { |
320 | *err = U_ZERO_ERROR; | |
321 | ucnv_cbToUWriteSub(toArgs,0,err); | |
b75a7d8f | 322 | } |
46f4442e | 323 | /* else the caller must have set the error code accordingly. */ |
b75a7d8f | 324 | } |
46f4442e | 325 | /* else ignore the reset, close and clone calls. */ |
b75a7d8f A |
326 | } |
327 | ||
328 | /*uses uprv_itou to get a unicode escape sequence of the offensive sequence, | |
329 | *and uses that as the substitution sequence | |
330 | */ | |
331 | U_CAPI void U_EXPORT2 | |
332 | UCNV_TO_U_CALLBACK_ESCAPE ( | |
333 | const void *context, | |
334 | UConverterToUnicodeArgs *toArgs, | |
335 | const char* codeUnits, | |
336 | int32_t length, | |
337 | UConverterCallbackReason reason, | |
338 | UErrorCode * err) | |
339 | { | |
340 | UChar uniValueString[VALUE_STRING_LENGTH]; | |
341 | int32_t valueStringLength = 0; | |
342 | int32_t i = 0; | |
343 | ||
344 | if (reason > UCNV_IRREGULAR) | |
345 | { | |
346 | return; | |
347 | } | |
348 | ||
46f4442e A |
349 | if(context==NULL) |
350 | { | |
351 | while (i < length) | |
352 | { | |
353 | uniValueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */ | |
354 | uniValueString[valueStringLength++] = (UChar) UNICODE_X_CODEPOINT; /* adding X */ | |
355 | valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t) codeUnits[i++], 16, 2); | |
356 | } | |
b75a7d8f | 357 | } |
46f4442e A |
358 | else |
359 | { | |
360 | switch(*((char*)context)) | |
361 | { | |
b75a7d8f | 362 | case UCNV_PRV_ESCAPE_XML_DEC: |
46f4442e A |
363 | while (i < length) |
364 | { | |
b75a7d8f A |
365 | uniValueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */ |
366 | uniValueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */ | |
367 | valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 10, 0); | |
368 | uniValueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */ | |
46f4442e A |
369 | } |
370 | break; | |
b75a7d8f A |
371 | |
372 | case UCNV_PRV_ESCAPE_XML_HEX: | |
46f4442e A |
373 | while (i < length) |
374 | { | |
b75a7d8f A |
375 | uniValueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */ |
376 | uniValueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */ | |
377 | uniValueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */ | |
378 | valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 16, 0); | |
379 | uniValueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */ | |
46f4442e A |
380 | } |
381 | break; | |
b75a7d8f | 382 | case UCNV_PRV_ESCAPE_C: |
46f4442e A |
383 | while (i < length) |
384 | { | |
b75a7d8f A |
385 | uniValueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */ |
386 | uniValueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */ | |
387 | valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 16, 2); | |
46f4442e A |
388 | } |
389 | break; | |
b75a7d8f | 390 | default: |
46f4442e A |
391 | while (i < length) |
392 | { | |
b75a7d8f A |
393 | uniValueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */ |
394 | uniValueString[valueStringLength++] = (UChar) UNICODE_X_CODEPOINT; /* adding X */ | |
395 | uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t) codeUnits[i++], 16, 2); | |
396 | valueStringLength += 2; | |
46f4442e A |
397 | } |
398 | } | |
399 | } | |
b75a7d8f A |
400 | /* reset the error */ |
401 | *err = U_ZERO_ERROR; | |
402 | ||
403 | ucnv_cbToUWriteUChars(toArgs, uniValueString, valueStringLength, 0, err); | |
404 | } | |
374ca955 A |
405 | |
406 | #endif |