]>
Commit | Line | Data |
---|---|---|
b75a7d8f A |
1 | /* |
2 | ***************************************************************************** | |
3 | * | |
374ca955 | 4 | * Copyright (C) 1998-2004, International Business Machines |
b75a7d8f A |
5 | * Corporation and others. All Rights Reserved. |
6 | * | |
7 | ***************************************************************************** | |
8 | * | |
9 | * ucnv_err.c | |
10 | * Implements error behaviour functions called by T_UConverter_{from,to}Unicode | |
11 | * | |
12 | * | |
13 | * Change history: | |
14 | * | |
15 | * 06/29/2000 helena Major rewrite of the callback APIs. | |
16 | */ | |
17 | ||
374ca955 A |
18 | #include "unicode/utypes.h" |
19 | ||
20 | #if !UCONFIG_NO_CONVERSION | |
21 | ||
b75a7d8f A |
22 | #include "unicode/ucnv_err.h" |
23 | #include "unicode/ucnv_cb.h" | |
24 | #include "ucnv_cnv.h" | |
25 | #include "cmemory.h" | |
26 | #include "unicode/ucnv.h" | |
27 | #include "ustrfmt.h" | |
28 | ||
/*
 * Capacity, in UChars, of the stack buffers that hold one escape string.
 * Magic # 48 = 6 (UChars in the longest per-byte escape written by
 * UCNV_TO_U_CALLBACK_ESCAPE: '&' '#' + 3 decimal digits + ';', or
 * '&' '#' 'x' + 2 hex digits + ';')
 * * 8 (max number of bytes per char for any converter).
 * The previous value of 32 only covered the 4-UChar "%XNN" form.
 */
#define VALUE_STRING_LENGTH 48

/* Code points used to assemble the escape strings */
#define UNICODE_PERCENT_SIGN_CODEPOINT  0x0025
#define UNICODE_U_CODEPOINT             0x0055
#define UNICODE_X_CODEPOINT             0x0058
#define UNICODE_RS_CODEPOINT            0x005C
#define UNICODE_U_LOW_CODEPOINT         0x0075
#define UNICODE_X_LOW_CODEPOINT         0x0078
#define UNICODE_AMP_CODEPOINT           0x0026
#define UNICODE_HASH_CODEPOINT          0x0023
#define UNICODE_SEMICOLON_CODEPOINT     0x003B
#define UNICODE_PLUS_CODEPOINT          0x002B
#define UNICODE_LEFT_CURLY_CODEPOINT    0x007B
#define UNICODE_RIGHT_CURLY_CODEPOINT   0x007D

/* Escape-style selectors: compared against the first char of the callback context */
#define UCNV_PRV_ESCAPE_ICU      0
#define UCNV_PRV_ESCAPE_C        'C'
#define UCNV_PRV_ESCAPE_XML_DEC  'D'
#define UCNV_PRV_ESCAPE_XML_HEX  'X'
#define UCNV_PRV_ESCAPE_JAVA     'J'
#define UCNV_PRV_ESCAPE_UNICODE  'U'
49 | ||
50 | /*Function Pointer STOPS at the ILLEGAL_SEQUENCE */ | |
51 | U_CAPI void U_EXPORT2 | |
52 | UCNV_FROM_U_CALLBACK_STOP ( | |
53 | const void *context, | |
54 | UConverterFromUnicodeArgs *fromUArgs, | |
55 | const UChar* codeUnits, | |
56 | int32_t length, | |
57 | UChar32 codePoint, | |
58 | UConverterCallbackReason reason, | |
59 | UErrorCode * err) | |
60 | { | |
61 | /* the caller must have set the error code accordingly */ | |
62 | return; | |
63 | } | |
64 | ||
65 | ||
66 | /*Function Pointer STOPS at the ILLEGAL_SEQUENCE */ | |
67 | U_CAPI void U_EXPORT2 | |
68 | UCNV_TO_U_CALLBACK_STOP ( | |
69 | const void *context, | |
70 | UConverterToUnicodeArgs *toUArgs, | |
71 | const char* codePoints, | |
72 | int32_t length, | |
73 | UConverterCallbackReason reason, | |
74 | UErrorCode * err) | |
75 | { | |
76 | /* the caller must have set the error code accordingly */ | |
77 | return; | |
78 | } | |
79 | ||
80 | U_CAPI void U_EXPORT2 | |
81 | UCNV_FROM_U_CALLBACK_SKIP ( | |
82 | const void *context, | |
83 | UConverterFromUnicodeArgs *fromUArgs, | |
84 | const UChar* codeUnits, | |
85 | int32_t length, | |
86 | UChar32 codePoint, | |
87 | UConverterCallbackReason reason, | |
88 | UErrorCode * err) | |
89 | { | |
90 | if(context==NULL) | |
91 | { | |
92 | if (reason <= UCNV_IRREGULAR) | |
93 | { | |
94 | *err = U_ZERO_ERROR; | |
95 | return; | |
96 | } | |
97 | ||
98 | } | |
99 | else if(*(char*)context=='i') | |
100 | { | |
101 | if(reason != UCNV_UNASSIGNED) | |
102 | { | |
103 | /* the caller must have set | |
104 | * the error code accordingly | |
105 | */ | |
106 | return; | |
107 | } | |
108 | else | |
109 | { | |
110 | *err = U_ZERO_ERROR; | |
111 | return; | |
112 | } | |
113 | } | |
114 | } | |
115 | ||
116 | U_CAPI void U_EXPORT2 | |
117 | UCNV_FROM_U_CALLBACK_SUBSTITUTE ( | |
118 | const void *context, | |
119 | UConverterFromUnicodeArgs *fromArgs, | |
120 | const UChar* codeUnits, | |
121 | int32_t length, | |
122 | UChar32 codePoint, | |
123 | UConverterCallbackReason reason, | |
124 | UErrorCode * err) | |
125 | { | |
126 | if(context == NULL) | |
127 | { | |
128 | if (reason > UCNV_IRREGULAR) | |
129 | { | |
130 | return; | |
131 | } | |
132 | ||
133 | *err = U_ZERO_ERROR; | |
134 | ucnv_cbFromUWriteSub(fromArgs, 0, err); | |
135 | return; | |
136 | } | |
137 | else if(*((char*)context)=='i') | |
138 | { | |
139 | if(reason != UCNV_UNASSIGNED) | |
140 | { | |
141 | /* the caller must have set | |
142 | * the error code accordingly | |
143 | */ | |
144 | return; | |
145 | } | |
146 | else | |
147 | { | |
148 | *err = U_ZERO_ERROR; | |
149 | ucnv_cbFromUWriteSub(fromArgs, 0, err); | |
150 | return; | |
151 | } | |
152 | } | |
153 | } | |
154 | ||
155 | /*uses uprv_itou to get a unicode escape sequence of the offensive sequence, | |
156 | *uses a clean copy (resetted) of the converter, to convert that unicode | |
157 | *escape sequence to the target codepage (if conversion failure happens then | |
158 | *we revert to substituting with subchar) | |
159 | */ | |
160 | U_CAPI void U_EXPORT2 | |
161 | UCNV_FROM_U_CALLBACK_ESCAPE ( | |
162 | const void *context, | |
163 | UConverterFromUnicodeArgs *fromArgs, | |
164 | const UChar *codeUnits, | |
165 | int32_t length, | |
166 | UChar32 codePoint, | |
167 | UConverterCallbackReason reason, | |
168 | UErrorCode * err) | |
169 | { | |
170 | ||
171 | UChar valueString[VALUE_STRING_LENGTH]; | |
172 | int32_t valueStringLength = 0; | |
173 | int32_t i = 0; | |
174 | ||
175 | const UChar *myValueSource = NULL; | |
176 | UErrorCode err2 = U_ZERO_ERROR; | |
177 | UConverterFromUCallback original = NULL; | |
178 | const void *originalContext; | |
179 | ||
180 | UConverterFromUCallback ignoredCallback = NULL; | |
181 | const void *ignoredContext; | |
182 | ||
183 | if (reason > UCNV_IRREGULAR) | |
184 | { | |
185 | return; | |
186 | } | |
187 | ||
188 | ucnv_setFromUCallBack (fromArgs->converter, | |
189 | (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_SUBSTITUTE, | |
190 | NULL, | |
191 | &original, | |
192 | &originalContext, | |
193 | &err2); | |
194 | ||
195 | if (U_FAILURE (err2)) | |
196 | { | |
197 | *err = err2; | |
198 | return; | |
199 | } | |
200 | if(context==NULL) | |
201 | { | |
202 | while (i < length) | |
203 | { | |
204 | valueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */ | |
205 | valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */ | |
206 | valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4); | |
207 | } | |
208 | } | |
209 | else | |
210 | { | |
211 | switch(*((char*)context)) | |
212 | { | |
213 | case UCNV_PRV_ESCAPE_JAVA: | |
214 | while (i < length) | |
215 | { | |
216 | valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */ | |
217 | valueString[valueStringLength++] = (UChar) UNICODE_U_LOW_CODEPOINT; /* adding u */ | |
218 | valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4); | |
219 | } | |
220 | break; | |
221 | ||
222 | case UCNV_PRV_ESCAPE_C: | |
223 | valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */ | |
224 | ||
225 | if(length==2){ | |
226 | valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */ | |
227 | valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 8); | |
228 | ||
229 | } | |
230 | else{ | |
231 | valueString[valueStringLength++] = (UChar) UNICODE_U_LOW_CODEPOINT; /* adding u */ | |
232 | valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 4); | |
233 | } | |
234 | break; | |
235 | ||
236 | case UCNV_PRV_ESCAPE_XML_DEC: | |
237 | ||
238 | valueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */ | |
239 | valueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */ | |
240 | if(length==2){ | |
241 | valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 10, 0); | |
242 | } | |
243 | else{ | |
244 | valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 10, 0); | |
245 | } | |
246 | valueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */ | |
247 | break; | |
248 | ||
249 | case UCNV_PRV_ESCAPE_XML_HEX: | |
250 | ||
251 | valueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */ | |
252 | valueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */ | |
253 | valueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */ | |
254 | if(length==2){ | |
255 | valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0); | |
256 | } | |
257 | else{ | |
258 | valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 0); | |
259 | } | |
260 | valueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */ | |
261 | break; | |
262 | ||
263 | case UCNV_PRV_ESCAPE_UNICODE: | |
264 | valueString[valueStringLength++] = (UChar) UNICODE_LEFT_CURLY_CODEPOINT; /* adding { */ | |
265 | valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */ | |
266 | valueString[valueStringLength++] = (UChar) UNICODE_PLUS_CODEPOINT; /* adding + */ | |
267 | if (length == 2) { | |
268 | valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 4); | |
269 | } else { | |
270 | valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 4); | |
271 | } | |
272 | valueString[valueStringLength++] = (UChar) UNICODE_RIGHT_CURLY_CODEPOINT; /* adding } */ | |
273 | break; | |
274 | ||
275 | default: | |
276 | while (i < length) | |
277 | { | |
278 | valueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */ | |
279 | valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */ | |
280 | valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4); | |
281 | } | |
282 | } | |
283 | ||
284 | } | |
285 | myValueSource = valueString; | |
286 | ||
287 | /* reset the error */ | |
288 | *err = U_ZERO_ERROR; | |
289 | ||
290 | ucnv_cbFromUWriteUChars(fromArgs, &myValueSource, myValueSource+valueStringLength, 0, err); | |
291 | ||
292 | ucnv_setFromUCallBack (fromArgs->converter, | |
293 | original, | |
294 | originalContext, | |
295 | &ignoredCallback, | |
296 | &ignoredContext, | |
297 | &err2); | |
298 | if (U_FAILURE (err2)) | |
299 | { | |
300 | *err = err2; | |
301 | return; | |
302 | } | |
303 | ||
304 | return; | |
305 | } | |
306 | ||
307 | ||
308 | ||
309 | U_CAPI void U_EXPORT2 | |
310 | UCNV_TO_U_CALLBACK_SKIP ( | |
311 | const void *context, | |
312 | UConverterToUnicodeArgs *toArgs, | |
313 | const char* codeUnits, | |
314 | int32_t length, | |
315 | UConverterCallbackReason reason, | |
316 | UErrorCode * err) | |
317 | { | |
318 | if(context==NULL) | |
319 | { | |
320 | if (reason <= UCNV_IRREGULAR) | |
321 | { | |
322 | *err = U_ZERO_ERROR; | |
323 | return; | |
324 | } | |
325 | ||
326 | } | |
327 | else if(*((char*)context)=='i') | |
328 | { | |
329 | if(reason != UCNV_UNASSIGNED) | |
330 | { | |
331 | /* the caller must have set | |
332 | * the error code accordingly | |
333 | */ | |
334 | return; | |
335 | } | |
336 | else | |
337 | { | |
338 | *err = U_ZERO_ERROR; | |
339 | return; | |
340 | } | |
341 | } | |
342 | } | |
343 | ||
344 | U_CAPI void U_EXPORT2 | |
345 | UCNV_TO_U_CALLBACK_SUBSTITUTE ( | |
346 | const void *context, | |
347 | UConverterToUnicodeArgs *toArgs, | |
348 | const char* codeUnits, | |
349 | int32_t length, | |
350 | UConverterCallbackReason reason, | |
351 | UErrorCode * err) | |
352 | { | |
353 | if(context == NULL) | |
354 | { | |
355 | if (reason > UCNV_IRREGULAR) | |
356 | { | |
357 | return; | |
358 | } | |
359 | ||
360 | *err = U_ZERO_ERROR; | |
361 | ucnv_cbToUWriteSub(toArgs,0,err); | |
362 | return; | |
363 | } | |
364 | else if(*((char*)context)=='i') | |
365 | { | |
366 | if(reason != UCNV_UNASSIGNED) | |
367 | { | |
368 | /* the caller must have set | |
369 | * the error code accordingly | |
370 | */ | |
371 | return; | |
372 | } | |
373 | else | |
374 | { | |
375 | *err = U_ZERO_ERROR; | |
376 | ucnv_cbToUWriteSub(toArgs,0,err); | |
377 | return; | |
378 | } | |
379 | } | |
380 | ||
381 | } | |
382 | ||
383 | /*uses uprv_itou to get a unicode escape sequence of the offensive sequence, | |
384 | *and uses that as the substitution sequence | |
385 | */ | |
386 | U_CAPI void U_EXPORT2 | |
387 | UCNV_TO_U_CALLBACK_ESCAPE ( | |
388 | const void *context, | |
389 | UConverterToUnicodeArgs *toArgs, | |
390 | const char* codeUnits, | |
391 | int32_t length, | |
392 | UConverterCallbackReason reason, | |
393 | UErrorCode * err) | |
394 | { | |
395 | UChar uniValueString[VALUE_STRING_LENGTH]; | |
396 | int32_t valueStringLength = 0; | |
397 | int32_t i = 0; | |
398 | ||
399 | if (reason > UCNV_IRREGULAR) | |
400 | { | |
401 | return; | |
402 | } | |
403 | ||
404 | if(context==NULL) | |
405 | { | |
406 | while (i < length) | |
407 | { | |
408 | uniValueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */ | |
409 | uniValueString[valueStringLength++] = (UChar) UNICODE_X_CODEPOINT; /* adding X */ | |
410 | valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t) codeUnits[i++], 16, 2); | |
411 | } | |
412 | } | |
413 | else | |
414 | { | |
415 | switch(*((char*)context)) | |
416 | { | |
417 | case UCNV_PRV_ESCAPE_XML_DEC: | |
418 | while (i < length) | |
419 | { | |
420 | uniValueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */ | |
421 | uniValueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */ | |
422 | valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 10, 0); | |
423 | uniValueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */ | |
424 | } | |
425 | break; | |
426 | ||
427 | case UCNV_PRV_ESCAPE_XML_HEX: | |
428 | while (i < length) | |
429 | { | |
430 | uniValueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */ | |
431 | uniValueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */ | |
432 | uniValueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */ | |
433 | valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 16, 0); | |
434 | uniValueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */ | |
435 | } | |
436 | break; | |
437 | case UCNV_PRV_ESCAPE_C: | |
438 | while (i < length) | |
439 | { | |
440 | uniValueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */ | |
441 | uniValueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */ | |
442 | valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 16, 2); | |
443 | } | |
444 | break; | |
445 | default: | |
446 | while (i < length) | |
447 | { | |
448 | uniValueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */ | |
449 | uniValueString[valueStringLength++] = (UChar) UNICODE_X_CODEPOINT; /* adding X */ | |
450 | uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t) codeUnits[i++], 16, 2); | |
451 | valueStringLength += 2; | |
452 | } | |
453 | } | |
454 | } | |
455 | /* reset the error */ | |
456 | *err = U_ZERO_ERROR; | |
457 | ||
458 | ucnv_cbToUWriteUChars(toArgs, uniValueString, valueStringLength, 0, err); | |
459 | } | |
374ca955 A |
460 | |
461 | #endif |