2 *****************************************************************************
4 * Copyright (C) 1998-2016, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 *****************************************************************************
10 * Implements error behaviour functions called by T_UConverter_{from,to}Unicode
15 * 06/29/2000 helena Major rewrite of the callback APIs.
18 #include "unicode/utypes.h"
20 #if !UCONFIG_NO_CONVERSION
22 #include "unicode/ucnv_err.h"
23 #include "unicode/ucnv_cb.h"
26 #include "unicode/ucnv.h"
/* Capacity, in UChars, of the local buffers in which the escape callbacks build their output (valueString and uniValueString). */
29 #define VALUE_STRING_LENGTH 48
30 /* NOTE(review): the original note here derived "32 = 4 (number of chars in value string) * 8 (max number of bytes per char for any converter)", which does not match the value 48 defined above; the derivation of 48 is not shown in this file. */
/* Code points of the characters the escape callbacks append: percent sign, U, X, backslash (RS), u, x, ampersand, hash, semicolon, plus, left curly, right curly and space. */
31 #define UNICODE_PERCENT_SIGN_CODEPOINT 0x0025
32 #define UNICODE_U_CODEPOINT 0x0055
33 #define UNICODE_X_CODEPOINT 0x0058
34 #define UNICODE_RS_CODEPOINT 0x005C
35 #define UNICODE_U_LOW_CODEPOINT 0x0075
36 #define UNICODE_X_LOW_CODEPOINT 0x0078
37 #define UNICODE_AMP_CODEPOINT 0x0026
38 #define UNICODE_HASH_CODEPOINT 0x0023
39 #define UNICODE_SEMICOLON_CODEPOINT 0x003B
40 #define UNICODE_PLUS_CODEPOINT 0x002B
41 #define UNICODE_LEFT_CURLY_CODEPOINT 0x007B
42 #define UNICODE_RIGHT_CURLY_CODEPOINT 0x007D
43 #define UNICODE_SPACE_CODEPOINT 0x0020
/* Option characters: the escape callbacks switch on the first char of their context to pick an escape format. UCNV_PRV_ESCAPE_ICU (0) is not referenced in the visible lines of this file. */
44 #define UCNV_PRV_ESCAPE_ICU 0
45 #define UCNV_PRV_ESCAPE_C 'C'
46 #define UCNV_PRV_ESCAPE_XML_DEC 'D'
47 #define UCNV_PRV_ESCAPE_XML_HEX 'X'
48 #define UCNV_PRV_ESCAPE_JAVA 'J'
49 #define UCNV_PRV_ESCAPE_UNICODE 'U'
50 #define UCNV_PRV_ESCAPE_CSS2 'S'
/* Option character that the SKIP and SUBSTITUTE callbacks compare against the first char of context, in combination with reason == UCNV_UNASSIGNED. */
51 #define UCNV_PRV_STOP_ON_ILLEGAL 'i'
54 * IS_DEFAULT_IGNORABLE_CODE_POINT
55 * This is to check if a code point has the default ignorable unicode property.
56 * As such, this list needs to be updated if the ignorable code point list ever
58 * To avoid dependency on other code, this list is hard coded here.
59 * When an ignorable code point is found and is unmappable, the default callbacks
61 * For a list of the default ignorable code points, use this link: http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[%3ADI%3A]&g=
63 * This list should be kept in sync with the one in CharsetCallback.java
/*
 * Evaluates to true when c falls inside one of the listed inclusive
 * code point ranges.
 * The argument c is expanded without parentheses and once per comparison,
 * so pass a plain variable with no side effects (the callbacks in this
 * file pass codePoint).
 * NOTE(review): only part of the range list, and not the closing
 * parenthesis of the macro, is visible in this listing -- confirm against
 * the complete file before editing the ranges.
 */
65 #define IS_DEFAULT_IGNORABLE_CODE_POINT(c) (\
71 (0x17B4 <= c && c <= 0x17B5) || \
72 (0x180B <= c && c <= 0x180E) || \
73 (0x200B <= c && c <= 0x200F) || \
74 (0x202A <= c && c <= 0x202E) || \
76 (0x2066 <= c && c <= 0x2069) || \
77 (0x2061 <= c && c <= 0x2064) || \
78 (0x206A <= c && c <= 0x206F) || \
80 (0x0FE00 <= c && c <= 0x0FE0F) || \
83 (0x01BCA0 <= c && c <= 0x01BCA3) || \
84 (0x01D173 <= c && c <= 0x01D17A) || \
86 (0x0E0020 <= c && c <= 0x0E007F) || \
87 (0x0E0100 <= c && c <= 0x0E01EF) || \
89 (0x0FFF0 <= c && c <= 0x0FFF8) || \
91 (0x0E0002 <= c && c <= 0x0E001F) || \
92 (0x0E0080 <= c && c <= 0x0E00FF) || \
93 (0x0E01F0 <= c && c <= 0x0E0FFF) \
/*
 * From-Unicode STOP callback.
 * The only test visible here is for an unassigned
 * (reason == UCNV_UNASSIGNED) default-ignorable codePoint, which the
 * comment in that branch says is skipped. In every other case nothing is
 * done, because the caller has already set the error code.
 * NOTE(review): the statement that performs the skip, the remaining
 * parameters and the braces are not visible in this listing -- confirm
 * against the complete file.
 */
97 /*Function Pointer STOPS at the ILLEGAL_SEQUENCE */
99 UCNV_FROM_U_CALLBACK_STOP (
101 UConverterFromUnicodeArgs
*fromUArgs
,
102 const UChar
* codeUnits
,
105 UConverterCallbackReason reason
,
108 if (reason
== UCNV_UNASSIGNED
&& IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint
))
111 * Skip if the codepoint has unicode property of default ignorable.
115 /* the caller must have set the error code accordingly */
/*
 * To-Unicode STOP callback.
 * No statements are visible in this listing; the only content is the note
 * that the caller must already have set the error code, so presumably the
 * body is empty and the error is left as reported -- confirm against the
 * complete file.
 */
120 /*Function Pointer STOPS at the ILLEGAL_SEQUENCE */
121 U_CAPI
void U_EXPORT2
122 UCNV_TO_U_CALLBACK_STOP (
124 UConverterToUnicodeArgs
*toUArgs
,
125 const char* codePoints
,
127 UConverterCallbackReason reason
,
130 /* the caller must have set the error code accordingly */
/*
 * From-Unicode SKIP callback.
 * Acts only when reason <= UCNV_IRREGULAR; per the trailing comment, the
 * other reasons (reset, close and clone calls) are ignored.
 *   - An unassigned default-ignorable codePoint is skipped.
 *   - Otherwise the second branch is taken when context is NULL, or when
 *     the first char of context is UCNV_PRV_STOP_ON_ILLEGAL and reason is
 *     UCNV_UNASSIGNED.
 *   - In the remaining cases the error code set by the caller is kept.
 * NOTE(review): the statements executed inside the two branches are not
 * visible in this listing (presumably they reset the error code so that
 * conversion continues) -- confirm against the complete file.
 */
134 U_CAPI
void U_EXPORT2
135 UCNV_FROM_U_CALLBACK_SKIP (
137 UConverterFromUnicodeArgs
*fromUArgs
,
138 const UChar
* codeUnits
,
141 UConverterCallbackReason reason
,
144 if (reason
<= UCNV_IRREGULAR
)
146 if (reason
== UCNV_UNASSIGNED
&& IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint
))
149 * Skip if the codepoint has unicode property of default ignorable.
153 else if (context
== NULL
|| (*((char*)context
) == UCNV_PRV_STOP_ON_ILLEGAL
&& reason
== UCNV_UNASSIGNED
))
157 /* else the caller must have set the error code accordingly. */
159 /* else ignore the reset, close and clone calls. */
/*
 * From-Unicode SUBSTITUTE callback.
 * Acts only when reason <= UCNV_IRREGULAR; per the trailing comment, the
 * other reasons (reset, close and clone calls) are ignored.
 *   - An unassigned default-ignorable codePoint is skipped rather than
 *     substituted.
 *   - Otherwise, when context is NULL, or when the first char of context
 *     is UCNV_PRV_STOP_ON_ILLEGAL and reason is UCNV_UNASSIGNED, the
 *     substitution is written with ucnv_cbFromUWriteSub(fromArgs, 0, err).
 *   - In the remaining cases the error code set by the caller is kept.
 * NOTE(review): any statements around the ucnv_cbFromUWriteSub() call and
 * in the skip branch are not visible in this listing -- confirm against
 * the complete file.
 */
162 U_CAPI
void U_EXPORT2
163 UCNV_FROM_U_CALLBACK_SUBSTITUTE (
165 UConverterFromUnicodeArgs
*fromArgs
,
166 const UChar
* codeUnits
,
169 UConverterCallbackReason reason
,
172 if (reason
<= UCNV_IRREGULAR
)
174 if (reason
== UCNV_UNASSIGNED
&& IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint
))
177 * Skip if the codepoint has unicode property of default ignorable.
181 else if (context
== NULL
|| (*((char*)context
) == UCNV_PRV_STOP_ON_ILLEGAL
&& reason
== UCNV_UNASSIGNED
))
184 ucnv_cbFromUWriteSub(fromArgs
, 0, err
);
186 /* else the caller must have set the error code accordingly. */
188 /* else ignore the reset, close and clone calls. */
191 /*uses uprv_itou to get a unicode escape sequence of the offensive sequence,
192 *uses a clean (reset) copy of the converter, to convert that unicode
193 *escape sequence to the target codepage (if conversion failure happens then
194 *we revert to substituting with subchar)
/*
 * From-Unicode ESCAPE callback: builds a textual escape for the offending
 * input in valueString and writes it with ucnv_cbFromUWriteUChars().
 *
 * Visible flow:
 *   - reason > UCNV_IRREGULAR is tested first (the branch body is not
 *     visible here; presumably an early return).
 *   - An unassigned default-ignorable codePoint is skipped.
 *   - ucnv_setFromUCallBack() is called on fromArgs->converter with
 *     UCNV_FROM_U_CALLBACK_SUBSTITUTE before the escape is built, and is
 *     called again after the escape has been written; err2 is checked
 *     with U_FAILURE() after each call.
 *     NOTE(review): the remaining arguments of both calls are not
 *     visible; presumably original/originalContext hold the callback to
 *     restore and ignoredCallback/ignoredContext receive the temporary
 *     one -- confirm against the complete file.
 *   - The escape format is selected by the first char of context (see the
 *     switch). The percent-U form appears both before the switch and after
 *     the CSS2 case (presumably the context == NULL path and the default
 *     case; neither condition is visible).
 *
 * uprv_itou() is called here as (destination, remaining capacity, value,
 * radix, minimum number of digits -- assumed from the call sites) and its
 * result is added to valueStringLength.
 */
196 U_CAPI
void U_EXPORT2
197 UCNV_FROM_U_CALLBACK_ESCAPE (
199 UConverterFromUnicodeArgs
*fromArgs
,
200 const UChar
*codeUnits
,
203 UConverterCallbackReason reason
,
207 UChar valueString
[VALUE_STRING_LENGTH
];
208 int32_t valueStringLength
= 0;
211 const UChar
*myValueSource
= NULL
;
212 UErrorCode err2
= U_ZERO_ERROR
;
213 UConverterFromUCallback original
= NULL
;
214 const void *originalContext
;
216 UConverterFromUCallback ignoredCallback
= NULL
;
217 const void *ignoredContext
;
219 if (reason
> UCNV_IRREGULAR
)
223 else if (reason
== UCNV_UNASSIGNED
&& IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint
))
226 * Skip if the codepoint has unicode property of default ignorable.
/* Switch the converter to the SUBSTITUTE callback while the escape text itself is converted. */
232 ucnv_setFromUCallBack (fromArgs
->converter
,
233 (UConverterFromUCallback
) UCNV_FROM_U_CALLBACK_SUBSTITUTE
,
239 if (U_FAILURE (err2
))
/* Percent-U form: percent sign, U, then 4 hex digits of one UTF-16 code unit (codeUnits[i++]; the enclosing loop is not visible here). */
248 valueString
[valueStringLength
++] = (UChar
) UNICODE_PERCENT_SIGN_CODEPOINT
; /* adding % */
249 valueString
[valueStringLength
++] = (UChar
) UNICODE_U_CODEPOINT
; /* adding U */
250 valueStringLength
+= uprv_itou (valueString
+ valueStringLength
, VALUE_STRING_LENGTH
- valueStringLength
, (uint16_t)codeUnits
[i
++], 16, 4);
255 switch(*((char*)context
))
/* Java form: backslash, u, then 4 hex digits per UTF-16 code unit. */
257 case UCNV_PRV_ESCAPE_JAVA
:
260 valueString
[valueStringLength
++] = (UChar
) UNICODE_RS_CODEPOINT
; /* adding \ */
261 valueString
[valueStringLength
++] = (UChar
) UNICODE_U_LOW_CODEPOINT
; /* adding u */
262 valueStringLength
+= uprv_itou (valueString
+ valueStringLength
, VALUE_STRING_LENGTH
- valueStringLength
, (uint16_t)codeUnits
[i
++], 16, 4);
/* C form: backslash, then either U + 8 hex digits of codePoint or u + 4 hex digits of codeUnits[0] (the condition choosing between them is not visible here). */
266 case UCNV_PRV_ESCAPE_C
:
267 valueString
[valueStringLength
++] = (UChar
) UNICODE_RS_CODEPOINT
; /* adding \ */
270 valueString
[valueStringLength
++] = (UChar
) UNICODE_U_CODEPOINT
; /* adding U */
271 valueStringLength
+= uprv_itou (valueString
+ valueStringLength
, VALUE_STRING_LENGTH
- valueStringLength
, codePoint
, 16, 8);
275 valueString
[valueStringLength
++] = (UChar
) UNICODE_U_LOW_CODEPOINT
; /* adding u */
276 valueStringLength
+= uprv_itou (valueString
+ valueStringLength
, VALUE_STRING_LENGTH
- valueStringLength
, (uint16_t)codeUnits
[0], 16, 4);
/* XML decimal form: ampersand, hash, the decimal value of codePoint or of codeUnits[0] (selecting condition not visible here), semicolon. */
280 case UCNV_PRV_ESCAPE_XML_DEC
:
282 valueString
[valueStringLength
++] = (UChar
) UNICODE_AMP_CODEPOINT
; /* adding & */
283 valueString
[valueStringLength
++] = (UChar
) UNICODE_HASH_CODEPOINT
; /* adding # */
285 valueStringLength
+= uprv_itou (valueString
+ valueStringLength
, VALUE_STRING_LENGTH
- valueStringLength
, codePoint
, 10, 0);
288 valueStringLength
+= uprv_itou (valueString
+ valueStringLength
, VALUE_STRING_LENGTH
- valueStringLength
, (uint16_t)codeUnits
[0], 10, 0);
290 valueString
[valueStringLength
++] = (UChar
) UNICODE_SEMICOLON_CODEPOINT
; /* adding ; */
/* XML hex form: ampersand, hash, x, the hex value of codePoint or of codeUnits[0] (selecting condition not visible here), semicolon. */
293 case UCNV_PRV_ESCAPE_XML_HEX
:
295 valueString
[valueStringLength
++] = (UChar
) UNICODE_AMP_CODEPOINT
; /* adding & */
296 valueString
[valueStringLength
++] = (UChar
) UNICODE_HASH_CODEPOINT
; /* adding # */
297 valueString
[valueStringLength
++] = (UChar
) UNICODE_X_LOW_CODEPOINT
; /* adding x */
299 valueStringLength
+= uprv_itou (valueString
+ valueStringLength
, VALUE_STRING_LENGTH
- valueStringLength
, codePoint
, 16, 0);
302 valueStringLength
+= uprv_itou (valueString
+ valueStringLength
, VALUE_STRING_LENGTH
- valueStringLength
, (uint16_t)codeUnits
[0], 16, 0);
304 valueString
[valueStringLength
++] = (UChar
) UNICODE_SEMICOLON_CODEPOINT
; /* adding ; */
/* Unicode form: left curly, U, plus, the hex value (4 passed as last uprv_itou argument) of codePoint or of codeUnits[0] (selecting condition not visible here), right curly. */
307 case UCNV_PRV_ESCAPE_UNICODE
:
308 valueString
[valueStringLength
++] = (UChar
) UNICODE_LEFT_CURLY_CODEPOINT
; /* adding { */
309 valueString
[valueStringLength
++] = (UChar
) UNICODE_U_CODEPOINT
; /* adding U */
310 valueString
[valueStringLength
++] = (UChar
) UNICODE_PLUS_CODEPOINT
; /* adding + */
312 valueStringLength
+= uprv_itou (valueString
+ valueStringLength
, VALUE_STRING_LENGTH
- valueStringLength
, codePoint
, 16, 4);
314 valueStringLength
+= uprv_itou (valueString
+ valueStringLength
, VALUE_STRING_LENGTH
- valueStringLength
, (uint16_t)codeUnits
[0], 16, 4);
316 valueString
[valueStringLength
++] = (UChar
) UNICODE_RIGHT_CURLY_CODEPOINT
; /* adding } */
/* CSS2 form: backslash, the hex value of codePoint, then a space. */
319 case UCNV_PRV_ESCAPE_CSS2
:
320 valueString
[valueStringLength
++] = (UChar
) UNICODE_RS_CODEPOINT
; /* adding \ */
321 valueStringLength
+= uprv_itou (valueString
+ valueStringLength
, VALUE_STRING_LENGTH
- valueStringLength
, codePoint
, 16, 0);
322 /* Always add space character, because the next character might be whitespace,
323 which would erroneously be considered the termination of the escape sequence. */
324 valueString
[valueStringLength
++] = (UChar
) UNICODE_SPACE_CODEPOINT
;
/* Same percent-U form as before the switch (presumably the default case; its label and loop are not visible here). */
330 valueString
[valueStringLength
++] = (UChar
) UNICODE_PERCENT_SIGN_CODEPOINT
; /* adding % */
331 valueString
[valueStringLength
++] = (UChar
) UNICODE_U_CODEPOINT
; /* adding U */
332 valueStringLength
+= uprv_itou (valueString
+ valueStringLength
, VALUE_STRING_LENGTH
- valueStringLength
, (uint16_t)codeUnits
[i
++], 16, 4);
/* Write the valueStringLength UChars collected in valueString; myValueSource is passed by address to the write function. */
336 myValueSource
= valueString
;
338 /* reset the error */
341 ucnv_cbFromUWriteUChars(fromArgs
, &myValueSource
, myValueSource
+valueStringLength
, 0, err
);
/* Second ucnv_setFromUCallBack() call, after the write (presumably restores the saved callback; arguments not visible here). */
343 ucnv_setFromUCallBack (fromArgs
->converter
,
349 if (U_FAILURE (err2
))
/*
 * To-Unicode SKIP callback.
 * Acts only when reason <= UCNV_IRREGULAR; per the trailing comment, the
 * other reasons (reset, close and clone calls) are ignored.
 * The inner branch is taken when context is NULL, or when the first char
 * of context is UCNV_PRV_STOP_ON_ILLEGAL and reason is UCNV_UNASSIGNED;
 * otherwise the error code set by the caller is kept.
 * NOTE(review): the statement executed inside the inner branch is not
 * visible in this listing (presumably it resets the error code so that
 * conversion continues) -- confirm against the complete file.
 */
360 U_CAPI
void U_EXPORT2
361 UCNV_TO_U_CALLBACK_SKIP (
363 UConverterToUnicodeArgs
*toArgs
,
364 const char* codeUnits
,
366 UConverterCallbackReason reason
,
369 if (reason
<= UCNV_IRREGULAR
)
371 if (context
== NULL
|| (*((char*)context
) == UCNV_PRV_STOP_ON_ILLEGAL
&& reason
== UCNV_UNASSIGNED
))
375 /* else the caller must have set the error code accordingly. */
377 /* else ignore the reset, close and clone calls. */
/*
 * To-Unicode SUBSTITUTE callback.
 * Acts only when reason <= UCNV_IRREGULAR; per the trailing comment, the
 * other reasons (reset, close and clone calls) are ignored.
 * When context is NULL, or when the first char of context is
 * UCNV_PRV_STOP_ON_ILLEGAL and reason is UCNV_UNASSIGNED, the substitution
 * is written with ucnv_cbToUWriteSub(toArgs, 0, err); otherwise the error
 * code set by the caller is kept.
 * NOTE(review): any statements around the ucnv_cbToUWriteSub() call are
 * not visible in this listing -- confirm against the complete file.
 */
380 U_CAPI
void U_EXPORT2
381 UCNV_TO_U_CALLBACK_SUBSTITUTE (
383 UConverterToUnicodeArgs
*toArgs
,
384 const char* codeUnits
,
386 UConverterCallbackReason reason
,
389 if (reason
<= UCNV_IRREGULAR
)
391 if (context
== NULL
|| (*((char*)context
) == UCNV_PRV_STOP_ON_ILLEGAL
&& reason
== UCNV_UNASSIGNED
))
394 ucnv_cbToUWriteSub(toArgs
,0,err
);
396 /* else the caller must have set the error code accordingly. */
398 /* else ignore the reset, close and clone calls. */
401 /*uses uprv_itou to get a unicode escape sequence of the offensive sequence,
402 *and uses that as the substitution sequence
/*
 * To-Unicode ESCAPE callback: formats the offending input bytes as an
 * escape sequence in uniValueString and writes it with
 * ucnv_cbToUWriteUChars(toArgs, uniValueString, valueStringLength, 0, err).
 *
 * Visible flow:
 *   - reason > UCNV_IRREGULAR is tested first (the branch body is not
 *     visible here; presumably an early return).
 *   - The escape format is selected by the first char of context (see the
 *     switch). The percent-X form appears both before the switch and after
 *     the C case (presumably the context == NULL path and the default
 *     case; neither condition is visible).
 *   - Each form formats one byte, read as (uint8_t) codeUnits[i++]; the
 *     enclosing loops are not visible in this listing.
 *
 * uprv_itou() is called here as (destination, remaining capacity, value,
 * radix, minimum number of digits -- assumed from the call sites).
 */
404 U_CAPI
void U_EXPORT2
405 UCNV_TO_U_CALLBACK_ESCAPE (
407 UConverterToUnicodeArgs
*toArgs
,
408 const char* codeUnits
,
410 UConverterCallbackReason reason
,
413 UChar uniValueString
[VALUE_STRING_LENGTH
];
414 int32_t valueStringLength
= 0;
417 if (reason
> UCNV_IRREGULAR
)
/* Percent-X form: percent sign, X, then 2 hex digits of one input byte. */
426 uniValueString
[valueStringLength
++] = (UChar
) UNICODE_PERCENT_SIGN_CODEPOINT
; /* adding % */
427 uniValueString
[valueStringLength
++] = (UChar
) UNICODE_X_CODEPOINT
; /* adding X */
428 valueStringLength
+= uprv_itou (uniValueString
+ valueStringLength
, VALUE_STRING_LENGTH
- valueStringLength
, (uint8_t) codeUnits
[i
++], 16, 2);
433 switch(*((char*)context
))
/* XML decimal form: ampersand, hash, the decimal value of one input byte, semicolon. */
435 case UCNV_PRV_ESCAPE_XML_DEC
:
438 uniValueString
[valueStringLength
++] = (UChar
) UNICODE_AMP_CODEPOINT
; /* adding & */
439 uniValueString
[valueStringLength
++] = (UChar
) UNICODE_HASH_CODEPOINT
; /* adding # */
440 valueStringLength
+= uprv_itou (uniValueString
+ valueStringLength
, VALUE_STRING_LENGTH
- valueStringLength
, (uint8_t)codeUnits
[i
++], 10, 0);
441 uniValueString
[valueStringLength
++] = (UChar
) UNICODE_SEMICOLON_CODEPOINT
; /* adding ; */
/* XML hex form: ampersand, hash, x, the hex value of one input byte, semicolon. */
445 case UCNV_PRV_ESCAPE_XML_HEX
:
448 uniValueString
[valueStringLength
++] = (UChar
) UNICODE_AMP_CODEPOINT
; /* adding & */
449 uniValueString
[valueStringLength
++] = (UChar
) UNICODE_HASH_CODEPOINT
; /* adding # */
450 uniValueString
[valueStringLength
++] = (UChar
) UNICODE_X_LOW_CODEPOINT
; /* adding x */
451 valueStringLength
+= uprv_itou (uniValueString
+ valueStringLength
, VALUE_STRING_LENGTH
- valueStringLength
, (uint8_t)codeUnits
[i
++], 16, 0);
452 uniValueString
[valueStringLength
++] = (UChar
) UNICODE_SEMICOLON_CODEPOINT
; /* adding ; */
/* C form: backslash, x, then 2 hex digits of one input byte. */
455 case UCNV_PRV_ESCAPE_C
:
458 uniValueString
[valueStringLength
++] = (UChar
) UNICODE_RS_CODEPOINT
; /* adding \ */
459 uniValueString
[valueStringLength
++] = (UChar
) UNICODE_X_LOW_CODEPOINT
; /* adding x */
460 valueStringLength
+= uprv_itou (uniValueString
+ valueStringLength
, VALUE_STRING_LENGTH
- valueStringLength
, (uint8_t)codeUnits
[i
++], 16, 2);
/* Same percent-X form as before the switch (presumably the default case; its label is not visible here). */
466 uniValueString
[valueStringLength
++] = (UChar
) UNICODE_PERCENT_SIGN_CODEPOINT
; /* adding % */
467 uniValueString
[valueStringLength
++] = (UChar
) UNICODE_X_CODEPOINT
; /* adding X */
/* NOTE(review): unlike the other branches, the uprv_itou() result is discarded here and the length is advanced by a fixed 2 on the next statement. */
468 uprv_itou (uniValueString
+ valueStringLength
, VALUE_STRING_LENGTH
- valueStringLength
, (uint8_t) codeUnits
[i
++], 16, 2);
469 valueStringLength
+= 2;
473 /* reset the error */
/* Hand the valueStringLength UChars collected in uniValueString to the converter's output. */
476 ucnv_cbToUWriteUChars(toArgs
, uniValueString
, valueStringLength
, 0, err
);