2 *****************************************************************************
4 * Copyright (C) 1998-2007, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 *****************************************************************************
10 * Implements error behaviour functions called by T_UConverter_{from,to}Unicode
15 * 06/29/2000 helena Major rewrite of the callback APIs.
18 #include "unicode/utypes.h"
20 #if !UCONFIG_NO_CONVERSION
22 #include "unicode/ucnv_err.h"
23 #include "unicode/ucnv_cb.h"
26 #include "unicode/ucnv.h"
/* Capacity, in UChars, of the local scratch arrays (valueString / uniValueString)
 * in which the escape callbacks build their replacement text; it is also the
 * bound from which the remaining capacity passed to uprv_itou() is computed. */
29 #define VALUE_STRING_LENGTH 32
30 /*Magic # 32 = 4(number of char in value string) * 8(max number of bytes per char for any converter) */
/* Code points of the ASCII characters that the escape callbacks append to the
 * scratch buffer, in the order defined below:
 * '%', 'U', 'X', backslash, 'u', 'x', '&', '#', ';', '+', '{', '}', space. */
31 #define UNICODE_PERCENT_SIGN_CODEPOINT 0x0025
32 #define UNICODE_U_CODEPOINT 0x0055
33 #define UNICODE_X_CODEPOINT 0x0058
34 #define UNICODE_RS_CODEPOINT 0x005C
35 #define UNICODE_U_LOW_CODEPOINT 0x0075
36 #define UNICODE_X_LOW_CODEPOINT 0x0078
37 #define UNICODE_AMP_CODEPOINT 0x0026
38 #define UNICODE_HASH_CODEPOINT 0x0023
39 #define UNICODE_SEMICOLON_CODEPOINT 0x003B
40 #define UNICODE_PLUS_CODEPOINT 0x002B
41 #define UNICODE_LEFT_CURLY_CODEPOINT 0x007B
42 #define UNICODE_RIGHT_CURLY_CODEPOINT 0x007D
43 #define UNICODE_SPACE_CODEPOINT 0x0020
/* Escape-style selectors. The escape callbacks switch on the first char of the
 * callback `context` and compare it against these values to choose the output
 * format (C, XML decimal, XML hex, Java, Unicode "{U+...}", CSS2).
 * NOTE(review): UCNV_PRV_ESCAPE_ICU (0) has no case label in the visible code;
 * presumably it denotes the default "%U"/"%X" form -- confirm. */
44 #define UCNV_PRV_ESCAPE_ICU 0
45 #define UCNV_PRV_ESCAPE_C 'C'
46 #define UCNV_PRV_ESCAPE_XML_DEC 'D'
47 #define UCNV_PRV_ESCAPE_XML_HEX 'X'
48 #define UCNV_PRV_ESCAPE_JAVA 'J'
49 #define UCNV_PRV_ESCAPE_UNICODE 'U'
50 #define UCNV_PRV_ESCAPE_CSS2 'S'
/* Context option tested by the SKIP and SUBSTITUTE callbacks: when the first
 * char of `context` equals this value, their condition additionally requires
 * reason == UCNV_UNASSIGNED. */
51 #define UCNV_PRV_STOP_ON_ILLEGAL 'i'
53 /*Function Pointer STOPS at the ILLEGAL_SEQUENCE */
55 UCNV_FROM_U_CALLBACK_STOP (
57 UConverterFromUnicodeArgs
*fromUArgs
,
58 const UChar
* codeUnits
,
61 UConverterCallbackReason reason
,
64 /* the caller must have set the error code accordingly */
69 /*Function Pointer STOPS at the ILLEGAL_SEQUENCE */
71 UCNV_TO_U_CALLBACK_STOP (
73 UConverterToUnicodeArgs
*toUArgs
,
74 const char* codePoints
,
76 UConverterCallbackReason reason
,
79 /* the caller must have set the error code accordingly */
84 UCNV_FROM_U_CALLBACK_SKIP (
86 UConverterFromUnicodeArgs
*fromUArgs
,
87 const UChar
* codeUnits
,
90 UConverterCallbackReason reason
,
93 if (reason
<= UCNV_IRREGULAR
)
95 if (context
== NULL
|| (*((char*)context
) == UCNV_PRV_STOP_ON_ILLEGAL
&& reason
== UCNV_UNASSIGNED
))
99 /* else the caller must have set the error code accordingly. */
101 /* else ignore the reset, close and clone calls. */
104 U_CAPI
void U_EXPORT2
105 UCNV_FROM_U_CALLBACK_SUBSTITUTE (
107 UConverterFromUnicodeArgs
*fromArgs
,
108 const UChar
* codeUnits
,
111 UConverterCallbackReason reason
,
114 if (reason
<= UCNV_IRREGULAR
)
116 if (context
== NULL
|| (*((char*)context
) == UCNV_PRV_STOP_ON_ILLEGAL
&& reason
== UCNV_UNASSIGNED
))
119 ucnv_cbFromUWriteSub(fromArgs
, 0, err
);
121 /* else the caller must have set the error code accordingly. */
123 /* else ignore the reset, close and clone calls. */
126 /*uses uprv_itou to get a Unicode escape sequence of the offending sequence,
127 *uses a clean (reset) copy of the converter to convert that Unicode
128 *escape sequence to the target codepage (if the conversion fails, then
129 *we revert to substituting with the subchar)
131 U_CAPI
void U_EXPORT2
132 UCNV_FROM_U_CALLBACK_ESCAPE (
134 UConverterFromUnicodeArgs
*fromArgs
,
135 const UChar
*codeUnits
,
138 UConverterCallbackReason reason
,
142 UChar valueString
[VALUE_STRING_LENGTH
];
143 int32_t valueStringLength
= 0;
146 const UChar
*myValueSource
= NULL
;
147 UErrorCode err2
= U_ZERO_ERROR
;
148 UConverterFromUCallback original
= NULL
;
149 const void *originalContext
;
151 UConverterFromUCallback ignoredCallback
= NULL
;
152 const void *ignoredContext
;
154 if (reason
> UCNV_IRREGULAR
)
159 ucnv_setFromUCallBack (fromArgs
->converter
,
160 (UConverterFromUCallback
) UCNV_FROM_U_CALLBACK_SUBSTITUTE
,
166 if (U_FAILURE (err2
))
175 valueString
[valueStringLength
++] = (UChar
) UNICODE_PERCENT_SIGN_CODEPOINT
; /* adding % */
176 valueString
[valueStringLength
++] = (UChar
) UNICODE_U_CODEPOINT
; /* adding U */
177 valueStringLength
+= uprv_itou (valueString
+ valueStringLength
, VALUE_STRING_LENGTH
- valueStringLength
, (uint16_t)codeUnits
[i
++], 16, 4);
182 switch(*((char*)context
))
184 case UCNV_PRV_ESCAPE_JAVA
:
187 valueString
[valueStringLength
++] = (UChar
) UNICODE_RS_CODEPOINT
; /* adding \ */
188 valueString
[valueStringLength
++] = (UChar
) UNICODE_U_LOW_CODEPOINT
; /* adding u */
189 valueStringLength
+= uprv_itou (valueString
+ valueStringLength
, VALUE_STRING_LENGTH
- valueStringLength
, (uint16_t)codeUnits
[i
++], 16, 4);
193 case UCNV_PRV_ESCAPE_C
:
194 valueString
[valueStringLength
++] = (UChar
) UNICODE_RS_CODEPOINT
; /* adding \ */
197 valueString
[valueStringLength
++] = (UChar
) UNICODE_U_CODEPOINT
; /* adding U */
198 valueStringLength
+= uprv_itou (valueString
+ valueStringLength
, VALUE_STRING_LENGTH
- valueStringLength
, codePoint
, 16, 8);
202 valueString
[valueStringLength
++] = (UChar
) UNICODE_U_LOW_CODEPOINT
; /* adding u */
203 valueStringLength
+= uprv_itou (valueString
+ valueStringLength
, VALUE_STRING_LENGTH
- valueStringLength
, (uint16_t)codeUnits
[0], 16, 4);
207 case UCNV_PRV_ESCAPE_XML_DEC
:
209 valueString
[valueStringLength
++] = (UChar
) UNICODE_AMP_CODEPOINT
; /* adding & */
210 valueString
[valueStringLength
++] = (UChar
) UNICODE_HASH_CODEPOINT
; /* adding # */
212 valueStringLength
+= uprv_itou (valueString
+ valueStringLength
, VALUE_STRING_LENGTH
- valueStringLength
, codePoint
, 10, 0);
215 valueStringLength
+= uprv_itou (valueString
+ valueStringLength
, VALUE_STRING_LENGTH
- valueStringLength
, (uint16_t)codeUnits
[0], 10, 0);
217 valueString
[valueStringLength
++] = (UChar
) UNICODE_SEMICOLON_CODEPOINT
; /* adding ; */
220 case UCNV_PRV_ESCAPE_XML_HEX
:
222 valueString
[valueStringLength
++] = (UChar
) UNICODE_AMP_CODEPOINT
; /* adding & */
223 valueString
[valueStringLength
++] = (UChar
) UNICODE_HASH_CODEPOINT
; /* adding # */
224 valueString
[valueStringLength
++] = (UChar
) UNICODE_X_LOW_CODEPOINT
; /* adding x */
226 valueStringLength
+= uprv_itou (valueString
+ valueStringLength
, VALUE_STRING_LENGTH
- valueStringLength
, codePoint
, 16, 0);
229 valueStringLength
+= uprv_itou (valueString
+ valueStringLength
, VALUE_STRING_LENGTH
- valueStringLength
, (uint16_t)codeUnits
[0], 16, 0);
231 valueString
[valueStringLength
++] = (UChar
) UNICODE_SEMICOLON_CODEPOINT
; /* adding ; */
234 case UCNV_PRV_ESCAPE_UNICODE
:
235 valueString
[valueStringLength
++] = (UChar
) UNICODE_LEFT_CURLY_CODEPOINT
; /* adding { */
236 valueString
[valueStringLength
++] = (UChar
) UNICODE_U_CODEPOINT
; /* adding U */
237 valueString
[valueStringLength
++] = (UChar
) UNICODE_PLUS_CODEPOINT
; /* adding + */
239 valueStringLength
+= uprv_itou (valueString
+ valueStringLength
, VALUE_STRING_LENGTH
- valueStringLength
, codePoint
, 16, 4);
241 valueStringLength
+= uprv_itou (valueString
+ valueStringLength
, VALUE_STRING_LENGTH
- valueStringLength
, (uint16_t)codeUnits
[0], 16, 4);
243 valueString
[valueStringLength
++] = (UChar
) UNICODE_RIGHT_CURLY_CODEPOINT
; /* adding } */
246 case UCNV_PRV_ESCAPE_CSS2
:
247 valueString
[valueStringLength
++] = (UChar
) UNICODE_RS_CODEPOINT
; /* adding \ */
248 valueStringLength
+= uprv_itou (valueString
+ valueStringLength
, VALUE_STRING_LENGTH
- valueStringLength
, codePoint
, 16, 0);
249 /* Always add a space character, because the next character might be whitespace,
250 which would erroneously be considered the termination of the escape sequence. */
251 valueString
[valueStringLength
++] = (UChar
) UNICODE_SPACE_CODEPOINT
;
257 valueString
[valueStringLength
++] = (UChar
) UNICODE_PERCENT_SIGN_CODEPOINT
; /* adding % */
258 valueString
[valueStringLength
++] = (UChar
) UNICODE_U_CODEPOINT
; /* adding U */
259 valueStringLength
+= uprv_itou (valueString
+ valueStringLength
, VALUE_STRING_LENGTH
- valueStringLength
, (uint16_t)codeUnits
[i
++], 16, 4);
263 myValueSource
= valueString
;
265 /* reset the error */
268 ucnv_cbFromUWriteUChars(fromArgs
, &myValueSource
, myValueSource
+valueStringLength
, 0, err
);
270 ucnv_setFromUCallBack (fromArgs
->converter
,
276 if (U_FAILURE (err2
))
287 U_CAPI
void U_EXPORT2
288 UCNV_TO_U_CALLBACK_SKIP (
290 UConverterToUnicodeArgs
*toArgs
,
291 const char* codeUnits
,
293 UConverterCallbackReason reason
,
296 if (reason
<= UCNV_IRREGULAR
)
298 if (context
== NULL
|| (*((char*)context
) == UCNV_PRV_STOP_ON_ILLEGAL
&& reason
== UCNV_UNASSIGNED
))
302 /* else the caller must have set the error code accordingly. */
304 /* else ignore the reset, close and clone calls. */
307 U_CAPI
void U_EXPORT2
308 UCNV_TO_U_CALLBACK_SUBSTITUTE (
310 UConverterToUnicodeArgs
*toArgs
,
311 const char* codeUnits
,
313 UConverterCallbackReason reason
,
316 if (reason
<= UCNV_IRREGULAR
)
318 if (context
== NULL
|| (*((char*)context
) == UCNV_PRV_STOP_ON_ILLEGAL
&& reason
== UCNV_UNASSIGNED
))
321 ucnv_cbToUWriteSub(toArgs
,0,err
);
323 /* else the caller must have set the error code accordingly. */
325 /* else ignore the reset, close and clone calls. */
328 /*uses uprv_itou to get a Unicode escape sequence of the offending sequence,
329 *and uses that as the substitution sequence
331 U_CAPI
void U_EXPORT2
332 UCNV_TO_U_CALLBACK_ESCAPE (
334 UConverterToUnicodeArgs
*toArgs
,
335 const char* codeUnits
,
337 UConverterCallbackReason reason
,
340 UChar uniValueString
[VALUE_STRING_LENGTH
];
341 int32_t valueStringLength
= 0;
344 if (reason
> UCNV_IRREGULAR
)
353 uniValueString
[valueStringLength
++] = (UChar
) UNICODE_PERCENT_SIGN_CODEPOINT
; /* adding % */
354 uniValueString
[valueStringLength
++] = (UChar
) UNICODE_X_CODEPOINT
; /* adding X */
355 valueStringLength
+= uprv_itou (uniValueString
+ valueStringLength
, VALUE_STRING_LENGTH
- valueStringLength
, (uint8_t) codeUnits
[i
++], 16, 2);
360 switch(*((char*)context
))
362 case UCNV_PRV_ESCAPE_XML_DEC
:
365 uniValueString
[valueStringLength
++] = (UChar
) UNICODE_AMP_CODEPOINT
; /* adding & */
366 uniValueString
[valueStringLength
++] = (UChar
) UNICODE_HASH_CODEPOINT
; /* adding # */
367 valueStringLength
+= uprv_itou (uniValueString
+ valueStringLength
, VALUE_STRING_LENGTH
- valueStringLength
, (uint8_t)codeUnits
[i
++], 10, 0);
368 uniValueString
[valueStringLength
++] = (UChar
) UNICODE_SEMICOLON_CODEPOINT
; /* adding ; */
372 case UCNV_PRV_ESCAPE_XML_HEX
:
375 uniValueString
[valueStringLength
++] = (UChar
) UNICODE_AMP_CODEPOINT
; /* adding & */
376 uniValueString
[valueStringLength
++] = (UChar
) UNICODE_HASH_CODEPOINT
; /* adding # */
377 uniValueString
[valueStringLength
++] = (UChar
) UNICODE_X_LOW_CODEPOINT
; /* adding x */
378 valueStringLength
+= uprv_itou (uniValueString
+ valueStringLength
, VALUE_STRING_LENGTH
- valueStringLength
, (uint8_t)codeUnits
[i
++], 16, 0);
379 uniValueString
[valueStringLength
++] = (UChar
) UNICODE_SEMICOLON_CODEPOINT
; /* adding ; */
382 case UCNV_PRV_ESCAPE_C
:
385 uniValueString
[valueStringLength
++] = (UChar
) UNICODE_RS_CODEPOINT
; /* adding \ */
386 uniValueString
[valueStringLength
++] = (UChar
) UNICODE_X_LOW_CODEPOINT
; /* adding x */
387 valueStringLength
+= uprv_itou (uniValueString
+ valueStringLength
, VALUE_STRING_LENGTH
- valueStringLength
, (uint8_t)codeUnits
[i
++], 16, 2);
393 uniValueString
[valueStringLength
++] = (UChar
) UNICODE_PERCENT_SIGN_CODEPOINT
; /* adding % */
394 uniValueString
[valueStringLength
++] = (UChar
) UNICODE_X_CODEPOINT
; /* adding X */
395 uprv_itou (uniValueString
+ valueStringLength
, VALUE_STRING_LENGTH
- valueStringLength
, (uint8_t) codeUnits
[i
++], 16, 2);
396 valueStringLength
+= 2;
400 /* reset the error */
403 ucnv_cbToUWriteUChars(toArgs
, uniValueString
, valueStringLength
, 0, err
);