]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/ucnv_err.c
ICU-491.11.3.tar.gz
[apple/icu.git] / icuSources / common / ucnv_err.c
CommitLineData
b75a7d8f
A
1/*
2 *****************************************************************************
3 *
46f4442e 4 * Copyright (C) 1998-2007, International Business Machines
b75a7d8f
A
5 * Corporation and others. All Rights Reserved.
6 *
7 *****************************************************************************
8 *
9 * ucnv_err.c
10 * Implements error behaviour functions called by T_UConverter_{from,to}Unicode
11 *
12 *
13* Change history:
14*
15* 06/29/2000 helena Major rewrite of the callback APIs.
16*/
17
374ca955
A
18#include "unicode/utypes.h"
19
20#if !UCONFIG_NO_CONVERSION
21
b75a7d8f
A
22#include "unicode/ucnv_err.h"
23#include "unicode/ucnv_cb.h"
24#include "ucnv_cnv.h"
25#include "cmemory.h"
26#include "unicode/ucnv.h"
27#include "ustrfmt.h"
28
/*
 * Capacity, in UChars, of the scratch buffers in which the ESCAPE callbacks
 * build their replacement text.
 *
 * 48 = 6 (UChars in the longest per-byte escape written by
 *         UCNV_TO_U_CALLBACK_ESCAPE, i.e. "&#255;" or "&#xFF;")
 *    * 8 (max number of bytes per char for any converter)
 *
 * The previous value of 32 only covered the 4-UChar "%XHH" form and was too
 * small for the XML forms once more than five bytes had to be escaped.
 */
#define VALUE_STRING_LENGTH 48

/* Code points of the literal characters used to build the escape sequences. */
#define UNICODE_PERCENT_SIGN_CODEPOINT 0x0025
#define UNICODE_U_CODEPOINT 0x0055
#define UNICODE_X_CODEPOINT 0x0058
#define UNICODE_RS_CODEPOINT 0x005C
#define UNICODE_U_LOW_CODEPOINT 0x0075
#define UNICODE_X_LOW_CODEPOINT 0x0078
#define UNICODE_AMP_CODEPOINT 0x0026
#define UNICODE_HASH_CODEPOINT 0x0023
#define UNICODE_SEMICOLON_CODEPOINT 0x003B
#define UNICODE_PLUS_CODEPOINT 0x002B
#define UNICODE_LEFT_CURLY_CODEPOINT 0x007B
#define UNICODE_RIGHT_CURLY_CODEPOINT 0x007D
#define UNICODE_SPACE_CODEPOINT 0x0020

/*
 * Option selectors. The callbacks in this file compare these against the
 * first char that their "context" pointer refers to.
 * NOTE(review): presumably these match the first character of the public
 * context strings declared in unicode/ucnv_err.h -- confirm there.
 */
#define UCNV_PRV_ESCAPE_ICU 0
#define UCNV_PRV_ESCAPE_C 'C'
#define UCNV_PRV_ESCAPE_XML_DEC 'D'
#define UCNV_PRV_ESCAPE_XML_HEX 'X'
#define UCNV_PRV_ESCAPE_JAVA 'J'
#define UCNV_PRV_ESCAPE_UNICODE 'U'
#define UCNV_PRV_ESCAPE_CSS2 'S'
#define UCNV_PRV_STOP_ON_ILLEGAL 'i'
b75a7d8f
A
52
53/*Function Pointer STOPS at the ILLEGAL_SEQUENCE */
54U_CAPI void U_EXPORT2
55UCNV_FROM_U_CALLBACK_STOP (
56 const void *context,
57 UConverterFromUnicodeArgs *fromUArgs,
58 const UChar* codeUnits,
59 int32_t length,
60 UChar32 codePoint,
61 UConverterCallbackReason reason,
62 UErrorCode * err)
63{
46f4442e
A
64 /* the caller must have set the error code accordingly */
65 return;
b75a7d8f
A
66}
67
68
69/*Function Pointer STOPS at the ILLEGAL_SEQUENCE */
70U_CAPI void U_EXPORT2
71UCNV_TO_U_CALLBACK_STOP (
72 const void *context,
73 UConverterToUnicodeArgs *toUArgs,
74 const char* codePoints,
75 int32_t length,
76 UConverterCallbackReason reason,
77 UErrorCode * err)
78{
46f4442e
A
79 /* the caller must have set the error code accordingly */
80 return;
b75a7d8f
A
81}
82
83U_CAPI void U_EXPORT2
84UCNV_FROM_U_CALLBACK_SKIP (
85 const void *context,
86 UConverterFromUnicodeArgs *fromUArgs,
87 const UChar* codeUnits,
88 int32_t length,
89 UChar32 codePoint,
90 UConverterCallbackReason reason,
91 UErrorCode * err)
92{
46f4442e 93 if (reason <= UCNV_IRREGULAR)
b75a7d8f 94 {
46f4442e 95 if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
b75a7d8f
A
96 {
97 *err = U_ZERO_ERROR;
b75a7d8f 98 }
46f4442e 99 /* else the caller must have set the error code accordingly. */
b75a7d8f 100 }
46f4442e 101 /* else ignore the reset, close and clone calls. */
b75a7d8f
A
102}
103
104U_CAPI void U_EXPORT2
105UCNV_FROM_U_CALLBACK_SUBSTITUTE (
106 const void *context,
107 UConverterFromUnicodeArgs *fromArgs,
108 const UChar* codeUnits,
109 int32_t length,
110 UChar32 codePoint,
111 UConverterCallbackReason reason,
112 UErrorCode * err)
113{
46f4442e 114 if (reason <= UCNV_IRREGULAR)
b75a7d8f 115 {
46f4442e 116 if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
b75a7d8f
A
117 {
118 *err = U_ZERO_ERROR;
119 ucnv_cbFromUWriteSub(fromArgs, 0, err);
b75a7d8f 120 }
46f4442e 121 /* else the caller must have set the error code accordingly. */
b75a7d8f 122 }
46f4442e 123 /* else ignore the reset, close and clone calls. */
b75a7d8f
A
124}
125
126/*uses uprv_itou to get a unicode escape sequence of the offensive sequence,
127 *uses a clean copy (resetted) of the converter, to convert that unicode
128 *escape sequence to the target codepage (if conversion failure happens then
129 *we revert to substituting with subchar)
130 */
131U_CAPI void U_EXPORT2
132UCNV_FROM_U_CALLBACK_ESCAPE (
133 const void *context,
134 UConverterFromUnicodeArgs *fromArgs,
135 const UChar *codeUnits,
136 int32_t length,
137 UChar32 codePoint,
138 UConverterCallbackReason reason,
139 UErrorCode * err)
140{
141
142 UChar valueString[VALUE_STRING_LENGTH];
143 int32_t valueStringLength = 0;
144 int32_t i = 0;
145
146 const UChar *myValueSource = NULL;
147 UErrorCode err2 = U_ZERO_ERROR;
148 UConverterFromUCallback original = NULL;
149 const void *originalContext;
150
151 UConverterFromUCallback ignoredCallback = NULL;
152 const void *ignoredContext;
153
154 if (reason > UCNV_IRREGULAR)
155 {
46f4442e 156 return;
b75a7d8f
A
157 }
158
159 ucnv_setFromUCallBack (fromArgs->converter,
160 (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_SUBSTITUTE,
161 NULL,
162 &original,
163 &originalContext,
164 &err2);
165
166 if (U_FAILURE (err2))
167 {
168 *err = err2;
169 return;
170 }
171 if(context==NULL)
172 {
173 while (i < length)
174 {
175 valueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
176 valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */
177 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4);
178 }
179 }
180 else
181 {
182 switch(*((char*)context))
183 {
46f4442e 184 case UCNV_PRV_ESCAPE_JAVA:
b75a7d8f
A
185 while (i < length)
186 {
46f4442e
A
187 valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */
188 valueString[valueStringLength++] = (UChar) UNICODE_U_LOW_CODEPOINT; /* adding u */
189 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4);
b75a7d8f
A
190 }
191 break;
192
46f4442e
A
193 case UCNV_PRV_ESCAPE_C:
194 valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */
b75a7d8f 195
46f4442e
A
196 if(length==2){
197 valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */
198 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 8);
199
200 }
201 else{
202 valueString[valueStringLength++] = (UChar) UNICODE_U_LOW_CODEPOINT; /* adding u */
203 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 4);
204 }
b75a7d8f
A
205 break;
206
46f4442e
A
207 case UCNV_PRV_ESCAPE_XML_DEC:
208
209 valueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */
210 valueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */
211 if(length==2){
212 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 10, 0);
213 }
214 else{
215 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 10, 0);
216 }
217 valueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
b75a7d8f
A
218 break;
219
46f4442e 220 case UCNV_PRV_ESCAPE_XML_HEX:
b75a7d8f 221
46f4442e
A
222 valueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */
223 valueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */
224 valueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */
225 if(length==2){
226 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0);
227 }
228 else{
229 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 0);
230 }
231 valueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
b75a7d8f
A
232 break;
233
46f4442e 234 case UCNV_PRV_ESCAPE_UNICODE:
b75a7d8f
A
235 valueString[valueStringLength++] = (UChar) UNICODE_LEFT_CURLY_CODEPOINT; /* adding { */
236 valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */
237 valueString[valueStringLength++] = (UChar) UNICODE_PLUS_CODEPOINT; /* adding + */
238 if (length == 2) {
46f4442e 239 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 4);
b75a7d8f 240 } else {
46f4442e 241 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 4);
b75a7d8f
A
242 }
243 valueString[valueStringLength++] = (UChar) UNICODE_RIGHT_CURLY_CODEPOINT; /* adding } */
244 break;
245
46f4442e
A
246 case UCNV_PRV_ESCAPE_CSS2:
247 valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */
248 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0);
249 /* Always add space character, becase the next character might be whitespace,
250 which would erroneously be considered the termination of the escape sequence. */
251 valueString[valueStringLength++] = (UChar) UNICODE_SPACE_CODEPOINT;
252 break;
253
254 default:
b75a7d8f
A
255 while (i < length)
256 {
46f4442e
A
257 valueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
258 valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */
259 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4);
b75a7d8f
A
260 }
261 }
b75a7d8f
A
262 }
263 myValueSource = valueString;
264
265 /* reset the error */
266 *err = U_ZERO_ERROR;
267
268 ucnv_cbFromUWriteUChars(fromArgs, &myValueSource, myValueSource+valueStringLength, 0, err);
269
270 ucnv_setFromUCallBack (fromArgs->converter,
271 original,
272 originalContext,
273 &ignoredCallback,
274 &ignoredContext,
275 &err2);
276 if (U_FAILURE (err2))
46f4442e 277 {
b75a7d8f
A
278 *err = err2;
279 return;
46f4442e 280 }
b75a7d8f
A
281
282 return;
283}
284
285
286
287U_CAPI void U_EXPORT2
288UCNV_TO_U_CALLBACK_SKIP (
289 const void *context,
290 UConverterToUnicodeArgs *toArgs,
291 const char* codeUnits,
292 int32_t length,
293 UConverterCallbackReason reason,
294 UErrorCode * err)
295{
46f4442e 296 if (reason <= UCNV_IRREGULAR)
b75a7d8f 297 {
46f4442e 298 if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
b75a7d8f
A
299 {
300 *err = U_ZERO_ERROR;
b75a7d8f 301 }
46f4442e 302 /* else the caller must have set the error code accordingly. */
b75a7d8f 303 }
46f4442e 304 /* else ignore the reset, close and clone calls. */
b75a7d8f
A
305}
306
307U_CAPI void U_EXPORT2
308UCNV_TO_U_CALLBACK_SUBSTITUTE (
309 const void *context,
310 UConverterToUnicodeArgs *toArgs,
311 const char* codeUnits,
312 int32_t length,
313 UConverterCallbackReason reason,
314 UErrorCode * err)
315{
46f4442e 316 if (reason <= UCNV_IRREGULAR)
b75a7d8f 317 {
46f4442e 318 if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
b75a7d8f
A
319 {
320 *err = U_ZERO_ERROR;
321 ucnv_cbToUWriteSub(toArgs,0,err);
b75a7d8f 322 }
46f4442e 323 /* else the caller must have set the error code accordingly. */
b75a7d8f 324 }
46f4442e 325 /* else ignore the reset, close and clone calls. */
b75a7d8f
A
326}
327
328/*uses uprv_itou to get a unicode escape sequence of the offensive sequence,
329 *and uses that as the substitution sequence
330 */
331U_CAPI void U_EXPORT2
332UCNV_TO_U_CALLBACK_ESCAPE (
333 const void *context,
334 UConverterToUnicodeArgs *toArgs,
335 const char* codeUnits,
336 int32_t length,
337 UConverterCallbackReason reason,
338 UErrorCode * err)
339{
340 UChar uniValueString[VALUE_STRING_LENGTH];
341 int32_t valueStringLength = 0;
342 int32_t i = 0;
343
344 if (reason > UCNV_IRREGULAR)
345 {
346 return;
347 }
348
46f4442e
A
349 if(context==NULL)
350 {
351 while (i < length)
352 {
353 uniValueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
354 uniValueString[valueStringLength++] = (UChar) UNICODE_X_CODEPOINT; /* adding X */
355 valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t) codeUnits[i++], 16, 2);
356 }
b75a7d8f 357 }
46f4442e
A
358 else
359 {
360 switch(*((char*)context))
361 {
b75a7d8f 362 case UCNV_PRV_ESCAPE_XML_DEC:
46f4442e
A
363 while (i < length)
364 {
b75a7d8f
A
365 uniValueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */
366 uniValueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */
367 valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 10, 0);
368 uniValueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
46f4442e
A
369 }
370 break;
b75a7d8f
A
371
372 case UCNV_PRV_ESCAPE_XML_HEX:
46f4442e
A
373 while (i < length)
374 {
b75a7d8f
A
375 uniValueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */
376 uniValueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */
377 uniValueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */
378 valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 16, 0);
379 uniValueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
46f4442e
A
380 }
381 break;
b75a7d8f 382 case UCNV_PRV_ESCAPE_C:
46f4442e
A
383 while (i < length)
384 {
b75a7d8f
A
385 uniValueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */
386 uniValueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */
387 valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 16, 2);
46f4442e
A
388 }
389 break;
b75a7d8f 390 default:
46f4442e
A
391 while (i < length)
392 {
b75a7d8f
A
393 uniValueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
394 uniValueString[valueStringLength++] = (UChar) UNICODE_X_CODEPOINT; /* adding X */
395 uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t) codeUnits[i++], 16, 2);
396 valueStringLength += 2;
46f4442e
A
397 }
398 }
399 }
b75a7d8f
A
400 /* reset the error */
401 *err = U_ZERO_ERROR;
402
403 ucnv_cbToUWriteUChars(toArgs, uniValueString, valueStringLength, 0, err);
404}
374ca955
A
405
406#endif