git.saurik.com Git - apple/icu.git/blame_incremental

... / ...

Commit	Line	Data
	1	// © 2016 and later: Unicode, Inc. and others.
	2	// License & terms of use: http://www.unicode.org/copyright.html
	3	/*
	4	*****************************************************************************
	5	*
	6	* Copyright (C) 1998-2016, International Business Machines
	7	* Corporation and others. All Rights Reserved.
	8	*
	9	*****************************************************************************
	10	*
	11	* ucnv_err.c
	12	* Implements error behaviour functions called by T_UConverter_{from,to}Unicode
	13	*
	14	*
	15	* Change history:
	16	*
	17	* 06/29/2000 helena Major rewrite of the callback APIs.
	18	*/
	19
	20	#include "unicode/utypes.h"
	21
	22	#if !UCONFIG_NO_CONVERSION
	23
	24	#include "unicode/ucnv_err.h"
	25	#include "unicode/ucnv_cb.h"
	26	#include "ucnv_cnv.h"
	27	#include "cmemory.h"
	28	#include "unicode/ucnv.h"
	29	#include "ustrfmt.h"
	30
	31	#define VALUE_STRING_LENGTH 48
	32	/Magic # 32 = 4(number of char in value string) 8(max number of bytes per char for any converter) */
	33	#define UNICODE_PERCENT_SIGN_CODEPOINT 0x0025
	34	#define UNICODE_U_CODEPOINT 0x0055
	35	#define UNICODE_X_CODEPOINT 0x0058
	36	#define UNICODE_RS_CODEPOINT 0x005C
	37	#define UNICODE_U_LOW_CODEPOINT 0x0075
	38	#define UNICODE_X_LOW_CODEPOINT 0x0078
	39	#define UNICODE_AMP_CODEPOINT 0x0026
	40	#define UNICODE_HASH_CODEPOINT 0x0023
	41	#define UNICODE_SEMICOLON_CODEPOINT 0x003B
	42	#define UNICODE_PLUS_CODEPOINT 0x002B
	43	#define UNICODE_LEFT_CURLY_CODEPOINT 0x007B
	44	#define UNICODE_RIGHT_CURLY_CODEPOINT 0x007D
	45	#define UNICODE_SPACE_CODEPOINT 0x0020
	46	#define UCNV_PRV_ESCAPE_ICU 0
	47	#define UCNV_PRV_ESCAPE_C 'C'
	48	#define UCNV_PRV_ESCAPE_XML_DEC 'D'
	49	#define UCNV_PRV_ESCAPE_XML_HEX 'X'
	50	#define UCNV_PRV_ESCAPE_JAVA 'J'
	51	#define UCNV_PRV_ESCAPE_UNICODE 'U'
	52	#define UCNV_PRV_ESCAPE_CSS2 'S'
	53	#define UCNV_PRV_STOP_ON_ILLEGAL 'i'
	54
	55	/*
	56	* IS_DEFAULT_IGNORABLE_CODE_POINT
	57	* This is to check if a code point has the default ignorable unicode property.
	58	* As such, this list needs to be updated if the ignorable code point list ever
	59	* changes.
	60	* To avoid dependency on other code, this list is hard coded here.
	61	* When an ignorable code point is found and is unmappable, the default callbacks
	62	* will ignore them.
	63	* For a list of the default ignorable code points, use this link: http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[%3ADI%3A]&g=
	64	*
	65	* This list should be sync with the one in CharsetCallback.java
	66	*/
	67	#define IS_DEFAULT_IGNORABLE_CODE_POINT(c) (\
	68	(c == 0x00AD) \|\| \
	69	(c == 0x034F) \|\| \
	70	(c == 0x061C) \|\| \
	71	(c == 0x115F) \|\| \
	72	(c == 0x1160) \|\| \
	73	(0x17B4 <= c && c <= 0x17B5) \|\| \
	74	(0x180B <= c && c <= 0x180E) \|\| \
	75	(0x200B <= c && c <= 0x200F) \|\| \
	76	(0x202A <= c && c <= 0x202E) \|\| \
	77	(c == 0x2060) \|\| \
	78	(0x2066 <= c && c <= 0x2069) \|\| \
	79	(0x2061 <= c && c <= 0x2064) \|\| \
	80	(0x206A <= c && c <= 0x206F) \|\| \
	81	(c == 0x3164) \|\| \
	82	(0x0FE00 <= c && c <= 0x0FE0F) \|\| \
	83	(c == 0x0FEFF) \|\| \
	84	(c == 0x0FFA0) \|\| \
	85	(0x01BCA0 <= c && c <= 0x01BCA3) \|\| \
	86	(0x01D173 <= c && c <= 0x01D17A) \|\| \
	87	(c == 0x0E0001) \|\| \
	88	(0x0E0020 <= c && c <= 0x0E007F) \|\| \
	89	(0x0E0100 <= c && c <= 0x0E01EF) \|\| \
	90	(c == 0x2065) \|\| \
	91	(0x0FFF0 <= c && c <= 0x0FFF8) \|\| \
	92	(c == 0x0E0000) \|\| \
	93	(0x0E0002 <= c && c <= 0x0E001F) \|\| \
	94	(0x0E0080 <= c && c <= 0x0E00FF) \|\| \
	95	(0x0E01F0 <= c && c <= 0x0E0FFF) \
	96	)
	97
	98
	99	/Function Pointer STOPS at the ILLEGAL_SEQUENCE /
	100	U_CAPI void U_EXPORT2
	101	UCNV_FROM_U_CALLBACK_STOP (
	102	const void *context,
	103	UConverterFromUnicodeArgs *fromUArgs,
	104	const UChar* codeUnits,
	105	int32_t length,
	106	UChar32 codePoint,
	107	UConverterCallbackReason reason,
	108	UErrorCode * err)
	109	{
	110	(void)context;
	111	(void)fromUArgs;
	112	(void)codeUnits;
	113	(void)length;
	114	if (reason == UCNV_UNASSIGNED && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint))
	115	{
	116	/*
	117	* Skip if the codepoint has unicode property of default ignorable.
	118	*/
	119	*err = U_ZERO_ERROR;
	120	}
	121	/* the caller must have set the error code accordingly */
	122	return;
	123	}
	124
	125
	126	/Function Pointer STOPS at the ILLEGAL_SEQUENCE /
	127	U_CAPI void U_EXPORT2
	128	UCNV_TO_U_CALLBACK_STOP (
	129	const void *context,
	130	UConverterToUnicodeArgs *toUArgs,
	131	const char* codePoints,
	132	int32_t length,
	133	UConverterCallbackReason reason,
	134	UErrorCode * err)
	135	{
	136	/* the caller must have set the error code accordingly */
	137	(void)context; (void)toUArgs; (void)codePoints; (void)length; (void)reason; (void)err;
	138	return;
	139	}
	140
	141	U_CAPI void U_EXPORT2
	142	UCNV_FROM_U_CALLBACK_SKIP (
	143	const void *context,
	144	UConverterFromUnicodeArgs *fromUArgs,
	145	const UChar* codeUnits,
	146	int32_t length,
	147	UChar32 codePoint,
	148	UConverterCallbackReason reason,
	149	UErrorCode * err)
	150	{
	151	(void)fromUArgs;
	152	(void)codeUnits;
	153	(void)length;
	154	if (reason <= UCNV_IRREGULAR)
	155	{
	156	if (reason == UCNV_UNASSIGNED && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint))
	157	{
	158	/*
	159	* Skip if the codepoint has unicode property of default ignorable.
	160	*/
	161	*err = U_ZERO_ERROR;
	162	}
	163	else if (context == NULL \|\| (((char)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
	164	{
	165	*err = U_ZERO_ERROR;
	166	}
	167	/* else the caller must have set the error code accordingly. */
	168	}
	169	/* else ignore the reset, close and clone calls. */
	170	}
	171
	172	U_CAPI void U_EXPORT2
	173	UCNV_FROM_U_CALLBACK_SUBSTITUTE (
	174	const void *context,
	175	UConverterFromUnicodeArgs *fromArgs,
	176	const UChar* codeUnits,
	177	int32_t length,
	178	UChar32 codePoint,
	179	UConverterCallbackReason reason,
	180	UErrorCode * err)
	181	{
	182	(void)codeUnits;
	183	(void)length;
	184	if (reason <= UCNV_IRREGULAR)
	185	{
	186	if (reason == UCNV_UNASSIGNED && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint))
	187	{
	188	/*
	189	* Skip if the codepoint has unicode property of default ignorable.
	190	*/
	191	*err = U_ZERO_ERROR;
	192	}
	193	else if (context == NULL \|\| (((char)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
	194	{
	195	*err = U_ZERO_ERROR;
	196	ucnv_cbFromUWriteSub(fromArgs, 0, err);
	197	}
	198	/* else the caller must have set the error code accordingly. */
	199	}
	200	/* else ignore the reset, close and clone calls. */
	201	}
	202
	203	/*uses uprv_itou to get a unicode escape sequence of the offensive sequence,
	204	*uses a clean copy (resetted) of the converter, to convert that unicode
	205	*escape sequence to the target codepage (if conversion failure happens then
	206	*we revert to substituting with subchar)
	207	*/
	208	U_CAPI void U_EXPORT2
	209	UCNV_FROM_U_CALLBACK_ESCAPE (
	210	const void *context,
	211	UConverterFromUnicodeArgs *fromArgs,
	212	const UChar *codeUnits,
	213	int32_t length,
	214	UChar32 codePoint,
	215	UConverterCallbackReason reason,
	216	UErrorCode * err)
	217	{
	218
	219	UChar valueString[VALUE_STRING_LENGTH];
	220	int32_t valueStringLength = 0;
	221	int32_t i = 0;
	222
	223	const UChar *myValueSource = NULL;
	224	UErrorCode err2 = U_ZERO_ERROR;
	225	UConverterFromUCallback original = NULL;
	226	const void *originalContext;
	227
	228	UConverterFromUCallback ignoredCallback = NULL;
	229	const void *ignoredContext;
	230
	231	if (reason > UCNV_IRREGULAR)
	232	{
	233	return;
	234	}
	235	else if (reason == UCNV_UNASSIGNED && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint))
	236	{
	237	/*
	238	* Skip if the codepoint has unicode property of default ignorable.
	239	*/
	240	*err = U_ZERO_ERROR;
	241	return;
	242	}
	243
	244	ucnv_setFromUCallBack (fromArgs->converter,
	245	(UConverterFromUCallback) UCNV_FROM_U_CALLBACK_SUBSTITUTE,
	246	NULL,
	247	&original,
	248	&originalContext,
	249	&err2);
	250
	251	if (U_FAILURE (err2))
	252	{
	253	*err = err2;
	254	return;
	255	}
	256	if(context==NULL)
	257	{
	258	while (i < length)
	259	{
	260	valueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
	261	valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */
	262	valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4);
	263	}
	264	}
	265	else
	266	{
	267	switch(((char)context))
	268	{
	269	case UCNV_PRV_ESCAPE_JAVA:
	270	while (i < length)
	271	{
	272	valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */
	273	valueString[valueStringLength++] = (UChar) UNICODE_U_LOW_CODEPOINT; /* adding u */
	274	valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4);
	275	}
	276	break;
	277
	278	case UCNV_PRV_ESCAPE_C:
	279	valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */
	280
	281	if(length==2){
	282	valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */
	283	valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 8);
	284
	285	}
	286	else{
	287	valueString[valueStringLength++] = (UChar) UNICODE_U_LOW_CODEPOINT; /* adding u */
	288	valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 4);
	289	}
	290	break;
	291
	292	case UCNV_PRV_ESCAPE_XML_DEC:
	293
	294	valueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */
	295	valueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */
	296	if(length==2){
	297	valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 10, 0);
	298	}
	299	else{
	300	valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 10, 0);
	301	}
	302	valueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
	303	break;
	304
	305	case UCNV_PRV_ESCAPE_XML_HEX:
	306
	307	valueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */
	308	valueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */
	309	valueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */
	310	if(length==2){
	311	valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0);
	312	}
	313	else{
	314	valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 0);
	315	}
	316	valueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
	317	break;
	318
	319	case UCNV_PRV_ESCAPE_UNICODE:
	320	valueString[valueStringLength++] = (UChar) UNICODE_LEFT_CURLY_CODEPOINT; /* adding { */
	321	valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */
	322	valueString[valueStringLength++] = (UChar) UNICODE_PLUS_CODEPOINT; /* adding + */
	323	if (length == 2) {
	324	valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 4);
	325	} else {
	326	valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 4);
	327	}
	328	valueString[valueStringLength++] = (UChar) UNICODE_RIGHT_CURLY_CODEPOINT; /* adding } */
	329	break;
	330
	331	case UCNV_PRV_ESCAPE_CSS2:
	332	valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */
	333	valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0);
	334	/* Always add space character, becase the next character might be whitespace,
	335	which would erroneously be considered the termination of the escape sequence. */
	336	valueString[valueStringLength++] = (UChar) UNICODE_SPACE_CODEPOINT;
	337	break;
	338
	339	default:
	340	while (i < length)
	341	{
	342	valueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
	343	valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */
	344	valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4);
	345	}
	346	}
	347	}
	348	myValueSource = valueString;
	349
	350	/* reset the error */
	351	*err = U_ZERO_ERROR;
	352
	353	ucnv_cbFromUWriteUChars(fromArgs, &myValueSource, myValueSource+valueStringLength, 0, err);
	354
	355	ucnv_setFromUCallBack (fromArgs->converter,
	356	original,
	357	originalContext,
	358	&ignoredCallback,
	359	&ignoredContext,
	360	&err2);
	361	if (U_FAILURE (err2))
	362	{
	363	*err = err2;
	364	return;
	365	}
	366
	367	return;
	368	}
	369
	370
	371
	372	U_CAPI void U_EXPORT2
	373	UCNV_TO_U_CALLBACK_SKIP (
	374	const void *context,
	375	UConverterToUnicodeArgs *toArgs,
	376	const char* codeUnits,
	377	int32_t length,
	378	UConverterCallbackReason reason,
	379	UErrorCode * err)
	380	{
	381	(void)toArgs;
	382	(void)codeUnits;
	383	(void)length;
	384	if (reason <= UCNV_IRREGULAR)
	385	{
	386	if (context == NULL \|\| (((char)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
	387	{
	388	*err = U_ZERO_ERROR;
	389	}
	390	/* else the caller must have set the error code accordingly. */
	391	}
	392	/* else ignore the reset, close and clone calls. */
	393	}
	394
	395	U_CAPI void U_EXPORT2
	396	UCNV_TO_U_CALLBACK_SUBSTITUTE (
	397	const void *context,
	398	UConverterToUnicodeArgs *toArgs,
	399	const char* codeUnits,
	400	int32_t length,
	401	UConverterCallbackReason reason,
	402	UErrorCode * err)
	403	{
	404	(void)codeUnits;
	405	(void)length;
	406	if (reason <= UCNV_IRREGULAR)
	407	{
	408	if (context == NULL \|\| (((char)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
	409	{
	410	*err = U_ZERO_ERROR;
	411	ucnv_cbToUWriteSub(toArgs,0,err);
	412	}
	413	/* else the caller must have set the error code accordingly. */
	414	}
	415	/* else ignore the reset, close and clone calls. */
	416	}
	417
	418	/*uses uprv_itou to get a unicode escape sequence of the offensive sequence,
	419	*and uses that as the substitution sequence
	420	*/
	421	U_CAPI void U_EXPORT2
	422	UCNV_TO_U_CALLBACK_ESCAPE (
	423	const void *context,
	424	UConverterToUnicodeArgs *toArgs,
	425	const char* codeUnits,
	426	int32_t length,
	427	UConverterCallbackReason reason,
	428	UErrorCode * err)
	429	{
	430	UChar uniValueString[VALUE_STRING_LENGTH];
	431	int32_t valueStringLength = 0;
	432	int32_t i = 0;
	433
	434	if (reason > UCNV_IRREGULAR)
	435	{
	436	return;
	437	}
	438
	439	if(context==NULL)
	440	{
	441	while (i < length)
	442	{
	443	uniValueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
	444	uniValueString[valueStringLength++] = (UChar) UNICODE_X_CODEPOINT; /* adding X */
	445	valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t) codeUnits[i++], 16, 2);
	446	}
	447	}
	448	else
	449	{
	450	switch(((char)context))
	451	{
	452	case UCNV_PRV_ESCAPE_XML_DEC:
	453	while (i < length)
	454	{
	455	uniValueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */
	456	uniValueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */
	457	valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 10, 0);
	458	uniValueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
	459	}
	460	break;
	461
	462	case UCNV_PRV_ESCAPE_XML_HEX:
	463	while (i < length)
	464	{
	465	uniValueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */
	466	uniValueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */
	467	uniValueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */
	468	valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 16, 0);
	469	uniValueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
	470	}
	471	break;
	472	case UCNV_PRV_ESCAPE_C:
	473	while (i < length)
	474	{
	475	uniValueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */
	476	uniValueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */
	477	valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 16, 2);
	478	}
	479	break;
	480	default:
	481	while (i < length)
	482	{
	483	uniValueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
	484	uniValueString[valueStringLength++] = (UChar) UNICODE_X_CODEPOINT; /* adding X */
	485	uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t) codeUnits[i++], 16, 2);
	486	valueStringLength += 2;
	487	}
	488	}
	489	}
	490	/* reset the error */
	491	*err = U_ZERO_ERROR;
	492
	493	ucnv_cbToUWriteUChars(toArgs, uniValueString, valueStringLength, 0, err);
	494	}
	495
	496	#endif