git.saurik.com Git - apple/icu.git/blame_incremental

... / ...

Commit	Line	Data
	1	// © 2016 and later: Unicode, Inc. and others.
	2	// License & terms of use: http://www.unicode.org/copyright.html
	3	/*
	4	*****************************************************************************
	5	*
	6	* Copyright (C) 1998-2016, International Business Machines
	7	* Corporation and others. All Rights Reserved.
	8	*
	9	*****************************************************************************
	10	*
	11	* ucnv_err.c
	12	* Implements error behaviour functions called by T_UConverter_{from,to}Unicode
	13	*
	14	*
	15	* Change history:
	16	*
	17	* 06/29/2000 helena Major rewrite of the callback APIs.
	18	*/
	19
	20	#include "unicode/utypes.h"
	21
	22	#if !UCONFIG_NO_CONVERSION
	23
	24	#include "unicode/ucnv_err.h"
	25	#include "unicode/ucnv_cb.h"
	26	#include "ucnv_cnv.h"
	27	#include "cmemory.h"
	28	#include "unicode/ucnv.h"
	29	#include "ustrfmt.h"
	30
	/*
	 * Capacity, in UChars, of the scratch buffer in which the escape callbacks
	 * build their replacement text.
	 * NOTE(review): the earlier comment derived "32 = 4 (number of char in value
	 * string) * 8 (max number of bytes per char for any converter)", which no
	 * longer matches the value. 48 is consistent with 6 UChars per escaped byte
	 * (e.g. "&#255;") * 8 bytes -- confirm.
	 */
	#define VALUE_STRING_LENGTH 48

	/* Code points of the ASCII characters used to assemble escape sequences. */
	#define UNICODE_PERCENT_SIGN_CODEPOINT 0x0025 /* % */
	#define UNICODE_U_CODEPOINT 0x0055 /* U */
	#define UNICODE_X_CODEPOINT 0x0058 /* X */
	#define UNICODE_RS_CODEPOINT 0x005C /* reverse solidus (backslash) */
	#define UNICODE_U_LOW_CODEPOINT 0x0075 /* u */
	#define UNICODE_X_LOW_CODEPOINT 0x0078 /* x */
	#define UNICODE_AMP_CODEPOINT 0x0026 /* & */
	#define UNICODE_HASH_CODEPOINT 0x0023 /* # */
	#define UNICODE_SEMICOLON_CODEPOINT 0x003B /* ; */
	#define UNICODE_PLUS_CODEPOINT 0x002B /* + */
	#define UNICODE_LEFT_CURLY_CODEPOINT 0x007B /* { */
	#define UNICODE_RIGHT_CURLY_CODEPOINT 0x007D /* } */
	#define UNICODE_SPACE_CODEPOINT 0x0020 /* space */

	/*
	 * Selector values compared against the first character of a callback's
	 * context string to choose the escape format (or the stop-on-illegal mode).
	 */
	#define UCNV_PRV_ESCAPE_ICU 0
	#define UCNV_PRV_ESCAPE_C 'C'
	#define UCNV_PRV_ESCAPE_XML_DEC 'D'
	#define UCNV_PRV_ESCAPE_XML_HEX 'X'
	#define UCNV_PRV_ESCAPE_JAVA 'J'
	#define UCNV_PRV_ESCAPE_UNICODE 'U'
	#define UCNV_PRV_ESCAPE_CSS2 'S'
	#define UCNV_PRV_STOP_ON_ILLEGAL 'i'
	54
	55	/*
	56	* IS_DEFAULT_IGNORABLE_CODE_POINT
	57	* This is to check if a code point has the default ignorable unicode property.
	58	* As such, this list needs to be updated if the ignorable code point list ever
	59	* changes.
	60	* To avoid dependency on other code, this list is hard coded here.
	61	* When an ignorable code point is found and is unmappable, the default callbacks
	62	* will ignore them.
	63	* For a list of the default ignorable code points, use this link:
	64	* https://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5B%3ADI%3A%5D&abb=on&g=&i=
	65	*
	66	* This list should be kept in sync with the one in CharsetCallback.java
	67	*/
	/*
	 * Evaluates to non-zero when c is one of the hard-coded default-ignorable
	 * code points or ranges listed below, and to zero otherwise.
	 * The argument is parenthesized at every use so that an expression argument
	 * keeps its meaning; it is still evaluated more than once, so it must not
	 * have side effects.
	 */
	#define IS_DEFAULT_IGNORABLE_CODE_POINT(c) ( \
	    ((c) == 0x00AD) || \
	    ((c) == 0x034F) || \
	    ((c) == 0x061C) || \
	    ((c) == 0x115F) || \
	    ((c) == 0x1160) || \
	    (0x17B4 <= (c) && (c) <= 0x17B5) || \
	    (0x180B <= (c) && (c) <= 0x180E) || \
	    (0x200B <= (c) && (c) <= 0x200F) || \
	    (0x202A <= (c) && (c) <= 0x202E) || \
	    (0x2060 <= (c) && (c) <= 0x206F) || \
	    ((c) == 0x3164) || \
	    (0xFE00 <= (c) && (c) <= 0xFE0F) || \
	    ((c) == 0xFEFF) || \
	    ((c) == 0xFFA0) || \
	    (0xFFF0 <= (c) && (c) <= 0xFFF8) || \
	    (0x1BCA0 <= (c) && (c) <= 0x1BCA3) || \
	    (0x1D173 <= (c) && (c) <= 0x1D17A) || \
	    (0xE0000 <= (c) && (c) <= 0xE0FFF))
	87
	88
	89	/* Function Pointer STOPS at the ILLEGAL_SEQUENCE */
	90	U_CAPI void U_EXPORT2
	91	UCNV_FROM_U_CALLBACK_STOP (
	92	const void *context,
	93	UConverterFromUnicodeArgs *fromUArgs,
	94	const UChar* codeUnits,
	95	int32_t length,
	96	UChar32 codePoint,
	97	UConverterCallbackReason reason,
	98	UErrorCode * err)
	99	{
	100	(void)context;
	101	(void)fromUArgs;
	102	(void)codeUnits;
	103	(void)length;
	104	if (reason == UCNV_UNASSIGNED && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint))
	105	{
	106	/*
	107	* Skip if the codepoint has unicode property of default ignorable.
	108	*/
	109	*err = U_ZERO_ERROR;
	110	}
	111	/* the caller must have set the error code accordingly */
	112	return;
	113	}
	114
	115
	116	/* Function Pointer STOPS at the ILLEGAL_SEQUENCE */
	117	U_CAPI void U_EXPORT2
	118	UCNV_TO_U_CALLBACK_STOP (
	119	const void *context,
	120	UConverterToUnicodeArgs *toUArgs,
	121	const char* codePoints,
	122	int32_t length,
	123	UConverterCallbackReason reason,
	124	UErrorCode * err)
	125	{
	126	/* the caller must have set the error code accordingly */
	127	(void)context; (void)toUArgs; (void)codePoints; (void)length; (void)reason; (void)err;
	128	return;
	129	}
	130
	131	U_CAPI void U_EXPORT2
	132	UCNV_FROM_U_CALLBACK_SKIP (
	133	const void *context,
	134	UConverterFromUnicodeArgs *fromUArgs,
	135	const UChar* codeUnits,
	136	int32_t length,
	137	UChar32 codePoint,
	138	UConverterCallbackReason reason,
	139	UErrorCode * err)
	140	{
	141	(void)fromUArgs;
	142	(void)codeUnits;
	143	(void)length;
	144	if (reason <= UCNV_IRREGULAR)
	145	{
	146	if (reason == UCNV_UNASSIGNED && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint))
	147	{
	148	/*
	149	* Skip if the codepoint has unicode property of default ignorable.
	150	*/
	151	*err = U_ZERO_ERROR;
	152	}
	153	else if (context == NULL \|\| (((char)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
	154	{
	155	*err = U_ZERO_ERROR;
	156	}
	157	/* else the caller must have set the error code accordingly. */
	158	}
	159	/* else ignore the reset, close and clone calls. */
	160	}
	161
	162	U_CAPI void U_EXPORT2
	163	UCNV_FROM_U_CALLBACK_SUBSTITUTE (
	164	const void *context,
	165	UConverterFromUnicodeArgs *fromArgs,
	166	const UChar* codeUnits,
	167	int32_t length,
	168	UChar32 codePoint,
	169	UConverterCallbackReason reason,
	170	UErrorCode * err)
	171	{
	172	(void)codeUnits;
	173	(void)length;
	174	if (reason <= UCNV_IRREGULAR)
	175	{
	176	if (reason == UCNV_UNASSIGNED && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint))
	177	{
	178	/*
	179	* Skip if the codepoint has unicode property of default ignorable.
	180	*/
	181	*err = U_ZERO_ERROR;
	182	}
	183	else if (context == NULL \|\| (((char)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
	184	{
	185	*err = U_ZERO_ERROR;
	186	ucnv_cbFromUWriteSub(fromArgs, 0, err);
	187	}
	188	/* else the caller must have set the error code accordingly. */
	189	}
	190	/* else ignore the reset, close and clone calls. */
	191	}
	192
	193	/*uses uprv_itou to get a unicode escape sequence of the offensive sequence,
	194	*uses a clean (reset) copy of the converter, to convert that unicode
	195	*escape sequence to the target codepage (if conversion failure happens then
	196	*we revert to substituting with subchar)
	197	*/
	198	U_CAPI void U_EXPORT2
	199	UCNV_FROM_U_CALLBACK_ESCAPE (
	200	const void *context,
	201	UConverterFromUnicodeArgs *fromArgs,
	202	const UChar *codeUnits,
	203	int32_t length,
	204	UChar32 codePoint,
	205	UConverterCallbackReason reason,
	206	UErrorCode * err)
	207	{
	208
	209	UChar valueString[VALUE_STRING_LENGTH];
	210	int32_t valueStringLength = 0;
	211	int32_t i = 0;
	212
	213	const UChar *myValueSource = NULL;
	214	UErrorCode err2 = U_ZERO_ERROR;
	215	UConverterFromUCallback original = NULL;
	216	const void *originalContext;
	217
	218	UConverterFromUCallback ignoredCallback = NULL;
	219	const void *ignoredContext;
	220
	221	if (reason > UCNV_IRREGULAR)
	222	{
	223	return;
	224	}
	225	else if (reason == UCNV_UNASSIGNED && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint))
	226	{
	227	/*
	228	* Skip if the codepoint has unicode property of default ignorable.
	229	*/
	230	*err = U_ZERO_ERROR;
	231	return;
	232	}
	233
	234	ucnv_setFromUCallBack (fromArgs->converter,
	235	(UConverterFromUCallback) UCNV_FROM_U_CALLBACK_SUBSTITUTE,
	236	NULL,
	237	&original,
	238	&originalContext,
	239	&err2);
	240
	241	if (U_FAILURE (err2))
	242	{
	243	*err = err2;
	244	return;
	245	}
	246	if(context==NULL)
	247	{
	248	while (i < length)
	249	{
	250	valueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
	251	valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */
	252	valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4);
	253	}
	254	}
	255	else
	256	{
	257	switch(((char)context))
	258	{
	259	case UCNV_PRV_ESCAPE_JAVA:
	260	while (i < length)
	261	{
	262	valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */
	263	valueString[valueStringLength++] = (UChar) UNICODE_U_LOW_CODEPOINT; /* adding u */
	264	valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4);
	265	}
	266	break;
	267
	268	case UCNV_PRV_ESCAPE_C:
	269	valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */
	270
	271	if(length==2){
	272	valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */
	273	valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 8);
	274
	275	}
	276	else{
	277	valueString[valueStringLength++] = (UChar) UNICODE_U_LOW_CODEPOINT; /* adding u */
	278	valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 4);
	279	}
	280	break;
	281
	282	case UCNV_PRV_ESCAPE_XML_DEC:
	283
	284	valueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */
	285	valueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */
	286	if(length==2){
	287	valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 10, 0);
	288	}
	289	else{
	290	valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 10, 0);
	291	}
	292	valueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
	293	break;
	294
	295	case UCNV_PRV_ESCAPE_XML_HEX:
	296
	297	valueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */
	298	valueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */
	299	valueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */
	300	if(length==2){
	301	valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0);
	302	}
	303	else{
	304	valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 0);
	305	}
	306	valueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
	307	break;
	308
	309	case UCNV_PRV_ESCAPE_UNICODE:
	310	valueString[valueStringLength++] = (UChar) UNICODE_LEFT_CURLY_CODEPOINT; /* adding { */
	311	valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */
	312	valueString[valueStringLength++] = (UChar) UNICODE_PLUS_CODEPOINT; /* adding + */
	313	if (length == 2) {
	314	valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 4);
	315	} else {
	316	valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 4);
	317	}
	318	valueString[valueStringLength++] = (UChar) UNICODE_RIGHT_CURLY_CODEPOINT; /* adding } */
	319	break;
	320
	321	case UCNV_PRV_ESCAPE_CSS2:
	322	valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */
	323	valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0);
	324	/* Always add space character, becase the next character might be whitespace,
	325	which would erroneously be considered the termination of the escape sequence. */
	326	valueString[valueStringLength++] = (UChar) UNICODE_SPACE_CODEPOINT;
	327	break;
	328
	329	default:
	330	while (i < length)
	331	{
	332	valueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
	333	valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */
	334	valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4);
	335	}
	336	}
	337	}
	338	myValueSource = valueString;
	339
	340	/* reset the error */
	341	*err = U_ZERO_ERROR;
	342
	343	ucnv_cbFromUWriteUChars(fromArgs, &myValueSource, myValueSource+valueStringLength, 0, err);
	344
	345	ucnv_setFromUCallBack (fromArgs->converter,
	346	original,
	347	originalContext,
	348	&ignoredCallback,
	349	&ignoredContext,
	350	&err2);
	351	if (U_FAILURE (err2))
	352	{
	353	*err = err2;
	354	return;
	355	}
	356
	357	return;
	358	}
	359
	360
	361
	362	U_CAPI void U_EXPORT2
	363	UCNV_TO_U_CALLBACK_SKIP (
	364	const void *context,
	365	UConverterToUnicodeArgs *toArgs,
	366	const char* codeUnits,
	367	int32_t length,
	368	UConverterCallbackReason reason,
	369	UErrorCode * err)
	370	{
	371	(void)toArgs;
	372	(void)codeUnits;
	373	(void)length;
	374	if (reason <= UCNV_IRREGULAR)
	375	{
	376	if (context == NULL \|\| (((char)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
	377	{
	378	*err = U_ZERO_ERROR;
	379	}
	380	/* else the caller must have set the error code accordingly. */
	381	}
	382	/* else ignore the reset, close and clone calls. */
	383	}
	384
	385	U_CAPI void U_EXPORT2
	386	UCNV_TO_U_CALLBACK_SUBSTITUTE (
	387	const void *context,
	388	UConverterToUnicodeArgs *toArgs,
	389	const char* codeUnits,
	390	int32_t length,
	391	UConverterCallbackReason reason,
	392	UErrorCode * err)
	393	{
	394	(void)codeUnits;
	395	(void)length;
	396	if (reason <= UCNV_IRREGULAR)
	397	{
	398	if (context == NULL \|\| (((char)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
	399	{
	400	*err = U_ZERO_ERROR;
	401	ucnv_cbToUWriteSub(toArgs,0,err);
	402	}
	403	/* else the caller must have set the error code accordingly. */
	404	}
	405	/* else ignore the reset, close and clone calls. */
	406	}
	407
	408	/*uses uprv_itou to get a unicode escape sequence of the offensive sequence,
	409	*and uses that as the substitution sequence
	410	*/
	411	U_CAPI void U_EXPORT2
	412	UCNV_TO_U_CALLBACK_ESCAPE (
	413	const void *context,
	414	UConverterToUnicodeArgs *toArgs,
	415	const char* codeUnits,
	416	int32_t length,
	417	UConverterCallbackReason reason,
	418	UErrorCode * err)
	419	{
	420	UChar uniValueString[VALUE_STRING_LENGTH];
	421	int32_t valueStringLength = 0;
	422	int32_t i = 0;
	423
	424	if (reason > UCNV_IRREGULAR)
	425	{
	426	return;
	427	}
	428
	429	if(context==NULL)
	430	{
	431	while (i < length)
	432	{
	433	uniValueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
	434	uniValueString[valueStringLength++] = (UChar) UNICODE_X_CODEPOINT; /* adding X */
	435	valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t) codeUnits[i++], 16, 2);
	436	}
	437	}
	438	else
	439	{
	440	switch(((char)context))
	441	{
	442	case UCNV_PRV_ESCAPE_XML_DEC:
	443	while (i < length)
	444	{
	445	uniValueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */
	446	uniValueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */
	447	valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 10, 0);
	448	uniValueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
	449	}
	450	break;
	451
	452	case UCNV_PRV_ESCAPE_XML_HEX:
	453	while (i < length)
	454	{
	455	uniValueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */
	456	uniValueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */
	457	uniValueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */
	458	valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 16, 0);
	459	uniValueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
	460	}
	461	break;
	462	case UCNV_PRV_ESCAPE_C:
	463	while (i < length)
	464	{
	465	uniValueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */
	466	uniValueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */
	467	valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 16, 2);
	468	}
	469	break;
	470	default:
	471	while (i < length)
	472	{
	473	uniValueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
	474	uniValueString[valueStringLength++] = (UChar) UNICODE_X_CODEPOINT; /* adding X */
	475	uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t) codeUnits[i++], 16, 2);
	476	valueStringLength += 2;
	477	}
	478	}
	479	}
	480	/* reset the error */
	481	*err = U_ZERO_ERROR;
	482
	483	ucnv_cbToUWriteUChars(toArgs, uniValueString, valueStringLength, 0, err);
	484	}
	485
	486	#endif