git.saurik.com Git - apple/icu.git/blame_incremental

... / ...

Commit	Line	Data
	1	// © 2016 and later: Unicode, Inc. and others.
	2	// License & terms of use: http://www.unicode.org/copyright.html
	3	/*
	4	**********************************************************************
	5	* Copyright (C) 2000-2016, International Business Machines
	6	* Corporation and others. All Rights Reserved.
	7	**********************************************************************
	8	* file name: ucnv2022.cpp
	9	* encoding: UTF-8
	10	* tab size: 8 (not used)
	11	* indentation:4
	12	*
	13	* created on: 2000feb03
	14	* created by: Markus W. Scherer
	15	*
	16	* Change history:
	17	*
	18	* 06/29/2000 helena Major rewrite of the callback APIs.
	19	* 08/08/2000 Ram Included support for ISO-2022-JP-2
	20	* Changed implementation of toUnicode
	21	* function
	22	* 08/21/2000 Ram Added support for ISO-2022-KR
	23	* 08/29/2000 Ram Separated implementation of EBCDIC to
	24	* ucnvebdc.c
	25	* 09/20/2000 Ram Added support for ISO-2022-CN
	26	* Added implementations for getNextUChar()
	27	* for specific 2022 country variants.
	28	* 10/31/2000 Ram Implemented offsets logic functions
	29	*/
	30
	31	#include "unicode/utypes.h"
	32
	33	#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
	34
	35	#include "unicode/ucnv.h"
	36	#include "unicode/uset.h"
	37	#include "unicode/ucnv_err.h"
	38	#include "unicode/ucnv_cb.h"
	39	#include "unicode/utf16.h"
	40	#include "ucnv_imp.h"
	41	#include "ucnv_bld.h"
	42	#include "ucnv_cnv.h"
	43	#include "ucnvmbcs.h"
	44	#include "cstring.h"
	45	#include "cmemory.h"
	46	#include "uassert.h"
	47
	48	#ifdef U_ENABLE_GENERIC_ISO_2022
	49	/*
	50	* I am disabling the generic ISO-2022 converter after proposing to do so on
	51	* the icu mailing list two days ago.
	52	*
	53	* Reasons:
	54	* 1. It does not fully support the ISO-2022/ECMA-35 specification with all of
	55	* its designation sequences, single shifts with return to the previous state,
	56	* switch-with-no-return to UTF-16BE or similar, etc.
	57	* This is unlike the language-specific variants like ISO-2022-JP which
	58	* require a much smaller repertoire of ISO-2022 features.
	59	* These variants continue to be supported.
	60	* 2. I believe that no one is really using the generic ISO-2022 converter
	61	* but rather always one of the language-specific variants.
	62	* Note that ICU's generic ISO-2022 converter has always output one escape
	63	* sequence followed by UTF-8 for the whole stream.
	64	* 3. Switching between subcharsets is extremely slow, because each time
	65	* the previous converter is closed and a new one opened,
	66	* without any kind of caching, least-recently-used list, etc.
	67	* 4. The code is currently buggy, and given the above it does not seem
	68	* reasonable to spend the time on maintenance.
	69	* 5. ISO-2022 subcharsets should normally be used with 7-bit byte encodings.
	70	* This means, for example, that when ISO-8859-7 is designated, the following
	71	* ISO-2022 bytes 00..7f should be interpreted as ISO-8859-7 bytes 80..ff.
	72	* The ICU ISO-2022 converter does not handle this - and has no information
	73	* about which subconverter would have to be shifted vs. which is designed
	74	* for 7-bit ISO-2022.
	75	*
	76	* Markus Scherer 2003-dec-03
	77	*/
	78	#endif
	79
		/* One-byte string holding SI (Shift In, 0x0F); not compiled in HTML-only conversion builds. */
	80	#if !UCONFIG_ONLY_HTML_CONVERSION
	81	static const char SHIFT_IN_STR[] = "\x0F";
	82	// static const char SHIFT_OUT_STR[] = "\x0E";
	83	#endif
	84
		/* Byte values of the ASCII whitespace/control characters referenced by name in this file. */
	85	#define CR 0x0D
	86	#define LF 0x0A
	87	#define H_TAB 0x09
	88	#define V_TAB 0x0B
	89	#define SPACE 0x20
	90
		/* Inclusive Unicode code point range U+FF61..U+FF9F (halfwidth Katakana forms). */
	91	enum {
	92	HWKANA_START=0xff61,
	93	HWKANA_END=0xff9f
	94	};
	95
	96	/*
	97	* 94-character sets with native byte values A1..FE are encoded in ISO 2022
	98	* as bytes 21..7E. (Subtract 0x80.)
	99	* 96-character sets with native byte values A0..FF are encoded in ISO 2022
	100	* as bytes 20..7F. (Subtract 0x80.)
	101	* Do not encode C1 control codes with native bytes 80..9F
	102	* as bytes 00..1F (C0 control codes).
	103	*/
		/* Inclusive native (8-bit, "GR") byte ranges of 94- and 96-character sets. */
	104	enum {
	105	GR94_START=0xa1,
	106	GR94_END=0xfe,
	107	GR96_START=0xa0,
	108	GR96_END=0xff
	109	};
	110
	111	/*
	112	* ISO 2022 control codes must not be converted from Unicode
	113	* because they would mess up the byte stream.
	114	* The bit mask 0x0800c000 has bits set at bit positions 0xe, 0xf, 0x1b
	115	* corresponding to SO, SI, and ESC.
		* The (c)<0x20 test comes first so that the shift count is always below 32.
		* Note that the macro evaluates its argument twice.
	116	*/
	117	#define IS_2022_CONTROL(c) (((c)<0x20) && (((uint32_t)1<<(c))&0x0800c000)!=0)
	118
	119	/* for ISO-2022-JP and -CN implementations */
		/*
		 * Charset/state identifiers. The JP and CN groups deliberately reuse the
		 * same small integer values (e.g. ISO8859_1 and GB2312_1 are both 1), so a
		 * value is only meaningful together with the variant it is used for.
		 */
	120	typedef enum {
	121	/* shared values */
	122	INVALID_STATE=-1,
	123	ASCII = 0,
	124
		/* single-shift pseudo-states (0x10 and 0x11) */
	125	SS2_STATE=0x10,
	126	SS3_STATE,
	127
	128	/* JP */
	129	ISO8859_1 = 1 ,
	130	ISO8859_7 = 2 ,
	131	JISX201 = 3,
	132	JISX208 = 4,
	133	JISX212 = 5,
	134	GB2312 =6,
	135	KSC5601 =7,
	136	HWKANA_7BIT=8, /* Halfwidth Katakana 7 bit */
	137
	138	/* CN */
	139	/* the first few enum constants must keep their values because they correspond to myConverterArray[] */
	140	GB2312_1=1,
	141	ISO_IR_165=2,
	142	CNS_11643=3,
	143
	144	/*
	145	* these are used in StateEnum and ISO2022State variables,
	146	* but CNS_11643 must be used to index into myConverterArray[]
	147	*/
		/* consecutive values 0x20..0x27 */
	148	CNS_11643_0=0x20,
	149	CNS_11643_1,
	150	CNS_11643_2,
	151	CNS_11643_3,
	152	CNS_11643_4,
	153	CNS_11643_5,
	154	CNS_11643_6,
	155	CNS_11643_7
	156	} StateEnum;
	157
	158	/* is the StateEnum charset value for a DBCS charset? */
		/* HTML-only builds recognize just JISX208; otherwise the range JISX208..KSC5601 (values 4..7). */
	159	#if UCONFIG_ONLY_HTML_CONVERSION
	160	#define IS_JP_DBCS(cs) (JISX208==(cs))
	161	#else
	162	#define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601)
	163	#endif
	164
		/* Charset mask: the single bit at position cs, for building the jpCharsetMasks[] entries. */
	165	#define CSM(cs) ((uint16_t)1<<(cs))
	166
	167	/*
	168	* Each of these charset masks (with index x) contains a bit for a charset in exact correspondence
	169	* to whether that charset is used in the corresponding version x of ISO_2022,locale=ja,version=x
	170	*
	171	* Note: The converter uses some leniency:
	172	* - The escape sequence ESC ( I for half-width 7-bit Katakana is recognized in
	173	* all versions, not just JIS7 and JIS8.
	174	* - ICU does not distinguish between different versions of JIS X 0208.
	175	*/
	176	#if UCONFIG_ONLY_HTML_CONVERSION
	177	enum { MAX_JA_VERSION=0 };
	178	#else
	179	enum { MAX_JA_VERSION=4 };
	180	#endif
	181	static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={
	182	CSM(ASCII)\|CSM(JISX201)\|CSM(JISX208)\|CSM(HWKANA_7BIT),
	183	#if !UCONFIG_ONLY_HTML_CONVERSION
	184	CSM(ASCII)\|CSM(JISX201)\|CSM(JISX208)\|CSM(HWKANA_7BIT)\|CSM(JISX212),
	185	CSM(ASCII)\|CSM(JISX201)\|CSM(JISX208)\|CSM(HWKANA_7BIT)\|CSM(JISX212)\|CSM(GB2312)\|CSM(KSC5601)\|CSM(ISO8859_1)\|CSM(ISO8859_7),
	186	CSM(ASCII)\|CSM(JISX201)\|CSM(JISX208)\|CSM(HWKANA_7BIT)\|CSM(JISX212)\|CSM(GB2312)\|CSM(KSC5601)\|CSM(ISO8859_1)\|CSM(ISO8859_7),
	187	CSM(ASCII)\|CSM(JISX201)\|CSM(JISX208)\|CSM(HWKANA_7BIT)\|CSM(JISX212)\|CSM(GB2312)\|CSM(KSC5601)\|CSM(ISO8859_1)\|CSM(ISO8859_7)
	188	#endif
	189	};
	190
		/*
		 * Type tag stored in UConverterDataISO2022::currentType (initialized to ASCII1 on open).
		 * NOTE(review): the enumerators presumably classify the currently selected
		 * sub-charset by encoding form; confirm against the code that reads currentType.
		 */
	191	typedef enum {
	192	ASCII1=0,
	193	LATIN1,
	194	SBCS,
	195	DBCS,
	196	MBCS,
	197	HWKANA
	198	}Cnv2022Type;
	199
		/* Designation/shift state for one conversion direction; zero-filled by _ISO2022Reset(). */
	200	typedef struct ISO2022State {
	201	int8_t cs[4]; /* charset number for SI (G0)/SO (G1)/SS2 (G2)/SS3 (G3) */
	202	int8_t g; /* 0..3 for G0..G3 (SI/SO/SS2/SS3) */
	203	int8_t prevG; /* g before single shift (SS2 or SS3) */
	204	} ISO2022State;
	205
		/* Low four bits of the converter options carry the variant version (see _ISO2022Open()). */
	206	#define UCNV_OPTIONS_VERSION_MASK 0xf
		/* Capacity of UConverterDataISO2022::myConverterArray[]. */
	207	#define UCNV_2022_MAX_CONVERTERS 10
	208
		/* Per-converter ISO-2022 data; allocated and zero-filled in _ISO2022Open() and stored in UConverter::extraInfo. */
	209	typedef struct{
		/* shared data of sub-charset converters, loaded in _ISO2022Open(); unused slots stay NULL */
	210	UConverterSharedData *myConverterArray[UCNV_2022_MAX_CONVERTERS];
		/* sub-converter opened with ucnv_open() for locale=ko; closed in _ISO2022Close() */
	211	UConverter *currentConverter;
	212	Cnv2022Type currentType;
		/* separate shift/designation state for each conversion direction */
	213	ISO2022State toU2022State, fromU2022State;
		/* accumulated escape-sequence key (see getKey_2022()); cleared on reset */
	214	uint32_t key;
		/* options & UCNV_OPTIONS_VERSION_MASK */
	215	uint32_t version;
	216	#ifdef U_ENABLE_GENERIC_ISO_2022
	217	UBool isFirstBuffer;
	218	#endif
		/* cleared on reset; NOTE(review): exact meaning is defined by code outside this chunk */
	219	UBool isEmptySegment;
		/* converter name as returned by _ISO2022getName(), e.g. "ISO_2022,locale=ja,version=0" */
	220	char name[30];
		/* "ja", "ko" or "cn"; stays empty for the generic variant */
	221	char locale[3];
	222	}UConverterDataISO2022;
	223
	224	/* Protos */
	225	/* ISO-2022 ----------------------------------------------------------------- */
	226
	227	/* Forward declarations */
		/* Declared here, defined outside this chunk. NOTE(review): presumably the UTF-8 converter's fromUnicode implementations - confirm. */
	228	U_CFUNC void U_CALLCONV
	229	ucnv_fromUnicode_UTF8(UConverterFromUnicodeArgs * args,
	230	UErrorCode * err);
	231	U_CFUNC void U_CALLCONV
	232	ucnv_fromUnicode_UTF8_OFFSETS_LOGIC(UConverterFromUnicodeArgs * args,
	233	UErrorCode * err);
	234
	235	#define ESC_2022 0x1B /ESC/
	236
	237	typedef enum
	238	{
	239	INVALID_2022 = -1, /Doesn't correspond to a valid iso 2022 escape sequence/
	240	VALID_NON_TERMINAL_2022 = 0, /so far corresponds to a valid iso 2022 escape sequence/
	241	VALID_TERMINAL_2022 = 1, /corresponds to a valid iso 2022 escape sequence/
	242	VALID_MAYBE_TERMINAL_2022 = 2 /so far matches one iso 2022 escape sequence, but by adding more characters might match another escape sequence/
	243	} UCNV_TableStates_2022;
	244
	245	/*
	246	* The way these state transition arrays work is:
	247	* ex : ESC$B is the sequence for JISX208
	248	* a) First Iteration: char is ESC
	249	* i) Get the value of ESC from normalize_esq_chars_2022[] with int value of ESC as index
	250	* int x = normalize_esq_chars_2022[27] which is equal to 1
	251	* ii) Search for this value in escSeqStateTable_Key_2022[]
	252	* value of x is stored at escSeqStateTable_Key_2022[0]
	253	* iii) Save this index as offset
	254	* iv) Get state of this sequence from escSeqStateTable_Value_2022[]
	255	* escSeqStateTable_Value_2022[offset], which is VALID_NON_TERMINAL_2022
	256	* b) Switch on this state and continue to next char
	257	* i) Get the value of $ from normalize_esq_chars_2022[] with int value of $ as index
	258	* which is normalize_esq_chars_2022[36] == 4
	259	* ii) x is currently 1(from above)
	260	* x<<=5 -- x is now 32
	261	* x+=normalize_esq_chars_2022[36]
	262	* now x is 36
	263	* iii) Search for this value in escSeqStateTable_Key_2022[]
	264	* value of x is stored at escSeqStateTable_Key_2022[2], so offset is 2
	265	* iv) Get state of this sequence from escSeqStateTable_Value_2022[]
	266	* escSeqStateTable_Value_2022[offset], which is VALID_NON_TERMINAL_2022
	267	* c) Switch on this state and continue to next char
	268	* i) Get the value of B from normalize_esq_chars_2022[] with int value of B as index
	269	* ii) x is currently 36 (from above)
	270	* x<<=5 -- x is now 1152
	271	* x+=normalize_esq_chars_2022[66]
	272	* now x is 1161
	273	* iii) Search for this value in escSeqStateTable_Key_2022[]
	274	* value of x is stored at escSeqStateTable_Key_2022[24], so offset is 24
	275	* iv) Get state of this sequence from escSeqStateTable_Value_2022[24]
	276	* escSeqStateTable_Value_2022[offset], which is VALID_TERMINAL_2022
	277	* v) Get the converter name from escSeqStateTable_Result_2022[24] which is JISX208
	278	*/
	279
	280
	281	/* Below are the 3 arrays depicting a state transition table */
		/*
		 * Indexed by byte value. A nonzero entry is the small code (1..29, fits in
		 * 5 bits) that getKey_2022() folds into the key as key=(key<<5)+code;
		 * 0 marks a byte that cannot occur in any recognized escape sequence.
		 * For example, entry 27 (ESC) is 1.
		 */
	282	static const int8_t normalize_esq_chars_2022[256] = {
	283	/* 0 1 2 3 4 5 6 7 8 9 */
	284
	285	0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
	286	,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
	287	,0 ,0 ,0 ,0 ,0 ,0 ,0 ,1 ,0 ,0
	288	,0 ,0 ,0 ,0 ,0 ,0 ,4 ,7 ,29 ,0
	289	,2 ,24 ,26 ,27 ,0 ,3 ,23 ,6 ,0 ,0
	290	,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
	291	,0 ,0 ,0 ,0 ,5 ,8 ,9 ,10 ,11 ,12
	292	,13 ,14 ,15 ,16 ,17 ,18 ,19 ,20 ,25 ,28
	293	,0 ,0 ,21 ,0 ,0 ,0 ,0 ,0 ,0 ,0
	294	,22 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
	295	,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
	296	,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
	297	,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
	298	,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
	299	,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
	300	,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
	301	,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
	302	,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
	303	,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
	304	,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
	305	,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
	306	,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
	307	,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
	308	,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
	309	,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
	310	,0 ,0 ,0 ,0 ,0 ,0
	311	};
	312
	313	#ifdef U_ENABLE_GENERIC_ISO_2022
	314	/*
	315	* When the generic ISO-2022 converter is completely removed, not just disabled
	316	* per #ifdef, then the following state table and the associated tables that are
	317	* dimensioned with MAX_STATES_2022 should be trimmed.
	318	*
	319	* Especially, VALID_MAYBE_TERMINAL_2022 will not be used any more, and all of
	320	* the associated escape sequences starting with ESC ( B should be removed.
	321	* This includes the ones with key values 1097 and all of the ones above 1000000.
	322	*
	323	* For the latter, the tables can simply be truncated.
	324	* For the former, since the tables must be kept parallel, it is probably best
	325	* to simply duplicate an adjacent table cell, parallel in all tables.
	326	*
	327	* It may make sense to restructure the tables, especially by using small search
	328	* tables for the variants instead of indexing them parallel to the table here.
	329	*/
	330	#endif
	331
		/* Number of entries in each of the parallel escape-sequence tables. */
	332	#define MAX_STATES_2022 74
		/*
		 * Keys of all recognized escape-sequence prefixes, in ascending order
		 * (getKey_2022() binary-searches this table). The index of a key is the
		 * index into the parallel tables dimensioned with MAX_STATES_2022.
		 */
	333	static const int32_t escSeqStateTable_Key_2022[MAX_STATES_2022] = {
	334	/* 0 1 2 3 4 5 6 7 8 9 */
	335
	336	1 ,34 ,36 ,39 ,55 ,57 ,60 ,61 ,1093 ,1096
	337	,1097 ,1098 ,1099 ,1100 ,1101 ,1102 ,1103 ,1104 ,1105 ,1106
	338	,1109 ,1154 ,1157 ,1160 ,1161 ,1176 ,1178 ,1179 ,1254 ,1257
	339	,1768 ,1773 ,1957 ,35105 ,36933 ,36936 ,36937 ,36938 ,36939 ,36940
	340	,36942 ,36943 ,36944 ,36945 ,36946 ,36947 ,36948 ,37640 ,37642 ,37644
	341	,37646 ,37711 ,37744 ,37745 ,37746 ,37747 ,37748 ,40133 ,40136 ,40138
	342	,40139 ,40140 ,40141 ,1123363 ,35947624 ,35947625 ,35947626 ,35947627 ,35947629 ,35947630
	343	,35947631 ,35947635 ,35947636 ,35947638
	344	};
	345
	346	#ifdef U_ENABLE_GENERIC_ISO_2022
	347
		/*
		 * Generic ISO-2022 only: converter name for the escape sequence whose key
		 * is at the same index of escSeqStateTable_Key_2022[]; NULL where the
		 * entry names no converter.
		 */
	348	static const char* const escSeqStateTable_Result_2022[MAX_STATES_2022] = {
	349	/* 0 1 2 3 4 5 6 7 8 9 */
	350
	351	NULL ,NULL ,NULL ,NULL ,NULL ,NULL ,NULL ,NULL ,"latin1" ,"latin1"
	352	,"latin1" ,"ibm-865" ,"ibm-865" ,"ibm-865" ,"ibm-865" ,"ibm-865" ,"ibm-865" ,"JISX0201" ,"JISX0201" ,"latin1"
	353	,"latin1" ,NULL ,"JISX-208" ,"ibm-5478" ,"JISX-208" ,NULL ,NULL ,NULL ,NULL ,"UTF8"
	354	,"ISO-8859-1" ,"ISO-8859-7" ,"JIS-X-208" ,NULL ,"ibm-955" ,"ibm-367" ,"ibm-952" ,"ibm-949" ,"JISX-212" ,"ibm-1383"
	355	,"ibm-952" ,"ibm-964" ,"ibm-964" ,"ibm-964" ,"ibm-964" ,"ibm-964" ,"ibm-964" ,"ibm-5478" ,"ibm-949" ,"ISO-IR-165"
	356	,"CNS-11643-1992,1" ,"CNS-11643-1992,2" ,"CNS-11643-1992,3" ,"CNS-11643-1992,4" ,"CNS-11643-1992,5" ,"CNS-11643-1992,6" ,"CNS-11643-1992,7" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian"
	357	,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,NULL ,"latin1" ,"ibm-912" ,"ibm-913" ,"ibm-914" ,"ibm-813" ,"ibm-1089"
	358	,"ibm-920" ,"ibm-915" ,"ibm-915" ,"latin1"
	359	};
	360
	361	#endif
	362
		/*
		 * UCNV_TableStates_2022 value for the key at the same index of
		 * escSeqStateTable_Key_2022[]; this is what getKey_2022() returns on a match.
		 */
	363	static const int8_t escSeqStateTable_Value_2022[MAX_STATES_2022] = {
	364	/* 0 1 2 3 4 5 6 7 8 9 */
	365	VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
	366	,VALID_MAYBE_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
	367	,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022
	368	,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
	369	,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
	370	,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
	371	,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
	372	,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
	373	};
	374
	375	/* Type def for refactoring changeState_2022 code*/
		/* Identifies the ISO-2022 variant; which enumerators exist depends on the build configuration. */
	376	typedef enum{
	377	#ifdef U_ENABLE_GENERIC_ISO_2022
	378	ISO_2022=0,
	379	#endif
	380	ISO_2022_JP=1,
	381	#if !UCONFIG_ONLY_HTML_CONVERSION
	382	ISO_2022_KR=2,
	383	ISO_2022_CN=3
	384	#endif
	385	} Variant2022;
	386
	387	/********* ISO 2022 Converter Protos *********/
	388	static void U_CALLCONV
	389	_ISO2022Open(UConverter cnv, UConverterLoadArgs pArgs, UErrorCode *errorCode);
	390
	391	static void U_CALLCONV
	392	_ISO2022Close(UConverter *converter);
	393
	394	static void U_CALLCONV
	395	_ISO2022Reset(UConverter *converter, UConverterResetChoice choice);
	396
	397	U_CDECL_BEGIN
	398	static const char * U_CALLCONV
	399	_ISO2022getName(const UConverter* cnv);
	400	U_CDECL_END
	401
	402	static void U_CALLCONV
	403	_ISO_2022_WriteSub(UConverterFromUnicodeArgs args, int32_t offsetIndex, UErrorCode err);
	404
	405	U_CDECL_BEGIN
	406	static UConverter * U_CALLCONV
	407	_ISO_2022_SafeClone(const UConverter cnv, void stackBuffer, int32_t pBufferSize, UErrorCode status);
	408
	409	U_CDECL_END
	410
	411	#ifdef U_ENABLE_GENERIC_ISO_2022
	412	static void U_CALLCONV
	413	T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverterToUnicodeArgs* args, UErrorCode* err);
	414	#endif
	415
	416	namespace {
	417
	418	/const UConverterSharedData _ISO2022Data;/
	419	extern const UConverterSharedData _ISO2022JPData;
	420
	421	#if !UCONFIG_ONLY_HTML_CONVERSION
	422	extern const UConverterSharedData _ISO2022KRData;
	423	extern const UConverterSharedData _ISO2022CNData;
	424	#endif
	425
	426	} // namespace
	427
	428	/************* Converter implementations ****************/
	429
	430	/* The purpose of this function is to get around gcc compiler warnings. */
	431	static inline void
	432	fromUWriteUInt8(UConverter *cnv,
	433	const char *bytes, int32_t length,
	434	uint8_t *target, const char targetLimit,
	435	int32_t **offsets,
	436	int32_t sourceIndex,
	437	UErrorCode *pErrorCode)
	438	{
	439	char targetChars = (char )*target;
	440	ucnv_fromUWriteBytes(cnv, bytes, length, &targetChars, targetLimit,
	441	offsets, sourceIndex, pErrorCode);
	442	target = (uint8_t)targetChars;
	443
	444	}
	445
	446	static inline void
	447	setInitialStateToUnicodeKR(UConverter* /converter/, UConverterDataISO2022 *myConverterData){
	448	if(myConverterData->version == 1) {
	449	UConverter *cnv = myConverterData->currentConverter;
	450
	451	cnv->toUnicodeStatus=0; /* offset */
	452	cnv->mode=0; /* state */
	453	cnv->toULength=0; /* byteIndex */
	454	}
	455	}
	456
	457	static inline void
	458	setInitialStateFromUnicodeKR(UConverter* converter,UConverterDataISO2022 *myConverterData){
	459	/* in ISO-2022-KR the designator sequence appears only once
	460	* in a file so we append it only once
	461	*/
	462	if( converter->charErrorBufferLength==0){
	463
	464	converter->charErrorBufferLength = 4;
	465	converter->charErrorBuffer[0] = 0x1b;
	466	converter->charErrorBuffer[1] = 0x24;
	467	converter->charErrorBuffer[2] = 0x29;
	468	converter->charErrorBuffer[3] = 0x43;
	469	}
	470	if(myConverterData->version == 1) {
	471	UConverter *cnv = myConverterData->currentConverter;
	472
	473	cnv->fromUChar32=0;
	474	cnv->fromUnicodeStatus=1; /* prevLength */
	475	}
	476	}
	477
	478	static void U_CALLCONV
	479	_ISO2022Open(UConverter cnv, UConverterLoadArgs pArgs, UErrorCode *errorCode){
	480
	481	char myLocale[6]={' ',' ',' ',' ',' ',' '};
	482
	483	cnv->extraInfo = uprv_malloc (sizeof (UConverterDataISO2022));
	484	if(cnv->extraInfo != NULL) {
	485	UConverterNamePieces stackPieces;
	486	UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER;
	487	UConverterDataISO2022 myConverterData=(UConverterDataISO2022 ) cnv->extraInfo;
	488	uint32_t version;
	489
	490	stackArgs.onlyTestIsLoadable = pArgs->onlyTestIsLoadable;
	491
	492	uprv_memset(myConverterData, 0, sizeof(UConverterDataISO2022));
	493	myConverterData->currentType = ASCII1;
	494	cnv->fromUnicodeStatus =FALSE;
	495	if(pArgs->locale){
	496	uprv_strncpy(myLocale, pArgs->locale, sizeof(myLocale));
	497	}
	498	version = pArgs->options & UCNV_OPTIONS_VERSION_MASK;
	499	myConverterData->version = version;
	500	if(myLocale[0]=='j' && (myLocale[1]=='a'\|\| myLocale[1]=='p') &&
	501	(myLocale[2]=='_' \|\| myLocale[2]=='\0'))
	502	{
	503	/* open the required converters and cache them */
	504	if(version>MAX_JA_VERSION) {
	505	// ICU 55 fails to open a converter for an unsupported version.
	506	// Previously, it fell back to version 0, but that would yield
	507	// unexpected behavior.
	508	*errorCode = U_MISSING_RESOURCE_ERROR;
	509	return;
	510	}
	511	if(jpCharsetMasks[version]&CSM(ISO8859_7)) {
	512	myConverterData->myConverterArray[ISO8859_7] =
	513	ucnv_loadSharedData("ISO8859_7", &stackPieces, &stackArgs, errorCode);
	514	}
	515	myConverterData->myConverterArray[JISX208] =
	516	ucnv_loadSharedData("Shift-JIS", &stackPieces, &stackArgs, errorCode);
	517	if(jpCharsetMasks[version]&CSM(JISX212)) {
	518	myConverterData->myConverterArray[JISX212] =
	519	ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, errorCode);
	520	}
	521	if(jpCharsetMasks[version]&CSM(GB2312)) {
	522	myConverterData->myConverterArray[GB2312] =
	523	ucnv_loadSharedData("ibm-5478", &stackPieces, &stackArgs, errorCode); /* gb_2312_80-1 */
	524	}
	525	if(jpCharsetMasks[version]&CSM(KSC5601)) {
	526	myConverterData->myConverterArray[KSC5601] =
	527	ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, errorCode);
	528	}
	529
	530	/* set the function pointers to appropriate funtions */
	531	cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData);
	532	uprv_strcpy(myConverterData->locale,"ja");
	533
	534	(void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ja,version=");
	535	size_t len = uprv_strlen(myConverterData->name);
	536	myConverterData->name[len]=(char)(myConverterData->version+(int)'0');
	537	myConverterData->name[len+1]='\0';
	538	}
	539	#if !UCONFIG_ONLY_HTML_CONVERSION
	540	else if(myLocale[0]=='k' && (myLocale[1]=='o'\|\| myLocale[1]=='r') &&
	541	(myLocale[2]=='_' \|\| myLocale[2]=='\0'))
	542	{
	543	if(version>1) {
	544	// ICU 55 fails to open a converter for an unsupported version.
	545	// Previously, it fell back to version 0, but that would yield
	546	// unexpected behavior.
	547	*errorCode = U_MISSING_RESOURCE_ERROR;
	548	return;
	549	}
	550	const char *cnvName;
	551	if(version==1) {
	552	cnvName="icu-internal-25546";
	553	} else {
	554	cnvName="ibm-949";
	555	myConverterData->version=version=0;
	556	}
	557	if(pArgs->onlyTestIsLoadable) {
	558	ucnv_canCreateConverter(cnvName, errorCode); /* errorCode carries result */
	559	uprv_free(cnv->extraInfo);
	560	cnv->extraInfo=NULL;
	561	return;
	562	} else {
	563	myConverterData->currentConverter=ucnv_open(cnvName, errorCode);
	564	if (U_FAILURE(*errorCode)) {
	565	_ISO2022Close(cnv);
	566	return;
	567	}
	568
	569	if(version==1) {
	570	(void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ko,version=1");
	571	uprv_memcpy(cnv->subChars, myConverterData->currentConverter->subChars, 4);
	572	cnv->subCharLen = myConverterData->currentConverter->subCharLen;
	573	}else{
	574	(void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ko,version=0");
	575	}
	576
	577	/* initialize the state variables */
	578	setInitialStateToUnicodeKR(cnv, myConverterData);
	579	setInitialStateFromUnicodeKR(cnv, myConverterData);
	580
	581	/* set the function pointers to appropriate funtions */
	582	cnv->sharedData=(UConverterSharedData*)&_ISO2022KRData;
	583	uprv_strcpy(myConverterData->locale,"ko");
	584	}
	585	}
	586	else if(((myLocale[0]=='z' && myLocale[1]=='h') \|\| (myLocale[0]=='c'&& myLocale[1]=='n'))&&
	587	(myLocale[2]=='_' \|\| myLocale[2]=='\0'))
	588	{
	589	if(version>2) {
	590	// ICU 55 fails to open a converter for an unsupported version.
	591	// Previously, it fell back to version 0, but that would yield
	592	// unexpected behavior.
	593	*errorCode = U_MISSING_RESOURCE_ERROR;
	594	return;
	595	}
	596
	597	/* open the required converters and cache them */
	598	myConverterData->myConverterArray[GB2312_1] =
	599	ucnv_loadSharedData("ibm-5478", &stackPieces, &stackArgs, errorCode);
	600	if(version==1) {
	601	myConverterData->myConverterArray[ISO_IR_165] =
	602	ucnv_loadSharedData("iso-ir-165", &stackPieces, &stackArgs, errorCode);
	603	}
	604	myConverterData->myConverterArray[CNS_11643] =
	605	ucnv_loadSharedData("cns-11643-1992", &stackPieces, &stackArgs, errorCode);
	606
	607
	608	/* set the function pointers to appropriate funtions */
	609	cnv->sharedData=(UConverterSharedData*)&_ISO2022CNData;
	610	uprv_strcpy(myConverterData->locale,"cn");
	611
	612	if (version==0){
	613	myConverterData->version = 0;
	614	(void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=0");
	615	}else if (version==1){
	616	myConverterData->version = 1;
	617	(void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=1");
	618	}else {
	619	myConverterData->version = 2;
	620	(void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=2");
	621	}
	622	}
	623	#endif // !UCONFIG_ONLY_HTML_CONVERSION
	624	else{
	625	#ifdef U_ENABLE_GENERIC_ISO_2022
	626	myConverterData->isFirstBuffer = TRUE;
	627
	628	/* append the UTF-8 escape sequence */
	629	cnv->charErrorBufferLength = 3;
	630	cnv->charErrorBuffer[0] = 0x1b;
	631	cnv->charErrorBuffer[1] = 0x25;
	632	cnv->charErrorBuffer[2] = 0x42;
	633
	634	cnv->sharedData=(UConverterSharedData*)&_ISO2022Data;
	635	/* initialize the state variables */
	636	uprv_strcpy(myConverterData->name,"ISO_2022");
	637	#else
	638	*errorCode = U_MISSING_RESOURCE_ERROR;
	639	// Was U_UNSUPPORTED_ERROR but changed in ICU 55 to a more standard
	640	// data loading error code.
	641	return;
	642	#endif
	643	}
	644
	645	cnv->maxBytesPerUChar=cnv->sharedData->staticData->maxBytesPerChar;
	646
	647	if(U_FAILURE(*errorCode) \|\| pArgs->onlyTestIsLoadable) {
	648	_ISO2022Close(cnv);
	649	}
	650	} else {
	651	*errorCode = U_MEMORY_ALLOCATION_ERROR;
	652	}
	653	}
	654
	655
	656	static void U_CALLCONV
	657	_ISO2022Close(UConverter *converter) {
	658	UConverterDataISO2022* myData =(UConverterDataISO2022 *) (converter->extraInfo);
	659	UConverterSharedData **array = myData->myConverterArray;
	660	int32_t i;
	661
	662	if (converter->extraInfo != NULL) {
	663	/close the array of converter pointers and free the memory/
	664	for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) {
	665	if(array[i]!=NULL) {
	666	ucnv_unloadSharedDataIfReady(array[i]);
	667	}
	668	}
	669
	670	ucnv_close(myData->currentConverter);
	671
	672	if(!converter->isExtraLocal){
	673	uprv_free (converter->extraInfo);
	674	converter->extraInfo = NULL;
	675	}
	676	}
	677	}
	678
	679	static void U_CALLCONV
	680	_ISO2022Reset(UConverter *converter, UConverterResetChoice choice) {
	681	UConverterDataISO2022 myConverterData=(UConverterDataISO2022 ) (converter->extraInfo);
	682	if(choice<=UCNV_RESET_TO_UNICODE) {
	683	uprv_memset(&myConverterData->toU2022State, 0, sizeof(ISO2022State));
	684	myConverterData->key = 0;
	685	myConverterData->isEmptySegment = FALSE;
	686	}
	687	if(choice!=UCNV_RESET_TO_UNICODE) {
	688	uprv_memset(&myConverterData->fromU2022State, 0, sizeof(ISO2022State));
	689	}
	690	#ifdef U_ENABLE_GENERIC_ISO_2022
	691	if(myConverterData->locale[0] == 0){
	692	if(choice<=UCNV_RESET_TO_UNICODE) {
	693	myConverterData->isFirstBuffer = TRUE;
	694	myConverterData->key = 0;
	695	if (converter->mode == UCNV_SO){
	696	ucnv_close (myConverterData->currentConverter);
	697	myConverterData->currentConverter=NULL;
	698	}
	699	converter->mode = UCNV_SI;
	700	}
	701	if(choice!=UCNV_RESET_TO_UNICODE) {
	702	/* re-append UTF-8 escape sequence */
	703	converter->charErrorBufferLength = 3;
	704	converter->charErrorBuffer[0] = 0x1b;
	705	converter->charErrorBuffer[1] = 0x28;
	706	converter->charErrorBuffer[2] = 0x42;
	707	}
	708	}
	709	else
	710	#endif
	711	{
	712	/* reset the state variables */
	713	if(myConverterData->locale[0] == 'k'){
	714	if(choice<=UCNV_RESET_TO_UNICODE) {
	715	setInitialStateToUnicodeKR(converter, myConverterData);
	716	}
	717	if(choice!=UCNV_RESET_TO_UNICODE) {
	718	setInitialStateFromUnicodeKR(converter, myConverterData);
	719	}
	720	}
	721	}
	722	}
	723
	724	U_CDECL_BEGIN
	725
	726	static const char * U_CALLCONV
	727	_ISO2022getName(const UConverter* cnv){
	728	if(cnv->extraInfo){
	729	UConverterDataISO2022* myData= (UConverterDataISO2022*)cnv->extraInfo;
	730	return myData->name;
	731	}
	732	return NULL;
	733	}
	734
	735	U_CDECL_END
	736
	737
	738	/************* to unicode *****************/
	739	/****************************************************************************
	740	* Recognized escape sequences are
	741	* <ESC>(B ASCII
	742	* <ESC>.A ISO-8859-1
	743	* <ESC>.F ISO-8859-7
	744	* <ESC>(J JISX-201
	745	* <ESC>(I JISX-201
	746	* <ESC>$B JISX-208
	747	* <ESC>$@ JISX-208
	748	* <ESC>$(D JISX-212
	749	* <ESC>$A GB2312
	750	* <ESC>$(C KSC5601
	751	*/
		/*
		 * ISO-2022-JP: StateEnum value selected by the escape sequence whose key is
		 * at the same index of escSeqStateTable_Key_2022[]; INVALID_STATE where the
		 * sequence is not recognized for this variant.
		 */
	752	static const int8_t nextStateToUnicodeJP[MAX_STATES_2022]= {
	753	/* 0 1 2 3 4 5 6 7 8 9 */
	754	INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,SS2_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
	755	,ASCII ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,JISX201 ,HWKANA_7BIT ,JISX201 ,INVALID_STATE
	756	,INVALID_STATE ,INVALID_STATE ,JISX208 ,GB2312 ,JISX208 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
	757	,ISO8859_1 ,ISO8859_7 ,JISX208 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,KSC5601 ,JISX212 ,INVALID_STATE
	758	,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
	759	,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
	760	,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
	761	,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
	762	};
	763
	764	#if !UCONFIG_ONLY_HTML_CONVERSION
	765	/************* to unicode *****************/
		/*
		 * ISO-2022-CN: StateEnum value selected by the escape sequence whose key is
		 * at the same index of escSeqStateTable_Key_2022[]; INVALID_STATE where the
		 * sequence is not recognized for this variant.
		 */
	766	static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= {
	767	/* 0 1 2 3 4 5 6 7 8 9 */
	768	INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,SS2_STATE ,SS3_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
	769	,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
	770	,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
	771	,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
	772	,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,GB2312_1 ,INVALID_STATE ,ISO_IR_165
	773	,CNS_11643_1 ,CNS_11643_2 ,CNS_11643_3 ,CNS_11643_4 ,CNS_11643_5 ,CNS_11643_6 ,CNS_11643_7 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
	774	,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
	775	,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
	776	};
	777	#endif
	778
	779
	780	static UCNV_TableStates_2022
	781	getKey_2022(char c,int32_t* key,int32_t* offset){
	782	int32_t togo;
	783	int32_t low = 0;
	784	int32_t hi = MAX_STATES_2022;
	785	int32_t oldmid=0;
	786
	787	togo = normalize_esq_chars_2022[(uint8_t)c];
	788	if(togo == 0) {
	789	/* not a valid character anywhere in an escape sequence */
	790	*key = 0;
	791	*offset = 0;
	792	return INVALID_2022;
	793	}
	794	togo = (*key << 5) + togo;
	795
	796	while (hi != low) /binary search/{
	797
	798	int32_t mid = (hi+low) >> 1; /Finds median/
	799
	800	if (mid == oldmid)
	801	break;
	802
	803	if (escSeqStateTable_Key_2022[mid] > togo){
	804	hi = mid;
	805	}
	806	else if (escSeqStateTable_Key_2022[mid] < togo){
	807	low = mid;
	808	}
	809	else /we found it/{
	810	*key = togo;
	811	*offset = mid;
	812	return (UCNV_TableStates_2022)escSeqStateTable_Value_2022[mid];
	813	}
	814	oldmid = mid;
	815
	816	}
	817
	818	*key = 0;
	819	*offset = 0;
	820	return INVALID_2022;
	821	}
	822
	823	/*runs through a state machine to determine the escape sequence - codepage correspondance
	824	*/
	825	static void
	826	changeState_2022(UConverter* _this,
	827	const char** source,
	828	const char* sourceLimit,
	829	Variant2022 var,
	830	UErrorCode* err){
	831	UCNV_TableStates_2022 value;
	832	UConverterDataISO2022* myData2022 = ((UConverterDataISO2022*)_this->extraInfo);
	833	uint32_t key = myData2022->key;
	834	int32_t offset = 0;
	835	int8_t initialToULength = _this->toULength;
	836	char c;
	837
	838	value = VALID_NON_TERMINAL_2022;
	839	while (*source < sourceLimit) {
	840	c = (source)++;
	841	_this->toUBytes[_this->toULength++]=(uint8_t)c;
	842	value = getKey_2022(c,(int32_t *) &key, &offset);
	843
	844	switch (value){
	845
	846	case VALID_NON_TERMINAL_2022 :
	847	/* continue with the loop */
	848	break;
	849
	850	case VALID_TERMINAL_2022:
	851	key = 0;
	852	goto DONE;
	853
	854	case INVALID_2022:
	855	goto DONE;
	856
	857	case VALID_MAYBE_TERMINAL_2022:
	858	#ifdef U_ENABLE_GENERIC_ISO_2022
	859	/* ESC ( B is ambiguous only for ISO_2022 itself */
	860	if(var == ISO_2022) {
	861	/* discard toUBytes[] for ESC ( B because this sequence is correct and complete */
	862	_this->toULength = 0;
	863
	864	/* TODO need to indicate that ESC ( B was seen; if failure, then need to replay from source or from MBCS-style replay */
	865
	866	/* continue with the loop */
	867	value = VALID_NON_TERMINAL_2022;
	868	break;
	869	} else
	870	#endif
	871	{
	872	/* not ISO_2022 itself, finish here */
	873	value = VALID_TERMINAL_2022;
	874	key = 0;
	875	goto DONE;
	876	}
	877	}
	878	}
	879
	880	DONE:
	881	myData2022->key = key;
	882
	883	if (value == VALID_NON_TERMINAL_2022) {
	884	/* indicate that the escape sequence is incomplete: key!=0 */
	885	return;
	886	} else if (value == INVALID_2022 ) {
	887	*err = U_ILLEGAL_ESCAPE_SEQUENCE;
	888	} else /* value == VALID_TERMINAL_2022 */ {
	889	switch(var){
	890	#ifdef U_ENABLE_GENERIC_ISO_2022
	891	case ISO_2022:
	892	{
	893	const char *chosenConverterName = escSeqStateTable_Result_2022[offset];
	894	if(chosenConverterName == NULL) {
	895	/* SS2 or SS3 */
	896	*err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
	897	_this->toUCallbackReason = UCNV_UNASSIGNED;
	898	return;
	899	}
	900
	901	_this->mode = UCNV_SI;
	902	ucnv_close(myData2022->currentConverter);
	903	myData2022->currentConverter = myUConverter = ucnv_open(chosenConverterName, err);
	904	if(U_SUCCESS(*err)) {
	905	myUConverter->fromCharErrorBehaviour = UCNV_TO_U_CALLBACK_STOP;
	906	_this->mode = UCNV_SO;
	907	}
	908	break;
	909	}
	910	#endif
	911	case ISO_2022_JP:
	912	{
	913	StateEnum tempState=(StateEnum)nextStateToUnicodeJP[offset];
	914	switch(tempState) {
	915	case INVALID_STATE:
	916	*err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
	917	break;
	918	case SS2_STATE:
	919	if(myData2022->toU2022State.cs[2]!=0) {
	920	if(myData2022->toU2022State.g<2) {
	921	myData2022->toU2022State.prevG=myData2022->toU2022State.g;
	922	}
	923	myData2022->toU2022State.g=2;
	924	} else {
	925	/* illegal to have SS2 before a matching designator */
	926	*err = U_ILLEGAL_ESCAPE_SEQUENCE;
	927	}
	928	break;
	929	/* case SS3_STATE: not used in ISO-2022-JP-x */
	930	case ISO8859_1:
	931	case ISO8859_7:
	932	if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {
	933	*err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
	934	} else {
	935	/* G2 charset for SS2 */
	936	myData2022->toU2022State.cs[2]=(int8_t)tempState;
	937	}
	938	break;
	939	default:
	940	if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {
	941	*err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
	942	} else {
	943	/* G0 charset */
	944	myData2022->toU2022State.cs[0]=(int8_t)tempState;
	945	}
	946	break;
	947	}
	948	}
	949	break;
	950	#if !UCONFIG_ONLY_HTML_CONVERSION
	951	case ISO_2022_CN:
	952	{
	953	StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset];
	954	switch(tempState) {
	955	case INVALID_STATE:
	956	*err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
	957	break;
	958	case SS2_STATE:
	959	if(myData2022->toU2022State.cs[2]!=0) {
	960	if(myData2022->toU2022State.g<2) {
	961	myData2022->toU2022State.prevG=myData2022->toU2022State.g;
	962	}
	963	myData2022->toU2022State.g=2;
	964	} else {
	965	/* illegal to have SS2 before a matching designator */
	966	*err = U_ILLEGAL_ESCAPE_SEQUENCE;
	967	}
	968	break;
	969	case SS3_STATE:
	970	if(myData2022->toU2022State.cs[3]!=0) {
	971	if(myData2022->toU2022State.g<2) {
	972	myData2022->toU2022State.prevG=myData2022->toU2022State.g;
	973	}
	974	myData2022->toU2022State.g=3;
	975	} else {
	976	/* illegal to have SS3 before a matching designator */
	977	*err = U_ILLEGAL_ESCAPE_SEQUENCE;
	978	}
	979	break;
	980	case ISO_IR_165:
	981	if(myData2022->version==0) {
	982	*err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
	983	break;
	984	}
	985	U_FALLTHROUGH;
	986	case GB2312_1:
	987	U_FALLTHROUGH;
	988	case CNS_11643_1:
	989	myData2022->toU2022State.cs[1]=(int8_t)tempState;
	990	break;
	991	case CNS_11643_2:
	992	myData2022->toU2022State.cs[2]=(int8_t)tempState;
	993	break;
	994	default:
	995	/* other CNS 11643 planes */
	996	if(myData2022->version==0) {
	997	*err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
	998	} else {
	999	myData2022->toU2022State.cs[3]=(int8_t)tempState;
	1000	}
	1001	break;
	1002	}
	1003	}
	1004	break;
	1005	case ISO_2022_KR:
	1006	if(offset==0x30){
	1007	/* nothing to be done, just accept this one escape sequence */
	1008	} else {
	1009	*err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
	1010	}
	1011	break;
	1012	#endif // !UCONFIG_ONLY_HTML_CONVERSION
	1013
	1014	default:
	1015	*err = U_ILLEGAL_ESCAPE_SEQUENCE;
	1016	break;
	1017	}
	1018	}
	1019	if(U_SUCCESS(*err)) {
	1020	_this->toULength = 0;
	1021	} else if(*err==U_ILLEGAL_ESCAPE_SEQUENCE) {
	1022	if(_this->toULength>1) {
	1023	/*
	1024	* Ticket 5691: consistent illegal sequences:
	1025	* - We include at least the first byte (ESC) in the illegal sequence.
	1026	* - If any of the non-initial bytes could be the start of a character,
	1027	* we stop the illegal sequence before the first one of those.
	1028	* In escape sequences, all following bytes are "printable", that is,
	1029	* unless they are completely illegal (>7f in SBCS, outside 21..7e in DBCS),
	1030	* they are valid single/lead bytes.
	1031	* For simplicity, we always only report the initial ESC byte as the
	1032	* illegal sequence and back out all other bytes we looked at.
	1033	*/
	1034	/* Back out some bytes. */
	1035	int8_t backOutDistance=_this->toULength-1;
	1036	int8_t bytesFromThisBuffer=_this->toULength-initialToULength;
	1037	if(backOutDistance<=bytesFromThisBuffer) {
	1038	/* same as initialToULength<=1 */
	1039	*source-=backOutDistance;
	1040	} else {
	1041	/* Back out bytes from the previous buffer: Need to replay them. */
	1042	_this->preToULength=(int8_t)(bytesFromThisBuffer-backOutDistance);
	1043	/* same as -(initialToULength-1) */
	1044	/* preToULength is negative! */
	1045	uprv_memcpy(_this->preToU, _this->toUBytes+1, -_this->preToULength);
	1046	*source-=bytesFromThisBuffer;
	1047	}
	1048	_this->toULength=1;
	1049	}
	1050	} else if(*err==U_UNSUPPORTED_ESCAPE_SEQUENCE) {
	1051	_this->toUCallbackReason = UCNV_UNASSIGNED;
	1052	}
	1053	}
	1054
	1055	#if !UCONFIG_ONLY_HTML_CONVERSION
	1056	/*Checks the characters of the buffer against valid 2022 escape sequences
	1057	*if the match we return a pointer to the initial start of the sequence otherwise
	1058	*we return sourceLimit
	1059	*/
	1060	/*for 2022 looks ahead in the stream
	1061	*to determine the longest possible convertible
	1062	*data stream
	1063	*/
	1064	static inline const char*
	1065	getEndOfBuffer_2022(const char** source,
	1066	const char* sourceLimit,
	1067	UBool /flush/){
	1068
	1069	const char* mySource = *source;
	1070
	1071	#ifdef U_ENABLE_GENERIC_ISO_2022
	1072	if (*source >= sourceLimit)
	1073	return sourceLimit;
	1074
	1075	do{
	1076
	1077	if (*mySource == ESC_2022){
	1078	int8_t i;
	1079	int32_t key = 0;
	1080	int32_t offset;
	1081	UCNV_TableStates_2022 value = VALID_NON_TERMINAL_2022;
	1082
	1083	/* Kludge: I could not
	1084	* figure out the reason for validating an escape sequence
	1085	* twice - once here and once in changeState_2022().
	1086	* is it possible to have an ESC character in a ISO2022
	1087	* byte stream which is valid in a code page? Is it legal?
	1088	*/
	1089	for (i=0;
	1090	(mySource+i < sourceLimit)&&(value == VALID_NON_TERMINAL_2022);
	1091	i++) {
	1092	value = getKey_2022(*(mySource+i), &key, &offset);
	1093	}
	1094	if (value > 0 \|\| *mySource==ESC_2022)
	1095	return mySource;
	1096
	1097	if ((value == VALID_NON_TERMINAL_2022)&&(!flush) )
	1098	return sourceLimit;
	1099	}
	1100	}while (++mySource < sourceLimit);
	1101
	1102	return sourceLimit;
	1103	#else
	1104	while(mySource < sourceLimit && *mySource != ESC_2022) {
	1105	++mySource;
	1106	}
	1107	return mySource;
	1108	#endif
	1109	}
	1110	#endif
	1111
	1112	/* This inline function replicates code in _MBCSFromUChar32() function in ucnvmbcs.c
	1113	* any future change in _MBCSFromUChar32() function should be reflected here.
	1114	* @return number of bytes in *value; negative number if fallback; 0 if no mapping
	1115	*/
	1116	static inline int32_t
	1117	MBCS_FROM_UCHAR32_ISO2022(UConverterSharedData* sharedData,
	1118	UChar32 c,
	1119	uint32_t* value,
	1120	UBool useFallback,
	1121	int outputType)
	1122	{
	1123	const int32_t *cx;
	1124	const uint16_t *table;
	1125	uint32_t stage2Entry;
	1126	uint32_t myValue;
	1127	int32_t length;
	1128	const uint8_t *p;
	1129	/*
	1130	* TODO(markus): Use and require new, faster MBCS conversion table structures.
	1131	* Use internal version of ucnv_open() that verifies that the new structures are available,
	1132	* else U_INTERNAL_PROGRAM_ERROR.
	1133	*/
	1134	/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
	1135	if(c<0x10000 \|\| (sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
	1136	table=sharedData->mbcs.fromUnicodeTable;
	1137	stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
	1138	/* get the bytes and the length for the output */
	1139	if(outputType==MBCS_OUTPUT_2){
	1140	myValue=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
	1141	if(myValue<=0xff) {
	1142	length=1;
	1143	} else {
	1144	length=2;
	1145	}
	1146	} else /* outputType==MBCS_OUTPUT_3 */ {
	1147	p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
	1148	myValue=((uint32_t)*p<<16)\|((uint32_t)p[1]<<8)\|p[2];
	1149	if(myValue<=0xff) {
	1150	length=1;
	1151	} else if(myValue<=0xffff) {
	1152	length=2;
	1153	} else {
	1154	length=3;
	1155	}
	1156	}
	1157	/* is this code point assigned, or do we use fallbacks? */
	1158	if((stage2Entry&(1<<(16+(c&0xf))))!=0) {
	1159	/* assigned */
	1160	*value=myValue;
	1161	return length;
	1162	} else if(FROM_U_USE_FALLBACK(useFallback, c) && myValue!=0) {
	1163	/*
	1164	* We allow a 0 byte output if the "assigned" bit is set for this entry.
	1165	* There is no way with this data structure for fallback output
	1166	* to be a zero byte.
	1167	*/
	1168	*value=myValue;
	1169	return -length;
	1170	}
	1171	}
	1172
	1173	cx=sharedData->mbcs.extIndexes;
	1174	if(cx!=NULL) {
	1175	return ucnv_extSimpleMatchFromU(cx, c, value, useFallback);
	1176	}
	1177
	1178	/* unassigned */
	1179	return 0;
	1180	}
	1181
	1182	/* This inline function replicates code in _MBCSSingleFromUChar32() function in ucnvmbcs.c
	1183	* any future change in _MBCSSingleFromUChar32() function should be reflected here.
	1184	* @param retval pointer to output byte
	1185	* @return 1 roundtrip byte 0 no mapping -1 fallback byte
	1186	*/
	1187	static inline int32_t
	1188	MBCS_SINGLE_FROM_UCHAR32(UConverterSharedData* sharedData,
	1189	UChar32 c,
	1190	uint32_t* retval,
	1191	UBool useFallback)
	1192	{
	1193	const uint16_t *table;
	1194	int32_t value;
	1195	/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
	1196	if(c>=0x10000 && !(sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
	1197	return 0;
	1198	}
	1199	/* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */
	1200	table=sharedData->mbcs.fromUnicodeTable;
	1201	/* get the byte for the output */
	1202	value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t *)sharedData->mbcs.fromUnicodeBytes, c);
	1203	/* is this code point assigned, or do we use fallbacks? */
	1204	*retval=(uint32_t)(value&0xff);
	1205	if(value>=0xf00) {
	1206	return 1; /* roundtrip */
	1207	} else if(useFallback ? value>=0x800 : value>=0xc00) {
	1208	return -1; /* fallback taken */
	1209	} else {
	1210	return 0; /* no mapping */
	1211	}
	1212	}
	1213
	/*
	 * Convert a strict EUC DBCS value to its ISO 2022 form.
	 * The value is accepted only if its low 16 bits lie in A1A1..FEFE and its
	 * trail byte lies in A1..FE; 0x80 is then subtracted from each of the two
	 * bytes, moving them to the ISO 2022 range 21..7E.
	 * Return 0 if out of range.
	 */
	static inline uint32_t
	_2022FromGR94DBCS(uint32_t value) {
	    const uint16_t pairDelta = (uint16_t)(value - 0xa1a1);
	    const uint8_t trailDelta = (uint8_t)(value - 0xa1);

	    if(pairDelta > (0xfefe - 0xa1a1) || trailDelta > (0xfe - 0xa1)) {
	        return 0; /* not valid for ISO 2022 */
	    }
	    return value - 0x8080; /* shift down to 21..7e byte range */
	}
	1230
	1231	#if 0 /* 5691: Call sites now check for validity. They can just += 0x8080 after that. */
	/*
	 * Reverse of _2022FromGR94DBCS(): adds 0x80 to each byte of a 2022 code point.
	 * The shifted value is returned only if its low 16 bits lie in A1A1..FEFE and
	 * its trail byte lies in A1..FE; otherwise the 2022 code point is returned
	 * unchanged.
	 */
	static inline uint32_t
	_2022ToGR94DBCS(uint32_t value) {
	    const uint32_t shifted = value + 0x8080;
	    const int pairInRange = (uint16_t)(shifted - 0xa1a1) <= (0xfefe - 0xa1a1);
	    const int trailInRange = (uint8_t)(shifted - 0xa1) <= (0xfe - 0xa1);

	    return (pairInRange && trailInRange) ? shifted : value;
	}
	1247	#endif
	1248
	1249	#ifdef U_ENABLE_GENERIC_ISO_2022
	1250
	1251	/**********************************************************************************
	1252	* ISO-2022 Converter
	1253	*
	1254	*
	1255	*/
	1256
	1257	static void U_CALLCONV
	1258	T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverterToUnicodeArgs* args,
	1259	UErrorCode* err){
	1260	const char* mySourceLimit, *realSourceLimit;
	1261	const char* sourceStart;
	1262	const UChar* myTargetStart;
	1263	UConverter* saveThis;
	1264	UConverterDataISO2022* myData;
	1265	int8_t length;
	1266
	1267	saveThis = args->converter;
	1268	myData=((UConverterDataISO2022*)(saveThis->extraInfo));
	1269
	1270	realSourceLimit = args->sourceLimit;
	1271	while (args->source < realSourceLimit) {
	1272	if(myData->key == 0) { /* are we in the middle of an escape sequence? */
	1273	/Find the end of the buffer e.g : Next Escape Seq \| end of Buffer/
	1274	mySourceLimit = getEndOfBuffer_2022(&(args->source), realSourceLimit, args->flush);
	1275
	1276	if(args->source < mySourceLimit) {
	1277	if(myData->currentConverter==NULL) {
	1278	myData->currentConverter = ucnv_open("ASCII",err);
	1279	if(U_FAILURE(*err)){
	1280	return;
	1281	}
	1282
	1283	myData->currentConverter->fromCharErrorBehaviour = UCNV_TO_U_CALLBACK_STOP;
	1284	saveThis->mode = UCNV_SO;
	1285	}
	1286
	1287	/* convert to before the ESC or until the end of the buffer */
	1288	myData->isFirstBuffer=FALSE;
	1289	sourceStart = args->source;
	1290	myTargetStart = args->target;
	1291	args->converter = myData->currentConverter;
	1292	ucnv_toUnicode(args->converter,
	1293	&args->target,
	1294	args->targetLimit,
	1295	&args->source,
	1296	mySourceLimit,
	1297	args->offsets,
	1298	(UBool)(args->flush && mySourceLimit == realSourceLimit),
	1299	err);
	1300	args->converter = saveThis;
	1301
	1302	if (*err == U_BUFFER_OVERFLOW_ERROR) {
	1303	/* move the overflow buffer */
	1304	length = saveThis->UCharErrorBufferLength = myData->currentConverter->UCharErrorBufferLength;
	1305	myData->currentConverter->UCharErrorBufferLength = 0;
	1306	if(length > 0) {
	1307	uprv_memcpy(saveThis->UCharErrorBuffer,
	1308	myData->currentConverter->UCharErrorBuffer,
	1309	length*U_SIZEOF_UCHAR);
	1310	}
	1311	return;
	1312	}
	1313
	1314	/*
	1315	* At least one of:
	1316	* -Error while converting
	1317	* -Done with entire buffer
	1318	* -Need to write offsets or update the current offset
	1319	* (leave that up to the code in ucnv.c)
	1320	*
	1321	* or else we just stopped at an ESC byte and continue with changeState_2022()
	1322	*/
	1323	if (U_FAILURE(*err) \|\|
	1324	(args->source == realSourceLimit) \|\|
	1325	(args->offsets != NULL && (args->target != myTargetStart \|\| args->source != sourceStart) \|\|
	1326	(mySourceLimit < realSourceLimit && myData->currentConverter->toULength > 0))
	1327	) {
	1328	/* copy partial or error input for truncated detection and error handling */
	1329	if(U_FAILURE(*err)) {
	1330	length = saveThis->invalidCharLength = myData->currentConverter->invalidCharLength;
	1331	if(length > 0) {
	1332	uprv_memcpy(saveThis->invalidCharBuffer, myData->currentConverter->invalidCharBuffer, length);
	1333	}
	1334	} else {
	1335	length = saveThis->toULength = myData->currentConverter->toULength;
	1336	if(length > 0) {
	1337	uprv_memcpy(saveThis->toUBytes, myData->currentConverter->toUBytes, length);
	1338	if(args->source < mySourceLimit) {
	1339	err = U_TRUNCATED_CHAR_FOUND; / truncated input before ESC */
	1340	}
	1341	}
	1342	}
	1343	return;
	1344	}
	1345	}
	1346	}
	1347
	1348	sourceStart = args->source;
	1349	changeState_2022(args->converter,
	1350	&(args->source),
	1351	realSourceLimit,
	1352	ISO_2022,
	1353	err);
	1354	if (U_FAILURE(*err) \|\| (args->source != sourceStart && args->offsets != NULL)) {
	1355	/* let the ucnv.c code update its current offset */
	1356	return;
	1357	}
	1358	}
	1359	}
	1360
	1361	#endif
	1362
	1363	/*
	1364	* To Unicode Callback helper function
	1365	*/
	1366	static void
	1367	toUnicodeCallback(UConverter *cnv,
	1368	const uint32_t sourceChar, const uint32_t targetUniChar,
	1369	UErrorCode* err){
	1370	if(sourceChar>0xff){
	1371	cnv->toUBytes[0] = (uint8_t)(sourceChar>>8);
	1372	cnv->toUBytes[1] = (uint8_t)sourceChar;
	1373	cnv->toULength = 2;
	1374	}
	1375	else{
	1376	cnv->toUBytes[0] =(char) sourceChar;
	1377	cnv->toULength = 1;
	1378	}
	1379
	1380	if(targetUniChar == (missingCharMarker-1/0xfffe/)){
	1381	*err = U_INVALID_CHAR_FOUND;
	1382	}
	1383	else{
	1384	*err = U_ILLEGAL_CHAR_FOUND;
	1385	}
	1386	}
	1387
	1388	/************************************ISO-2022-JP***********************************************/
	1389
	1390	/************************************ IMPORTANT ************************************************
	1391	* The UConverter_fromUnicode_ISO2022_JP converter does not use ucnv_fromUnicode() functions for SBCS,DBCS and
	1392	* MBCS; instead, the values are obtained directly by calling _MBCSFromUChar32().
	1393	* The converter iterates over each Unicode codepoint
	1394	* to obtain the equivalent codepoints from the codepages supported. Since the source buffer is
	1395	* processed one char at a time it would make sense to reduce the extra processing a canned converter
	1396	* would do as far as possible.
	1397	*
	1398	* If the implementation of these macros or structure of sharedData struct change in the future, make
	1399	* sure that ISO-2022 is also changed.
	1400	***************************************************************************************************
	1401	*/
	1402
	1403	/***************************************************************************************************
	1404	* Rules for ISO-2022-jp encoding
	1405	* (i) Escape sequences must be fully contained within a line they should not
	1406	* span new lines or CRs
	1407	* (ii) If the last character on a line is represented by two bytes then an ASCII or
	1408	* JIS-Roman character escape sequence should follow before the line terminates
	1409	* (iii) If the first character on the line is represented by two bytes then a two
	1410	* byte character escape sequence should precede it
	1411	* (iv) If no escape sequence is encountered then the characters are ASCII
	1412	* (v) Latin(ISO-8859-1) and Greek(ISO-8859-7) characters must be designated to G2,
	1413	* and invoked with SS2 (ESC N).
	1414	* (vi) If there is any G0 designation in text, there must be a switch to
	1415	* ASCII or to JIS X 0201-Roman before a space character (but not
	1416	* necessarily before "ESC 4/14 2/0" or "ESC N ' '") or control
	1417	* characters such as tab or CRLF.
	1418	* (vii) Supported encodings:
	1419	* ASCII, JISX201, JISX208, JISX212, GB2312, KSC5601, ISO-8859-1,ISO-8859-7
	1420	*
	1421	* source : RFC-1554
	1422	*
	1423	* JISX201, JISX208,JISX212 : new .cnv data files created
	1424	* KSC5601 : alias to ibm-949 mapping table
	1425	* GB2312 : alias to ibm-1386 mapping table
	1426	* ISO-8859-1 : Algorithmic implemented as LATIN1 case
	1427	* ISO-8859-7 : alias to ibm-9409 mapping table
	1428	*/
	1429
	1430	/* preference order of JP charsets */
	/*
	 * The from-Unicode code walks this array in order when it builds its list of
	 * charsets to try: after the current G0 and G2 charsets, it appends every
	 * entry that the version's charset mask allows and that is not listed yet.
	 */
	1431	static const StateEnum jpCharsetPref[]={
	1432	ASCII,
	1433	JISX201,
	1434	ISO8859_1,
	1435	JISX208,
	1436	ISO8859_7,
	1437	JISX212,
	1438	GB2312,
	1439	KSC5601,
	1440	HWKANA_7BIT
	1441	};
	1442
	1443	/*
	1444	* The escape sequences must be in order of the enum constants like JISX201 = 3,
	1445	* not in order of jpCharsetPref[]!
	1446	*/
	/*
	 * Designation escape sequences, one NUL-terminated string per charset.
	 * Each row has room for 6 chars; the longest sequence listed here is 4 bytes.
	 * The lengths are kept in the parallel escSeqCharsLen[] array.
	 */
	1447	static const char escSeqChars[][6] ={
	1448	"\x1B\x28\x42", /* <ESC>(B ASCII */
	1449	"\x1B\x2E\x41", /* <ESC>.A ISO-8859-1 */
	1450	"\x1B\x2E\x46", /* <ESC>.F ISO-8859-7 */
	1451	"\x1B\x28\x4A", /* <ESC>(J JISX-201 */
	1452	"\x1B\x24\x42", /* <ESC>$B JISX-208 */
	1453	"\x1B\x24\x28\x44", /* <ESC>$(D JISX-212 */
	1454	"\x1B\x24\x41", /* <ESC>$A GB2312 */
	1455	"\x1B\x24\x28\x43", /* <ESC>$(C KSC5601 */
	1456	"\x1B\x28\x49" /* <ESC>(I HWKANA_7BIT */
	1457
	1458	};
	/*
	 * Byte lengths of the escape sequences in escSeqChars[], in the same order
	 * (entry i here is the length of escSeqChars[i]).
	 */
	1459	static const int8_t escSeqCharsLen[] ={
	1460	3, /* length of <ESC>(B ASCII */
	1461	3, /* length of <ESC>.A ISO-8859-1 */
	1462	3, /* length of <ESC>.F ISO-8859-7 */
	1463	3, /* length of <ESC>(J JISX-201 */
	1464	3, /* length of <ESC>$B JISX-208 */
	1465	4, /* length of <ESC>$(D JISX-212 */
	1466	3, /* length of <ESC>$A GB2312 */
	1467	4, /* length of <ESC>$(C KSC5601 */
	1468	3 /* length of <ESC>(I HWKANA_7BIT */
	1469	};
	1470
	1471	/*
	1472	* The iteration over various code pages works this way:
	1473	* i) Get the currentState from myConverterData->currentState
	1474	* ii) Check if the character is mapped to a valid character in the currentState
	1475	* Yes -> a) set the initIterState to currentState
	1476	* b) remain in this state until an invalid character is found
	1477	* No -> a) go to the next code page and find the character
	1478	* iii) Before changing the state, increment the current state and check whether it
	1479	* is equal to the initIterState
	1480	* Yes -> A character that cannot be represented in any of the supported encodings
	1481	* break and return a U_INVALID_CHARACTER error
	1482	* No -> Continue and find the character in next code page
	1483	*
	1484	*
	1485	* TODO: Implement a priority technique where the users are allowed to set the priority of code pages
	1486	*/
	1487
	/*
	 * Map 00..7F to Unicode according to JIS X 0201.
	 * Only two values differ from the identity mapping: 5C maps to U+00A5 and
	 * 7E maps to U+203E. Every other value is returned unchanged.
	 */
	static inline uint32_t
	jisx201ToU(uint32_t value) {
	    switch(value) {
	    case 0x5c:
	        return 0xa5;
	    case 0x7e:
	        return 0x203e;
	    default:
	        return value;
	    }
	}
	1501
	/*
	 * Map Unicode to 00..7F according to JIS X 0201. Return U+FFFE if unmappable.
	 * U+00A5 and U+203E map to 5C and 7E; U+005C and U+007E themselves are
	 * unmappable; all other code points up to U+007F map to themselves.
	 */
	static inline uint32_t
	jisx201FromU(uint32_t value) {
	    switch(value) {
	    case 0xa5:
	        return 0x5c;
	    case 0x203e:
	        return 0x7e;
	    case 0x5c:
	    case 0x7e:
	        return 0xfffe;
	    default:
	        return value<=0x7f ? value : 0xfffe;
	    }
	}
	1516
	/*
	 * Convert a valid Shift-JIS byte pair to a pair of 21..7E bytes, after
	 * checking that it is in the range corresponding to JIS X 0208.
	 * Return 0 if the byte pair is out of range (above EFFC).
	 */
	static inline uint32_t
	_2022FromSJIS(uint32_t value) {
	    uint32_t lead;
	    uint32_t result;
	    uint8_t trail;

	    if(value > 0xEFFC) {
	        return 0; /* beyond JIS X 0208 */
	    }

	    trail = (uint8_t)value;
	    lead = value & 0xff00;

	    /* lead bytes up to 9F and lead bytes E0..EF use different offsets */
	    result = (lead - (lead <= 0x9f00 ? 0x7000 : 0xb000)) << 1;

	    if(trail > 0x9e) {
	        /* trail <= 0xfc: second row of the pair */
	        return result | (uint32_t)(trail - 0x7e);
	    }

	    /* first row of the pair */
	    result -= 0x100;
	    return result | (uint32_t)(trail - (trail <= 0x7e ? 0x1f : 0x20));
	}
	1552
	/*
	 * Convert a pair of JIS X 0208 21..7E bytes to Shift-JIS and store the
	 * result in bytes[0] (lead) and bytes[1] (trail).
	 * A byte outside 21..7E must not produce a valid Shift-JIS pair, so that the
	 * converter catches it: such bytes are replaced by 0 where the arithmetic
	 * below would not already yield an equally invalid Shift-JIS byte.
	 */
	static inline void
	_2022ToSJIS(uint8_t c1, uint8_t c2, char bytes[2]) {
	    const int oddRow = (c1 & 1) != 0;
	    uint8_t lead;
	    uint8_t trail;

	    if(oddRow) {
	        if(c2 <= 0x5f) {
	            trail = (uint8_t)(c2 + 0x1f);
	        } else if(c2 <= 0x7e) {
	            trail = (uint8_t)(c2 + 0x20);
	        } else {
	            trail = 0; /* invalid */
	        }
	    } else if((uint8_t)(c2-0x21) <= ((0x7e)-0x21)) {
	        trail = (uint8_t)(c2 + 0x7e);
	    } else {
	        trail = 0; /* invalid */
	    }

	    /* odd rows are rounded up to the next even value before halving */
	    lead = (uint8_t)((uint8_t)(c1 + oddRow) >> 1);
	    if(lead <= 0x2f) {
	        lead = (uint8_t)(lead + 0x70);
	    } else if(lead <= 0x3f) {
	        lead = (uint8_t)(lead + 0xb0);
	    } else {
	        lead = 0; /* invalid */
	    }

	    bytes[0] = (char)lead;
	    bytes[1] = (char)trail;
	}
	1589
	1590	/*
	1591	* JIS X 0208 has fallbacks from Unicode half-width Katakana to full-width (DBCS)
	1592	* Katakana.
	1593	* Now that we use a Shift-JIS table for JIS X 0208 we need to hardcode these fallbacks
	1594	* because Shift-JIS roundtrips half-width Katakana to single bytes.
	1595	* These were the only fallbacks in ICU's jisx-208.ucm file.
	1596	*/
	/*
	 * One entry per code point from HWKANA_START to HWKANA_END; the from-Unicode
	 * code reads hwkana_fb[sourceChar - HWKANA_START] and uses the value as a
	 * 2-byte fallback result. The U+FFxx comments mark every 16th code point.
	 */
	1597	static const uint16_t hwkana_fb[HWKANA_END - HWKANA_START + 1] = {
	1598	0x2123, /* U+FF61 */
	1599	0x2156,
	1600	0x2157,
	1601	0x2122,
	1602	0x2126,
	1603	0x2572,
	1604	0x2521,
	1605	0x2523,
	1606	0x2525,
	1607	0x2527,
	1608	0x2529,
	1609	0x2563,
	1610	0x2565,
	1611	0x2567,
	1612	0x2543,
	1613	0x213C, /* U+FF70 */
	1614	0x2522,
	1615	0x2524,
	1616	0x2526,
	1617	0x2528,
	1618	0x252A,
	1619	0x252B,
	1620	0x252D,
	1621	0x252F,
	1622	0x2531,
	1623	0x2533,
	1624	0x2535,
	1625	0x2537,
	1626	0x2539,
	1627	0x253B,
	1628	0x253D,
	1629	0x253F, /* U+FF80 */
	1630	0x2541,
	1631	0x2544,
	1632	0x2546,
	1633	0x2548,
	1634	0x254A,
	1635	0x254B,
	1636	0x254C,
	1637	0x254D,
	1638	0x254E,
	1639	0x254F,
	1640	0x2552,
	1641	0x2555,
	1642	0x2558,
	1643	0x255B,
	1644	0x255E,
	1645	0x255F, /* U+FF90 */
	1646	0x2560,
	1647	0x2561,
	1648	0x2562,
	1649	0x2564,
	1650	0x2566,
	1651	0x2568,
	1652	0x2569,
	1653	0x256A,
	1654	0x256B,
	1655	0x256C,
	1656	0x256D,
	1657	0x256F,
	1658	0x2573,
	1659	0x212B,
	1660	0x212C /* U+FF9F */
	1661	};
	1662
	1663	static void U_CALLCONV
	1664	UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err) {
	1665	UConverter *cnv = args->converter;
	1666	UConverterDataISO2022 *converterData;
	1667	ISO2022State *pFromU2022State;
	1668	uint8_t target = (uint8_t ) args->target;
	1669	const uint8_t targetLimit = (const uint8_t ) args->targetLimit;
	1670	const UChar* source = args->source;
	1671	const UChar* sourceLimit = args->sourceLimit;
	1672	int32_t* offsets = args->offsets;
	1673	UChar32 sourceChar;
	1674	char buffer[8];
	1675	int32_t len, outLen;
	1676	int8_t choices[10];
	1677	int32_t choiceCount;
	1678	uint32_t targetValue = 0;
	1679	UBool useFallback;
	1680
	1681	int32_t i;
	1682	int8_t cs, g;
	1683
	1684	/* set up the state */
	1685	converterData = (UConverterDataISO2022*)cnv->extraInfo;
	1686	pFromU2022State = &converterData->fromU2022State;
	1687
	1688	choiceCount = 0;
	1689
	1690	/* check if the last codepoint of previous buffer was a lead surrogate*/
	1691	if((sourceChar = cnv->fromUChar32)!=0 && target< targetLimit) {
	1692	goto getTrail;
	1693	}
	1694
	1695	while(source < sourceLimit) {
	1696	if(target < targetLimit) {
	1697
	1698	sourceChar = *(source++);
	1699	/check if the char is a First surrogate/
	1700	if(U16_IS_SURROGATE(sourceChar)) {
	1701	if(U16_IS_SURROGATE_LEAD(sourceChar)) {
	1702	getTrail:
	1703	/look ahead to find the trail surrogate/
	1704	if(source < sourceLimit) {
	1705	/* test the following code unit */
	1706	UChar trail=(UChar) *source;
	1707	if(U16_IS_TRAIL(trail)) {
	1708	source++;
	1709	sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail);
	1710	cnv->fromUChar32=0x00;
	1711	/* convert this supplementary code point */
	1712	/* exit this condition tree */
	1713	} else {
	1714	/* this is an unmatched lead code unit (1st surrogate) */
	1715	/* callback(illegal) */
	1716	*err=U_ILLEGAL_CHAR_FOUND;
	1717	cnv->fromUChar32=sourceChar;
	1718	break;
	1719	}
	1720	} else {
	1721	/* no more input */
	1722	cnv->fromUChar32=sourceChar;
	1723	break;
	1724	}
	1725	} else {
	1726	/* this is an unmatched trail code unit (2nd surrogate) */
	1727	/* callback(illegal) */
	1728	*err=U_ILLEGAL_CHAR_FOUND;
	1729	cnv->fromUChar32=sourceChar;
	1730	break;
	1731	}
	1732	}
	1733
	1734	/* do not convert SO/SI/ESC */
	1735	if(IS_2022_CONTROL(sourceChar)) {
	1736	/* callback(illegal) */
	1737	*err=U_ILLEGAL_CHAR_FOUND;
	1738	cnv->fromUChar32=sourceChar;
	1739	break;
	1740	}
	1741
	1742	/* do the conversion */
	1743
	1744	if(choiceCount == 0) {
	1745	uint16_t csm;
	1746
	1747	/*
	1748	* The csm variable keeps track of which charsets are allowed
	1749	* and not used yet while building the choices[].
	1750	*/
	1751	csm = jpCharsetMasks[converterData->version];
	1752	choiceCount = 0;
	1753
	1754	/* JIS7/8: try single-byte half-width Katakana before JISX208 */
	1755	if(converterData->version == 3 \|\| converterData->version == 4) {
	1756	choices[choiceCount++] = (int8_t)HWKANA_7BIT;
	1757	}
	1758	/* Do not try single-byte half-width Katakana for other versions. */
	1759	csm &= ~CSM(HWKANA_7BIT);
	1760
	1761	/* try the current G0 charset */
	1762	choices[choiceCount++] = cs = pFromU2022State->cs[0];
	1763	csm &= ~CSM(cs);
	1764
	1765	/* try the current G2 charset */
	1766	if((cs = pFromU2022State->cs[2]) != 0) {
	1767	choices[choiceCount++] = cs;
	1768	csm &= ~CSM(cs);
	1769	}
	1770
	1771	/* try all the other possible charsets */
	1772	for(i = 0; i < UPRV_LENGTHOF(jpCharsetPref); ++i) {
	1773	cs = (int8_t)jpCharsetPref[i];
	1774	if(CSM(cs) & csm) {
	1775	choices[choiceCount++] = cs;
	1776	csm &= ~CSM(cs);
	1777	}
	1778	}
	1779	}
	1780
	1781	cs = g = 0;
	1782	/*
	1783	* len==0: no mapping found yet
	1784	* len<0: found a fallback result: continue looking for a roundtrip but no further fallbacks
	1785	* len>0: found a roundtrip result, done
	1786	*/
	1787	len = 0;
	1788	/*
	1789	* We will turn off useFallback after finding a fallback,
	1790	* but we still get fallbacks from PUA code points as usual.
	1791	* Therefore, we will also need to check that we don't overwrite
	1792	* an early fallback with a later one.
	1793	*/
	1794	useFallback = cnv->useFallback;
	1795
	1796	for(i = 0; i < choiceCount && len <= 0; ++i) {
	1797	uint32_t value;
	1798	int32_t len2;
	1799	int8_t cs0 = choices[i];
	1800	switch(cs0) {
	1801	case ASCII:
	1802	if(sourceChar <= 0x7f) {
	1803	targetValue = (uint32_t)sourceChar;
	1804	len = 1;
	1805	cs = cs0;
	1806	g = 0;
	1807	}
	1808	break;
	1809	case ISO8859_1:
	1810	if(GR96_START <= sourceChar && sourceChar <= GR96_END) {
	1811	targetValue = (uint32_t)sourceChar - 0x80;
	1812	len = 1;
	1813	cs = cs0;
	1814	g = 2;
	1815	}
	1816	break;
	1817	case HWKANA_7BIT:
	1818	if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HWKANA_START)) {
	1819	if(converterData->version==3) {
	1820	/* JIS7: use G1 (SO) */
	1821	/* Shift U+FF61..U+FF9F to bytes 21..5F. */
	1822	targetValue = (uint32_t)(sourceChar - (HWKANA_START - 0x21));
	1823	len = 1;
	1824	pFromU2022State->cs[1] = cs = cs0; /* do not output an escape sequence */
	1825	g = 1;
	1826	} else if(converterData->version==4) {
	1827	/* JIS8: use 8-bit bytes with any single-byte charset, see escape sequence output below */
	1828	/* Shift U+FF61..U+FF9F to bytes A1..DF. */
	1829	targetValue = (uint32_t)(sourceChar - (HWKANA_START - 0xa1));
	1830	len = 1;
	1831
	1832	cs = pFromU2022State->cs[0];
	1833	if(IS_JP_DBCS(cs)) {
	1834	/* switch from a DBCS charset to JISX201 */
	1835	cs = (int8_t)JISX201;
	1836	}
	1837	/* else stay in the current G0 charset */
	1838	g = 0;
	1839	}
	1840	/* else do not use HWKANA_7BIT with other versions */
	1841	}
	1842	break;
	1843	case JISX201:
	1844	/* G0 SBCS */
	1845	value = jisx201FromU(sourceChar);
	1846	if(value <= 0x7f) {
	1847	targetValue = value;
	1848	len = 1;
	1849	cs = cs0;
	1850	g = 0;
	1851	useFallback = FALSE;
	1852	}
	1853	break;
	1854	case JISX208:
	1855	/* G0 DBCS from Shift-JIS table */
	1856	len2 = MBCS_FROM_UCHAR32_ISO2022(
	1857	converterData->myConverterArray[cs0],
	1858	sourceChar, &value,
	1859	useFallback, MBCS_OUTPUT_2);
	1860	if(len2 == 2 \|\| (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len)==2 */
	1861	value = _2022FromSJIS(value);
	1862	if(value != 0) {
	1863	targetValue = value;
	1864	len = len2;
	1865	cs = cs0;
	1866	g = 0;
	1867	useFallback = FALSE;
	1868	}
	1869	} else if(len == 0 && useFallback &&
	1870	(uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HWKANA_START)) {
	1871	targetValue = hwkana_fb[sourceChar - HWKANA_START];
	1872	len = -2;
	1873	cs = cs0;
	1874	g = 0;
	1875	useFallback = FALSE;
	1876	}
	1877	break;
	1878	case ISO8859_7:
	1879	/* G0 SBCS forced to 7-bit output */
	1880	len2 = MBCS_SINGLE_FROM_UCHAR32(
	1881	converterData->myConverterArray[cs0],
	1882	sourceChar, &value,
	1883	useFallback);
	1884	if(len2 != 0 && !(len2 < 0 && len != 0) && GR96_START <= value && value <= GR96_END) {
	1885	targetValue = value - 0x80;
	1886	len = len2;
	1887	cs = cs0;
	1888	g = 2;
	1889	useFallback = FALSE;
	1890	}
	1891	break;
	1892	default:
	1893	/* G0 DBCS */
	1894	len2 = MBCS_FROM_UCHAR32_ISO2022(
	1895	converterData->myConverterArray[cs0],
	1896	sourceChar, &value,
	1897	useFallback, MBCS_OUTPUT_2);
	1898	if(len2 == 2 \|\| (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len)==2 */
	1899	if(cs0 == KSC5601) {
	1900	/*
	1901	* Check for valid bytes for the encoding scheme.
	1902	* This is necessary because the sub-converter (windows-949)
	1903	* has a broader encoding scheme than is valid for 2022.
	1904	*/
	1905	value = _2022FromGR94DBCS(value);
	1906	if(value == 0) {
	1907	break;
	1908	}
	1909	}
	1910	targetValue = value;
	1911	len = len2;
	1912	cs = cs0;
	1913	g = 0;
	1914	useFallback = FALSE;
	1915	}
	1916	break;
	1917	}
	1918	}
	1919
	1920	if(len != 0) {
	1921	if(len < 0) {
	1922	len = -len; /* fallback */
	1923	}
	1924	outLen = 0; /* count output bytes */
	1925
	1926	/* write SI if necessary (only for JIS7) */
	1927	if(pFromU2022State->g == 1 && g == 0) {
	1928	buffer[outLen++] = UCNV_SI;
	1929	pFromU2022State->g = 0;
	1930	}
	1931
	1932	/* write the designation sequence if necessary */
	1933	if(cs != pFromU2022State->cs[g]) {
	1934	int32_t escLen = escSeqCharsLen[cs];
	1935	uprv_memcpy(buffer + outLen, escSeqChars[cs], escLen);
	1936	outLen += escLen;
	1937	pFromU2022State->cs[g] = cs;
	1938
	1939	/* invalidate the choices[] */
	1940	choiceCount = 0;
	1941	}
	1942
	1943	/* write the shift sequence if necessary */
	1944	if(g != pFromU2022State->g) {
	1945	switch(g) {
	1946	/* case 0 handled before writing escapes */
	1947	case 1:
	1948	buffer[outLen++] = UCNV_SO;
	1949	pFromU2022State->g = 1;
	1950	break;
	1951	default: /* case 2 */
	1952	buffer[outLen++] = 0x1b;
	1953	buffer[outLen++] = 0x4e;
	1954	break;
	1955	/* no case 3: no SS3 in ISO-2022-JP-x */
	1956	}
	1957	}
	1958
	1959	/* write the output bytes */
	1960	if(len == 1) {
	1961	buffer[outLen++] = (char)targetValue;
	1962	} else /* len == 2 */ {
	1963	buffer[outLen++] = (char)(targetValue >> 8);
	1964	buffer[outLen++] = (char)targetValue;
	1965	}
	1966	} else {
	1967	/*
	1968	* if we cannot find the character after checking all codepages
	1969	* then this is an error
	1970	*/
	1971	*err = U_INVALID_CHAR_FOUND;
	1972	cnv->fromUChar32=sourceChar;
	1973	break;
	1974	}
	1975
	1976	if(sourceChar == CR \|\| sourceChar == LF) {
	1977	/* reset the G2 state at the end of a line (conversion got us into ASCII or JISX201 already) */
	1978	pFromU2022State->cs[2] = 0;
	1979	choiceCount = 0;
	1980	}
	1981
	1982	/* output outLen>0 bytes in buffer[] */
	1983	if(outLen == 1) {
	1984	*target++ = buffer[0];
	1985	if(offsets) {
	1986	offsets++ = (int32_t)(source - args->source - 1); / -1: known to be ASCII */
	1987	}
	1988	} else if(outLen == 2 && (target + 2) <= targetLimit) {
	1989	*target++ = buffer[0];
	1990	*target++ = buffer[1];
	1991	if(offsets) {
	1992	int32_t sourceIndex = (int32_t)(source - args->source - U16_LENGTH(sourceChar));
	1993	*offsets++ = sourceIndex;
	1994	*offsets++ = sourceIndex;
	1995	}
	1996	} else {
	1997	fromUWriteUInt8(
	1998	cnv,
	1999	buffer, outLen,
	2000	&target, (const char *)targetLimit,
	2001	&offsets, (int32_t)(source - args->source - U16_LENGTH(sourceChar)),
	2002	err);
	2003	if(U_FAILURE(*err)) {
	2004	break;
	2005	}
	2006	}
	2007	} /* end if(myTargetIndex<myTargetLength) */
	2008	else{
	2009	*err =U_BUFFER_OVERFLOW_ERROR;
	2010	break;
	2011	}
	2012
	2013	}/* end while(mySourceIndex<mySourceLength) */
	2014
	2015	/*
	2016	* the end of the input stream and detection of truncated input
	2017	* are handled by the framework, but for ISO-2022-JP conversion
	2018	* we need to be in ASCII mode at the very end
	2019	*
	2020	* conditions:
	2021	* successful
	2022	* in SO mode or not in ASCII mode
	2023	* end of input and no truncated input
	2024	*/
	2025	if( U_SUCCESS(*err) &&
	2026	(pFromU2022State->g!=0 \|\| pFromU2022State->cs[0]!=ASCII) &&
	2027	args->flush && source>=sourceLimit && cnv->fromUChar32==0
	2028	) {
	2029	int32_t sourceIndex;
	2030
	2031	outLen = 0;
	2032
	2033	if(pFromU2022State->g != 0) {
	2034	buffer[outLen++] = UCNV_SI;
	2035	pFromU2022State->g = 0;
	2036	}
	2037
	2038	if(pFromU2022State->cs[0] != ASCII) {
	2039	int32_t escLen = escSeqCharsLen[ASCII];
	2040	uprv_memcpy(buffer + outLen, escSeqChars[ASCII], escLen);
	2041	outLen += escLen;
	2042	pFromU2022State->cs[0] = (int8_t)ASCII;
	2043	}
	2044
	2045	/* get the source index of the last input character */
	2046	/*
	2047	* TODO this would be simpler and more reliable if we used a pair
	2048	* of sourceIndex/prevSourceIndex like in ucnvmbcs.c
	2049	* so that we could simply use the prevSourceIndex here;
	2050	* this code gives an incorrect result for the rare case of an unmatched
	2051	* trail surrogate that is alone in the last buffer of the text stream
	2052	*/
	2053	sourceIndex=(int32_t)(source-args->source);
	2054	if(sourceIndex>0) {
	2055	--sourceIndex;
	2056	if( U16_IS_TRAIL(args->source[sourceIndex]) &&
	2057	(sourceIndex==0 \|\| U16_IS_LEAD(args->source[sourceIndex-1]))
	2058	) {
	2059	--sourceIndex;
	2060	}
	2061	} else {
	2062	sourceIndex=-1;
	2063	}
	2064
	2065	fromUWriteUInt8(
	2066	cnv,
	2067	buffer, outLen,
	2068	&target, (const char *)targetLimit,
	2069	&offsets, sourceIndex,
	2070	err);
	2071	}
	2072
	2073	/save the state and return /
	2074	args->source = source;
	2075	args->target = (char*)target;
	2076	}
	2077
	2078	/************* to unicode *****************/
	2079
	2080	static void U_CALLCONV
	2081	UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
	2082	UErrorCode* err){
	2083	char tempBuf[2];
	2084	const char mySource = (char ) args->source;
	2085	UChar *myTarget = args->target;
	2086	const char *mySourceLimit = args->sourceLimit;
	2087	uint32_t targetUniChar = 0x0000;
	2088	uint32_t mySourceChar = 0x0000;
	2089	uint32_t tmpSourceChar = 0x0000;
	2090	UConverterDataISO2022* myData;
	2091	ISO2022State *pToU2022State;
	2092	StateEnum cs;
	2093
	2094	myData=(UConverterDataISO2022*)(args->converter->extraInfo);
	2095	pToU2022State = &myData->toU2022State;
	2096
	2097	if(myData->key != 0) {
	2098	/* continue with a partial escape sequence */
	2099	goto escape;
	2100	} else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) {
	2101	/* continue with a partial double-byte character */
	2102	mySourceChar = args->converter->toUBytes[0];
	2103	args->converter->toULength = 0;
	2104	cs = (StateEnum)pToU2022State->cs[pToU2022State->g];
	2105	targetUniChar = missingCharMarker;
	2106	goto getTrailByte;
	2107	}
	2108
	2109	while(mySource < mySourceLimit){
	2110
	2111	targetUniChar =missingCharMarker;
	2112
	2113	if(myTarget < args->targetLimit){
	2114
	2115	mySourceChar= (unsigned char) *mySource++;
	2116
	2117	switch(mySourceChar) {
	2118	case UCNV_SI:
	2119	if(myData->version==3) {
	2120	pToU2022State->g=0;
	2121	continue;
	2122	} else {
	2123	/* only JIS7 uses SI/SO, not ISO-2022-JP-x */
	2124	myData->isEmptySegment = FALSE; /* reset this, we have a different error */
	2125	break;
	2126	}
	2127
	2128	case UCNV_SO:
	2129	if(myData->version==3) {
	2130	/* JIS7: switch to G1 half-width Katakana */
	2131	pToU2022State->cs[1] = (int8_t)HWKANA_7BIT;
	2132	pToU2022State->g=1;
	2133	continue;
	2134	} else {
	2135	/* only JIS7 uses SI/SO, not ISO-2022-JP-x */
	2136	myData->isEmptySegment = FALSE; /* reset this, we have a different error */
	2137	break;
	2138	}
	2139
	2140	case ESC_2022:
	2141	mySource--;
	2142	escape:
	2143	{
	2144	const char * mySourceBefore = mySource;
	2145	int8_t toULengthBefore = args->converter->toULength;
	2146
	2147	changeState_2022(args->converter,&(mySource),
	2148	mySourceLimit, ISO_2022_JP,err);
	2149
	2150	/* If in ISO-2022-JP only and we successully completed an escape sequence, but previous segment was empty, create an error */
	2151	if(myData->version==0 && myData->key==0 && U_SUCCESS(*err) && myData->isEmptySegment) {
	2152	*err = U_ILLEGAL_ESCAPE_SEQUENCE;
	2153	args->converter->toUCallbackReason = UCNV_IRREGULAR;
	2154	args->converter->toULength = (int8_t)(toULengthBefore + (mySource - mySourceBefore));
	2155	}
	2156	}
	2157
	2158	/* invalid or illegal escape sequence */
	2159	if(U_FAILURE(*err)){
	2160	args->target = myTarget;
	2161	args->source = mySource;
	2162	myData->isEmptySegment = FALSE; /* Reset to avoid future spurious errors */
	2163	return;
	2164	}
	2165	/* If we successfully completed an escape sequence, we begin a new segment, empty so far */
	2166	if(myData->key==0) {
	2167	myData->isEmptySegment = TRUE;
	2168	}
	2169	continue;
	2170
	2171	/* ISO-2022-JP does not use single-byte (C1) SS2 and SS3 */
	2172
	2173	case CR:
	2174	case LF:
	2175	/* automatically reset to single-byte mode */
	2176	if((StateEnum)pToU2022State->cs[0] != ASCII && (StateEnum)pToU2022State->cs[0] != JISX201) {
	2177	pToU2022State->cs[0] = (int8_t)ASCII;
	2178	}
	2179	pToU2022State->cs[2] = 0;
	2180	pToU2022State->g = 0;
	2181	U_FALLTHROUGH;
	2182	default:
	2183	/* convert one or two bytes */
	2184	myData->isEmptySegment = FALSE;
	2185	cs = (StateEnum)pToU2022State->cs[pToU2022State->g];
	2186	if( (uint8_t)(mySourceChar - 0xa1) <= (0xdf - 0xa1) && myData->version==4 &&
	2187	!IS_JP_DBCS(cs)
	2188	) {
	2189	/* 8-bit halfwidth katakana in any single-byte mode for JIS8 */
	2190	targetUniChar = mySourceChar + (HWKANA_START - 0xa1);
	2191
	2192	/* return from a single-shift state to the previous one */
	2193	if(pToU2022State->g >= 2) {
	2194	pToU2022State->g=pToU2022State->prevG;
	2195	}
	2196	} else switch(cs) {
	2197	case ASCII:
	2198	if(mySourceChar <= 0x7f) {
	2199	targetUniChar = mySourceChar;
	2200	}
	2201	break;
	2202	case ISO8859_1:
	2203	if(mySourceChar <= 0x7f) {
	2204	targetUniChar = mySourceChar + 0x80;
	2205	}
	2206	/* return from a single-shift state to the previous one */
	2207	pToU2022State->g=pToU2022State->prevG;
	2208	break;
	2209	case ISO8859_7:
	2210	if(mySourceChar <= 0x7f) {
	2211	/* convert mySourceChar+0x80 to use a normal 8-bit table */
	2212	targetUniChar =
	2213	_MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(
	2214	myData->myConverterArray[cs],
	2215	mySourceChar + 0x80);
	2216	}
	2217	/* return from a single-shift state to the previous one */
	2218	pToU2022State->g=pToU2022State->prevG;
	2219	break;
	2220	case JISX201:
	2221	if(mySourceChar <= 0x7f) {
	2222	targetUniChar = jisx201ToU(mySourceChar);
	2223	}
	2224	break;
	2225	case HWKANA_7BIT:
	2226	if((uint8_t)(mySourceChar - 0x21) <= (0x5f - 0x21)) {
	2227	/* 7-bit halfwidth Katakana */
	2228	targetUniChar = mySourceChar + (HWKANA_START - 0x21);
	2229	}
	2230	break;
	2231	default:
	2232	/* G0 DBCS */
	2233	if(mySource < mySourceLimit) {
	2234	int leadIsOk, trailIsOk;
	2235	uint8_t trailByte;
	2236	getTrailByte:
	2237	trailByte = (uint8_t)*mySource;
	2238	/*
	2239	* Ticket 5691: consistent illegal sequences:
	2240	* - We include at least the first byte in the illegal sequence.
	2241	* - If any of the non-initial bytes could be the start of a character,
	2242	* we stop the illegal sequence before the first one of those.
	2243	*
	2244	* In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
	2245	* an ESC/SO/SI, we report only the first byte as the illegal sequence.
	2246	* Otherwise we convert or report the pair of bytes.
	2247	*/
	2248	leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21);
	2249	trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21);
	2250	if (leadIsOk && trailIsOk) {
	2251	++mySource;
	2252	tmpSourceChar = (mySourceChar << 8) \| trailByte;
	2253	if(cs == JISX208) {
	2254	_2022ToSJIS((uint8_t)mySourceChar, trailByte, tempBuf);
	2255	mySourceChar = tmpSourceChar;
	2256	} else {
	2257	/* Copy before we modify tmpSourceChar so toUnicodeCallback() sees the correct bytes. */
	2258	mySourceChar = tmpSourceChar;
	2259	if (cs == KSC5601) {
	2260	tmpSourceChar += 0x8080; /* = _2022ToGR94DBCS(tmpSourceChar) */
	2261	}
	2262	tempBuf[0] = (char)(tmpSourceChar >> 8);
	2263	tempBuf[1] = (char)(tmpSourceChar);
	2264	}
	2265	targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->myConverterArray[cs], tempBuf, 2, FALSE);
	2266	} else if (!(trailIsOk \|\| IS_2022_CONTROL(trailByte))) {
	2267	/* report a pair of illegal bytes if the second byte is not a DBCS starter */
	2268	++mySource;
	2269	/* add another bit so that the code below writes 2 bytes in case of error */
	2270	mySourceChar = 0x10000 \| (mySourceChar << 8) \| trailByte;
	2271	}
	2272	} else {
	2273	args->converter->toUBytes[0] = (uint8_t)mySourceChar;
	2274	args->converter->toULength = 1;
	2275	goto endloop;
	2276	}
	2277	} /* End of inner switch */
	2278	break;
	2279	} /* End of outer switch */
	2280	if(targetUniChar < (missingCharMarker-1/0xfffe/)){
	2281	if(args->offsets){
	2282	args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
	2283	}
	2284	*(myTarget++)=(UChar)targetUniChar;
	2285	}
	2286	else if(targetUniChar > missingCharMarker){
	2287	/* disassemble the surrogate pair and write to output*/
	2288	targetUniChar-=0x0010000;
	2289	*myTarget = (UChar)(0xd800+(UChar)(targetUniChar>>10));
	2290	if(args->offsets){
	2291	args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
	2292	}
	2293	++myTarget;
	2294	if(myTarget< args->targetLimit){
	2295	*myTarget = (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));
	2296	if(args->offsets){
	2297	args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
	2298	}
	2299	++myTarget;
	2300	}else{
	2301	args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]=
	2302	(UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));
	2303	}
	2304
	2305	}
	2306	else{
	2307	/* Call the callback function*/
	2308	toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err);
	2309	break;
	2310	}
	2311	}
	2312	else{ /* goes with "if(myTarget < args->targetLimit)" way up near top of function */
	2313	*err =U_BUFFER_OVERFLOW_ERROR;
	2314	break;
	2315	}
	2316	}
	2317	endloop:
	2318	args->target = myTarget;
	2319	args->source = mySource;
	2320	}
	2321
	2322
	2323	#if !UCONFIG_ONLY_HTML_CONVERSION
	2324	/***************************************************************
	2325	* Rules for ISO-2022-KR encoding
	2326	* i) The KSC5601 designator sequence should appear only once in a file,
	2327	* at the beginning of a line before any KSC5601 characters. This usually
	2328	* means that it appears by itself on the first line of the file
	2329	* ii) There are only 2 shifting sequences SO to shift into double byte mode
	2330	* and SI to shift into single byte mode
	2331	*/
	2332	static void U_CALLCONV
	2333	UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterFromUnicodeArgs* args, UErrorCode* err){
	2334
	2335	UConverter* saveConv = args->converter;
	2336	UConverterDataISO2022 myConverterData=(UConverterDataISO2022)saveConv->extraInfo;
	2337	args->converter=myConverterData->currentConverter;
	2338
	2339	myConverterData->currentConverter->fromUChar32 = saveConv->fromUChar32;
	2340	ucnv_MBCSFromUnicodeWithOffsets(args,err);
	2341	saveConv->fromUChar32 = myConverterData->currentConverter->fromUChar32;
	2342
	2343	if(*err == U_BUFFER_OVERFLOW_ERROR) {
	2344	if(myConverterData->currentConverter->charErrorBufferLength > 0) {
	2345	uprv_memcpy(
	2346	saveConv->charErrorBuffer,
	2347	myConverterData->currentConverter->charErrorBuffer,
	2348	myConverterData->currentConverter->charErrorBufferLength);
	2349	}
	2350	saveConv->charErrorBufferLength = myConverterData->currentConverter->charErrorBufferLength;
	2351	myConverterData->currentConverter->charErrorBufferLength = 0;
	2352	}
	2353	args->converter=saveConv;
	2354	}
	2355
	2356	static void U_CALLCONV
	2357	UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err){
	2358
	2359	const UChar *source = args->source;
	2360	const UChar *sourceLimit = args->sourceLimit;
	2361	unsigned char target = (unsigned char ) args->target;
	2362	unsigned char targetLimit = (unsigned char ) args->targetLimit;
	2363	int32_t* offsets = args->offsets;
	2364	uint32_t targetByteUnit = 0x0000;
	2365	UChar32 sourceChar = 0x0000;
	2366	UBool isTargetByteDBCS;
	2367	UBool oldIsTargetByteDBCS;
	2368	UConverterDataISO2022 *converterData;
	2369	UConverterSharedData* sharedData;
	2370	UBool useFallback;
	2371	int32_t length =0;
	2372
	2373	converterData=(UConverterDataISO2022*)args->converter->extraInfo;
	2374	/* if the version is 1 then the user is requesting
	2375	* conversion with ibm-25546 pass the arguments to
	2376	* MBCS converter and return
	2377	*/
	2378	if(converterData->version==1){
	2379	UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(args,err);
	2380	return;
	2381	}
	2382
	2383	/* initialize data */
	2384	sharedData = converterData->currentConverter->sharedData;
	2385	useFallback = args->converter->useFallback;
	2386	isTargetByteDBCS=(UBool)args->converter->fromUnicodeStatus;
	2387	oldIsTargetByteDBCS = isTargetByteDBCS;
	2388
	2389	isTargetByteDBCS = (UBool) args->converter->fromUnicodeStatus;
	2390	if((sourceChar = args->converter->fromUChar32)!=0 && target <targetLimit) {
	2391	goto getTrail;
	2392	}
	2393	while(source < sourceLimit){
	2394
	2395	targetByteUnit = missingCharMarker;
	2396
	2397	if(target < (unsigned char*) args->targetLimit){
	2398	sourceChar = *source++;
	2399
	2400	/* do not convert SO/SI/ESC */
	2401	if(IS_2022_CONTROL(sourceChar)) {
	2402	/* callback(illegal) */
	2403	*err=U_ILLEGAL_CHAR_FOUND;
	2404	args->converter->fromUChar32=sourceChar;
	2405	break;
	2406	}
	2407
	2408	length = MBCS_FROM_UCHAR32_ISO2022(sharedData,sourceChar,&targetByteUnit,useFallback,MBCS_OUTPUT_2);
	2409	if(length < 0) {
	2410	length = -length; /* fallback */
	2411	}
	2412	/* only DBCS or SBCS characters are expected*/
	2413	/* DB characters with high bit set to 1 are expected */
	2414	if( length > 2 \|\| length==0 \|\|
	2415	(length == 1 && targetByteUnit > 0x7f) \|\|
	2416	(length == 2 &&
	2417	((uint16_t)(targetByteUnit - 0xa1a1) > (0xfefe - 0xa1a1) \|\|
	2418	(uint8_t)(targetByteUnit - 0xa1) > (0xfe - 0xa1)))
	2419	) {
	2420	targetByteUnit=missingCharMarker;
	2421	}
	2422	if (targetByteUnit != missingCharMarker){
	2423
	2424	oldIsTargetByteDBCS = isTargetByteDBCS;
	2425	isTargetByteDBCS = (UBool)(targetByteUnit>0x00FF);
	2426	/* append the shift sequence */
	2427	if (oldIsTargetByteDBCS != isTargetByteDBCS ){
	2428
	2429	if (isTargetByteDBCS)
	2430	*target++ = UCNV_SO;
	2431	else
	2432	*target++ = UCNV_SI;
	2433	if(offsets)
	2434	*(offsets++) = (int32_t)(source - args->source-1);
	2435	}
	2436	/* write the targetUniChar to target */
	2437	if(targetByteUnit <= 0x00FF){
	2438	if( target < targetLimit){
	2439	*(target++) = (unsigned char) targetByteUnit;
	2440	if(offsets){
	2441	*(offsets++) = (int32_t)(source - args->source-1);
	2442	}
	2443
	2444	}else{
	2445	args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit);
	2446	*err = U_BUFFER_OVERFLOW_ERROR;
	2447	}
	2448	}else{
	2449	if(target < targetLimit){
	2450	*(target++) =(unsigned char) ((targetByteUnit>>8) -0x80);
	2451	if(offsets){
	2452	*(offsets++) = (int32_t)(source - args->source-1);
	2453	}
	2454	if(target < targetLimit){
	2455	*(target++) =(unsigned char) (targetByteUnit -0x80);
	2456	if(offsets){
	2457	*(offsets++) = (int32_t)(source - args->source-1);
	2458	}
	2459	}else{
	2460	args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit -0x80);
	2461	*err = U_BUFFER_OVERFLOW_ERROR;
	2462	}
	2463	}else{
	2464	args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) ((targetByteUnit>>8) -0x80);
	2465	args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit-0x80);
	2466	*err = U_BUFFER_OVERFLOW_ERROR;
	2467	}
	2468	}
	2469
	2470	}
	2471	else{
	2472	/* oops.. the code point is unassingned
	2473	* set the error and reason
	2474	*/
	2475
	2476	/check if the char is a First surrogate/
	2477	if(U16_IS_SURROGATE(sourceChar)) {
	2478	if(U16_IS_SURROGATE_LEAD(sourceChar)) {
	2479	getTrail:
	2480	/look ahead to find the trail surrogate/
	2481	if(source < sourceLimit) {
	2482	/* test the following code unit */
	2483	UChar trail=(UChar) *source;
	2484	if(U16_IS_TRAIL(trail)) {
	2485	source++;
	2486	sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail);
	2487	*err = U_INVALID_CHAR_FOUND;
	2488	/* convert this surrogate code point */
	2489	/* exit this condition tree */
	2490	} else {
	2491	/* this is an unmatched lead code unit (1st surrogate) */
	2492	/* callback(illegal) */
	2493	*err=U_ILLEGAL_CHAR_FOUND;
	2494	}
	2495	} else {
	2496	/* no more input */
	2497	*err = U_ZERO_ERROR;
	2498	}
	2499	} else {
	2500	/* this is an unmatched trail code unit (2nd surrogate) */
	2501	/* callback(illegal) */
	2502	*err=U_ILLEGAL_CHAR_FOUND;
	2503	}
	2504	} else {
	2505	/* callback(unassigned) for a BMP code point */
	2506	*err = U_INVALID_CHAR_FOUND;
	2507	}
	2508
	2509	args->converter->fromUChar32=sourceChar;
	2510	break;
	2511	}
	2512	} /* end if(myTargetIndex<myTargetLength) */
	2513	else{
	2514	*err =U_BUFFER_OVERFLOW_ERROR;
	2515	break;
	2516	}
	2517
	2518	}/* end while(mySourceIndex<mySourceLength) */
	2519
	2520	/*
	2521	* the end of the input stream and detection of truncated input
	2522	* are handled by the framework, but for ISO-2022-KR conversion
	2523	* we need to be in ASCII mode at the very end
	2524	*
	2525	* conditions:
	2526	* successful
	2527	* not in ASCII mode
	2528	* end of input and no truncated input
	2529	*/
	2530	if( U_SUCCESS(*err) &&
	2531	isTargetByteDBCS &&
	2532	args->flush && source>=sourceLimit && args->converter->fromUChar32==0
	2533	) {
	2534	int32_t sourceIndex;
	2535
	2536	/* we are switching to ASCII */
	2537	isTargetByteDBCS=FALSE;
	2538
	2539	/* get the source index of the last input character */
	2540	/*
	2541	* TODO this would be simpler and more reliable if we used a pair
	2542	* of sourceIndex/prevSourceIndex like in ucnvmbcs.c
	2543	* so that we could simply use the prevSourceIndex here;
	2544	* this code gives an incorrect result for the rare case of an unmatched
	2545	* trail surrogate that is alone in the last buffer of the text stream
	2546	*/
	2547	sourceIndex=(int32_t)(source-args->source);
	2548	if(sourceIndex>0) {
	2549	--sourceIndex;
	2550	if( U16_IS_TRAIL(args->source[sourceIndex]) &&
	2551	(sourceIndex==0 \|\| U16_IS_LEAD(args->source[sourceIndex-1]))
	2552	) {
	2553	--sourceIndex;
	2554	}
	2555	} else {
	2556	sourceIndex=-1;
	2557	}
	2558
	2559	fromUWriteUInt8(
	2560	args->converter,
	2561	SHIFT_IN_STR, 1,
	2562	&target, (const char *)targetLimit,
	2563	&offsets, sourceIndex,
	2564	err);
	2565	}
	2566
	2567	/save the state and return /
	2568	args->source = source;
	2569	args->target = (char*)target;
	2570	args->converter->fromUnicodeStatus = (uint32_t)isTargetByteDBCS;
	2571	}
	2572
	2573	/********************** To Unicode *************************************/
	2574
	2575	static void U_CALLCONV
	2576	UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterToUnicodeArgs *args,
	2577	UErrorCode* err){
	2578	char const* sourceStart;
	2579	UConverterDataISO2022* myData=(UConverterDataISO2022*)(args->converter->extraInfo);
	2580
	2581	UConverterToUnicodeArgs subArgs;
	2582	int32_t minArgsSize;
	2583
	2584	/* set up the subconverter arguments */
	2585	if(args->size<sizeof(UConverterToUnicodeArgs)) {
	2586	minArgsSize = args->size;
	2587	} else {
	2588	minArgsSize = (int32_t)sizeof(UConverterToUnicodeArgs);
	2589	}
	2590
	2591	uprv_memcpy(&subArgs, args, minArgsSize);
	2592	subArgs.size = (uint16_t)minArgsSize;
	2593	subArgs.converter = myData->currentConverter;
	2594
	2595	/* remember the original start of the input for offsets */
	2596	sourceStart = args->source;
	2597
	2598	if(myData->key != 0) {
	2599	/* continue with a partial escape sequence */
	2600	goto escape;
	2601	}
	2602
	2603	while(U_SUCCESS(*err) && args->source < args->sourceLimit) {
	2604	/Find the end of the buffer e.g : Next Escape Seq \| end of Buffer/
	2605	subArgs.source = args->source;
	2606	subArgs.sourceLimit = getEndOfBuffer_2022(&(args->source), args->sourceLimit, args->flush);
	2607	if(subArgs.source != subArgs.sourceLimit) {
	2608	/*
	2609	* get the current partial byte sequence
	2610	*
	2611	* it needs to be moved between the public and the subconverter
	2612	* so that the conversion framework, which only sees the public
	2613	* converter, can handle truncated and illegal input etc.
	2614	*/
	2615	if(args->converter->toULength > 0) {
	2616	uprv_memcpy(subArgs.converter->toUBytes, args->converter->toUBytes, args->converter->toULength);
	2617	}
	2618	subArgs.converter->toULength = args->converter->toULength;
	2619
	2620	/*
	2621	* Convert up to the end of the input, or to before the next escape character.
	2622	* Does not handle conversion extensions because the preToU[] state etc.
	2623	* is not copied.
	2624	*/
	2625	ucnv_MBCSToUnicodeWithOffsets(&subArgs, err);
	2626
	2627	if(args->offsets != NULL && sourceStart != args->source) {
	2628	/* update offsets to base them on the actual start of the input */
	2629	int32_t *offsets = args->offsets;
	2630	UChar *target = args->target;
	2631	int32_t delta = (int32_t)(args->source - sourceStart);
	2632	while(target < subArgs.target) {
	2633	if(*offsets >= 0) {
	2634	*offsets += delta;
	2635	}
	2636	++offsets;
	2637	++target;
	2638	}
	2639	}
	2640	args->source = subArgs.source;
	2641	args->target = subArgs.target;
	2642	args->offsets = subArgs.offsets;
	2643
	2644	/* copy input/error/overflow buffers */
	2645	if(subArgs.converter->toULength > 0) {
	2646	uprv_memcpy(args->converter->toUBytes, subArgs.converter->toUBytes, subArgs.converter->toULength);
	2647	}
	2648	args->converter->toULength = subArgs.converter->toULength;
	2649
	2650	if(*err == U_BUFFER_OVERFLOW_ERROR) {
	2651	if(subArgs.converter->UCharErrorBufferLength > 0) {
	2652	uprv_memcpy(args->converter->UCharErrorBuffer, subArgs.converter->UCharErrorBuffer,
	2653	subArgs.converter->UCharErrorBufferLength);
	2654	}
	2655	args->converter->UCharErrorBufferLength=subArgs.converter->UCharErrorBufferLength;
	2656	subArgs.converter->UCharErrorBufferLength = 0;
	2657	}
	2658	}
	2659
	2660	if (U_FAILURE(*err) \|\| (args->source == args->sourceLimit)) {
	2661	return;
	2662	}
	2663
	2664	escape:
	2665	changeState_2022(args->converter,
	2666	&(args->source),
	2667	args->sourceLimit,
	2668	ISO_2022_KR,
	2669	err);
	2670	}
	2671	}
	2672
	2673	static void U_CALLCONV
	2674	UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
	2675	UErrorCode* err){
	2676	char tempBuf[2];
	2677	const char mySource = ( char ) args->source;
	2678	UChar *myTarget = args->target;
	2679	const char *mySourceLimit = args->sourceLimit;
	2680	UChar32 targetUniChar = 0x0000;
	2681	UChar mySourceChar = 0x0000;
	2682	UConverterDataISO2022* myData;
	2683	UConverterSharedData* sharedData ;
	2684	UBool useFallback;
	2685
	2686	myData=(UConverterDataISO2022*)(args->converter->extraInfo);
	2687	if(myData->version==1){
	2688	UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(args,err);
	2689	return;
	2690	}
	2691
	2692	/* initialize state */
	2693	sharedData = myData->currentConverter->sharedData;
	2694	useFallback = args->converter->useFallback;
	2695
	2696	if(myData->key != 0) {
	2697	/* continue with a partial escape sequence */
	2698	goto escape;
	2699	} else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) {
	2700	/* continue with a partial double-byte character */
	2701	mySourceChar = args->converter->toUBytes[0];
	2702	args->converter->toULength = 0;
	2703	goto getTrailByte;
	2704	}
	2705
	2706	while(mySource< mySourceLimit){
	2707
	2708	if(myTarget < args->targetLimit){
	2709
	2710	mySourceChar= (unsigned char) *mySource++;
	2711
	2712	if(mySourceChar==UCNV_SI){
	2713	myData->toU2022State.g = 0;
	2714	if (myData->isEmptySegment) {
	2715	myData->isEmptySegment = FALSE; /* we are handling it, reset to avoid future spurious errors */
	2716	*err = U_ILLEGAL_ESCAPE_SEQUENCE;
	2717	args->converter->toUCallbackReason = UCNV_IRREGULAR;
	2718	args->converter->toUBytes[0] = (uint8_t)mySourceChar;
	2719	args->converter->toULength = 1;
	2720	args->target = myTarget;
	2721	args->source = mySource;
	2722	return;
	2723	}
	2724	/consume the source /
	2725	continue;
	2726	}else if(mySourceChar==UCNV_SO){
	2727	myData->toU2022State.g = 1;
	2728	myData->isEmptySegment = TRUE; /* Begin a new segment, empty so far */
	2729	/consume the source /
	2730	continue;
	2731	}else if(mySourceChar==ESC_2022){
	2732	mySource--;
	2733	escape:
	2734	myData->isEmptySegment = FALSE; /* Any invalid ESC sequences will be detected separately, so just reset this */
	2735	changeState_2022(args->converter,&(mySource),
	2736	mySourceLimit, ISO_2022_KR, err);
	2737	if(U_FAILURE(*err)){
	2738	args->target = myTarget;
	2739	args->source = mySource;
	2740	return;
	2741	}
	2742	continue;
	2743	}
	2744
	2745	myData->isEmptySegment = FALSE; /* Any invalid char errors will be detected separately, so just reset this */
	2746	if(myData->toU2022State.g == 1) {
	2747	if(mySource < mySourceLimit) {
	2748	int leadIsOk, trailIsOk;
	2749	uint8_t trailByte;
	2750	getTrailByte:
	2751	targetUniChar = missingCharMarker;
	2752	trailByte = (uint8_t)*mySource;
	2753	/*
	2754	* Ticket 5691: consistent illegal sequences:
	2755	* - We include at least the first byte in the illegal sequence.
	2756	* - If any of the non-initial bytes could be the start of a character,
	2757	* we stop the illegal sequence before the first one of those.
	2758	*
	2759	* In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
	2760	* an ESC/SO/SI, we report only the first byte as the illegal sequence.
	2761	* Otherwise we convert or report the pair of bytes.
	2762	*/
	2763	leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21);
	2764	trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21);
	2765	if (leadIsOk && trailIsOk) {
	2766	++mySource;
	2767	tempBuf[0] = (char)(mySourceChar + 0x80);
	2768	tempBuf[1] = (char)(trailByte + 0x80);
	2769	targetUniChar = ucnv_MBCSSimpleGetNextUChar(sharedData, tempBuf, 2, useFallback);
	2770	mySourceChar = (mySourceChar << 8) \| trailByte;
	2771	} else if (!(trailIsOk \|\| IS_2022_CONTROL(trailByte))) {
	2772	/* report a pair of illegal bytes if the second byte is not a DBCS starter */
	2773	++mySource;
	2774	/* add another bit so that the code below writes 2 bytes in case of error */
	2775	mySourceChar = static_cast<UChar>(0x10000 \| (mySourceChar << 8) \| trailByte);
	2776	}
	2777	} else {
	2778	args->converter->toUBytes[0] = (uint8_t)mySourceChar;
	2779	args->converter->toULength = 1;
	2780	break;
	2781	}
	2782	}
	2783	else if(mySourceChar <= 0x7f) {
	2784	targetUniChar = ucnv_MBCSSimpleGetNextUChar(sharedData, mySource - 1, 1, useFallback);
	2785	} else {
	2786	targetUniChar = 0xffff;
	2787	}
	2788	if(targetUniChar < 0xfffe){
	2789	if(args->offsets) {
	2790	args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
	2791	}
	2792	*(myTarget++)=(UChar)targetUniChar;
	2793	}
	2794	else {
	2795	/* Call the callback function*/
	2796	toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err);
	2797	break;
	2798	}
	2799	}
	2800	else{
	2801	*err =U_BUFFER_OVERFLOW_ERROR;
	2802	break;
	2803	}
	2804	}
	2805	args->target = myTarget;
	2806	args->source = mySource;
	2807	}
	2808
	2809	/************************* END ISO2022-KR *******************************/
	2810
	2811	/************************* ISO-2022-CN *******************************
	2812	*
	2813	* Rules for ISO-2022-CN Encoding:
	2814	* i) The designator sequence must appear once on a line before any instance
	2815	* of character set it designates.
	2816	* ii) If two lines contain characters from the same character set, both lines
	2817	* must include the designator sequence.
	2818	* iii) Once the designator sequence is known, a shifting sequence has to be found
	2819	* to invoke the shifting
	2820	* iv) All lines start in ASCII and end in ASCII.
	2821	* v) Four shifting sequences are employed for this purpose:
	2822	*
	2823	* Sequence ASCII Eq Charsets
	2824	* ---------- ------- ---------
	2825	* SI <SI> US-ASCII
	2826	* SO <SO> CNS-11643-1992 Plane 1, GB2312, ISO-IR-165
	2827	* SS2 <ESC>N CNS-11643-1992 Plane 2
	2828	* SS3 <ESC>O CNS-11643-1992 Planes 3-7
	2829	*
	2830	* vi)
	2831	* SOdesignator : ESC "$" ")" finalchar_for_SO
	2832	* SS2designator : ESC "$" "*" finalchar_for_SS2
	2833	* SS3designator : ESC "$" "+" finalchar_for_SS3
	2834	*
	2835	* ESC $ ) A Indicates the bytes following SO are Chinese
	2836	* characters as defined in GB 2312-80, until
	2837	* another SOdesignation appears
	2838	*
	2839	*
	2840	* ESC $ ) E Indicates the bytes following SO are as defined
	2841	* in ISO-IR-165 (for details, see section 2.1),
	2842	* until another SOdesignation appears
	2843	*
	2844	* ESC $ ) G Indicates the bytes following SO are as defined
	2845	* in CNS 11643-plane-1, until another
	2846	* SOdesignation appears
	2847	*
	2848	* ESC $ * H Indicates the two bytes immediately following
	2849	* SS2 is a Chinese character as defined in CNS
	2850	* 11643-plane-2, until another SS2designation
	2851	* appears
	2852	* (Meaning <ESC>N must precede every 2 byte
	2853	* sequence.)
	2854	*
	2855	* ESC $ + I Indicates the immediate two bytes following SS3
	2856	* is a Chinese character as defined in CNS
	2857	* 11643-plane-3, until another SS3designation
	2858	* appears
	2859	* (Meaning <ESC>O must precede every 2 byte
	2860	* sequence.)
	2861	*
	2862	* ESC $ + J Indicates the immediate two bytes following SS3
	2863	* is a Chinese character as defined in CNS
	2864	* 11643-plane-4, until another SS3designation
	2865	* appears
	2866	* (In English: <ESC>O must precede every 2 byte
	2867	* sequence.)
	2868	*
	2869	* ESC $ + K Indicates the immediate two bytes following SS3
	2870	* is a Chinese character as defined in CNS
	2871	* 11643-plane-5, until another SS3designation
	2872	* appears
	2873	*
	2874	* ESC $ + L Indicates the immediate two bytes following SS3
	2875	* is a Chinese character as defined in CNS
	2876	* 11643-plane-6, until another SS3designation
	2877	* appears
	2878	*
	2879	* ESC $ + M Indicates the immediate two bytes following SS3
	2880	* is a Chinese character as defined in CNS
	2881	* 11643-plane-7, until another SS3designation
	2882	* appears
	2883	*
	2884	* As in ISO-2022-CN, each line starts in ASCII, and ends in ASCII, and
	2885	* has its own designation information before any Chinese characters
	2886	* appear
	2887	*
	2888	*/
	2889
	2890	/* The following are defined this way to make the strings truly readonly */
	2891	static const char GB_2312_80_STR[] = "\x1B\x24\x29\x41";
	2892	static const char ISO_IR_165_STR[] = "\x1B\x24\x29\x45";
	2893	static const char CNS_11643_1992_Plane_1_STR[] = "\x1B\x24\x29\x47";
	2894	static const char CNS_11643_1992_Plane_2_STR[] = "\x1B\x24\x2A\x48";
	2895	static const char CNS_11643_1992_Plane_3_STR[] = "\x1B\x24\x2B\x49";
	2896	static const char CNS_11643_1992_Plane_4_STR[] = "\x1B\x24\x2B\x4A";
	2897	static const char CNS_11643_1992_Plane_5_STR[] = "\x1B\x24\x2B\x4B";
	2898	static const char CNS_11643_1992_Plane_6_STR[] = "\x1B\x24\x2B\x4C";
	2899	static const char CNS_11643_1992_Plane_7_STR[] = "\x1B\x24\x2B\x4D";
	2900
	2901	/******************** ISO2022-CN Data ************************/
	2902	static const char* const escSeqCharsCN[10] ={
	2903	SHIFT_IN_STR, /* 0 ASCII */
	2904	GB_2312_80_STR, /* 1 GB2312_1 */
	2905	ISO_IR_165_STR, /* 2 ISO_IR_165 */
	2906	CNS_11643_1992_Plane_1_STR,
	2907	CNS_11643_1992_Plane_2_STR,
	2908	CNS_11643_1992_Plane_3_STR,
	2909	CNS_11643_1992_Plane_4_STR,
	2910	CNS_11643_1992_Plane_5_STR,
	2911	CNS_11643_1992_Plane_6_STR,
	2912	CNS_11643_1992_Plane_7_STR
	2913	};
	2914
	2915	static void U_CALLCONV
	2916	UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err){
	2917	UConverter *cnv = args->converter;
	2918	UConverterDataISO2022 *converterData;
	2919	ISO2022State *pFromU2022State;
	2920	uint8_t target = (uint8_t ) args->target;
	2921	const uint8_t targetLimit = (const uint8_t ) args->targetLimit;
	2922	const UChar* source = args->source;
	2923	const UChar* sourceLimit = args->sourceLimit;
	2924	int32_t* offsets = args->offsets;
	2925	UChar32 sourceChar;
	2926	char buffer[8];
	2927	int32_t len;
	2928	int8_t choices[3];
	2929	int32_t choiceCount;
	2930	uint32_t targetValue = 0;
	2931	UBool useFallback;
	2932
	2933	/* set up the state */
	2934	converterData = (UConverterDataISO2022*)cnv->extraInfo;
	2935	pFromU2022State = &converterData->fromU2022State;
	2936
	2937	choiceCount = 0;
	2938
	2939	/* check if the last codepoint of previous buffer was a lead surrogate*/
	2940	if((sourceChar = cnv->fromUChar32)!=0 && target< targetLimit) {
	2941	goto getTrail;
	2942	}
	2943
	2944	while( source < sourceLimit){
	2945	if(target < targetLimit){
	2946
	2947	sourceChar = *(source++);
	2948	/check if the char is a First surrogate/
	2949	if(U16_IS_SURROGATE(sourceChar)) {
	2950	if(U16_IS_SURROGATE_LEAD(sourceChar)) {
	2951	getTrail:
	2952	/look ahead to find the trail surrogate/
	2953	if(source < sourceLimit) {
	2954	/* test the following code unit */
	2955	UChar trail=(UChar) *source;
	2956	if(U16_IS_TRAIL(trail)) {
	2957	source++;
	2958	sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail);
	2959	cnv->fromUChar32=0x00;
	2960	/* convert this supplementary code point */
	2961	/* exit this condition tree */
	2962	} else {
	2963	/* this is an unmatched lead code unit (1st surrogate) */
	2964	/* callback(illegal) */
	2965	*err=U_ILLEGAL_CHAR_FOUND;
	2966	cnv->fromUChar32=sourceChar;
	2967	break;
	2968	}
	2969	} else {
	2970	/* no more input */
	2971	cnv->fromUChar32=sourceChar;
	2972	break;
	2973	}
	2974	} else {
	2975	/* this is an unmatched trail code unit (2nd surrogate) */
	2976	/* callback(illegal) */
	2977	*err=U_ILLEGAL_CHAR_FOUND;
	2978	cnv->fromUChar32=sourceChar;
	2979	break;
	2980	}
	2981	}
	2982
	2983	/* do the conversion */
	2984	if(sourceChar <= 0x007f ){
	2985	/* do not convert SO/SI/ESC */
	2986	if(IS_2022_CONTROL(sourceChar)) {
	2987	/* callback(illegal) */
	2988	*err=U_ILLEGAL_CHAR_FOUND;
	2989	cnv->fromUChar32=sourceChar;
	2990	break;
	2991	}
	2992
	2993	/* US-ASCII */
	2994	if(pFromU2022State->g == 0) {
	2995	buffer[0] = (char)sourceChar;
	2996	len = 1;
	2997	} else {
	2998	buffer[0] = UCNV_SI;
	2999	buffer[1] = (char)sourceChar;
	3000	len = 2;
	3001	pFromU2022State->g = 0;
	3002	choiceCount = 0;
	3003	}
	3004	if(sourceChar == CR \|\| sourceChar == LF) {
	3005	/* reset the state at the end of a line */
	3006	uprv_memset(pFromU2022State, 0, sizeof(ISO2022State));
	3007	choiceCount = 0;
	3008	}
	3009	}
	3010	else{
	3011	/* convert U+0080..U+10ffff */
	3012	int32_t i;
	3013	int8_t cs, g;
	3014
	3015	if(choiceCount == 0) {
	3016	/* try the current SO/G1 converter first */
	3017	choices[0] = pFromU2022State->cs[1];
	3018
	3019	/* default to GB2312_1 if none is designated yet */
	3020	if(choices[0] == 0) {
	3021	choices[0] = GB2312_1;
	3022	}
	3023
	3024	if(converterData->version == 0) {
	3025	/* ISO-2022-CN */
	3026
	3027	/* try the other SO/G1 converter; a CNS_11643_1 lookup may result in any plane */
	3028	if(choices[0] == GB2312_1) {
	3029	choices[1] = (int8_t)CNS_11643_1;
	3030	} else {
	3031	choices[1] = (int8_t)GB2312_1;
	3032	}
	3033
	3034	choiceCount = 2;
	3035	} else if (converterData->version == 1) {
	3036	/* ISO-2022-CN-EXT */
	3037
	3038	/* try one of the other converters */
	3039	switch(choices[0]) {
	3040	case GB2312_1:
	3041	choices[1] = (int8_t)CNS_11643_1;
	3042	choices[2] = (int8_t)ISO_IR_165;
	3043	break;
	3044	case ISO_IR_165:
	3045	choices[1] = (int8_t)GB2312_1;
	3046	choices[2] = (int8_t)CNS_11643_1;
	3047	break;
	3048	default: /* CNS_11643_x */
	3049	choices[1] = (int8_t)GB2312_1;
	3050	choices[2] = (int8_t)ISO_IR_165;
	3051	break;
	3052	}
	3053
	3054	choiceCount = 3;
	3055	} else {
	3056	choices[0] = (int8_t)CNS_11643_1;
	3057	choices[1] = (int8_t)GB2312_1;
	3058	}
	3059	}
	3060
	3061	cs = g = 0;
	3062	/*
	3063	* len==0: no mapping found yet
	3064	* len<0: found a fallback result: continue looking for a roundtrip but no further fallbacks
	3065	* len>0: found a roundtrip result, done
	3066	*/
	3067	len = 0;
	3068	/*
	3069	* We will turn off useFallback after finding a fallback,
	3070	* but we still get fallbacks from PUA code points as usual.
	3071	* Therefore, we will also need to check that we don't overwrite
	3072	* an early fallback with a later one.
	3073	*/
	3074	useFallback = cnv->useFallback;
	3075
	3076	for(i = 0; i < choiceCount && len <= 0; ++i) {
	3077	int8_t cs0 = choices[i];
	3078	if(cs0 > 0) {
	3079	uint32_t value;
	3080	int32_t len2;
	3081	if(cs0 >= CNS_11643_0) {
	3082	len2 = MBCS_FROM_UCHAR32_ISO2022(
	3083	converterData->myConverterArray[CNS_11643],
	3084	sourceChar,
	3085	&value,
	3086	useFallback,
	3087	MBCS_OUTPUT_3);
	3088	if(len2 == 3 \|\| (len2 == -3 && len == 0)) {
	3089	targetValue = value;
	3090	cs = (int8_t)(CNS_11643_0 + (value >> 16) - 0x80);
	3091	if(len2 >= 0) {
	3092	len = 2;
	3093	} else {
	3094	len = -2;
	3095	useFallback = FALSE;
	3096	}
	3097	if(cs == CNS_11643_1) {
	3098	g = 1;
	3099	} else if(cs == CNS_11643_2) {
	3100	g = 2;
	3101	} else /* plane 3..7 */ if(converterData->version == 1) {
	3102	g = 3;
	3103	} else {
	3104	/* ISO-2022-CN (without -EXT) does not support plane 3..7 */
	3105	len = 0;
	3106	}
	3107	}
	3108	} else {
	3109	/* GB2312_1 or ISO-IR-165 */
	3110	U_ASSERT(cs0<UCNV_2022_MAX_CONVERTERS);
	3111	len2 = MBCS_FROM_UCHAR32_ISO2022(
	3112	converterData->myConverterArray[cs0],
	3113	sourceChar,
	3114	&value,
	3115	useFallback,
	3116	MBCS_OUTPUT_2);
	3117	if(len2 == 2 \|\| (len2 == -2 && len == 0)) {
	3118	targetValue = value;
	3119	len = len2;
	3120	cs = cs0;
	3121	g = 1;
	3122	useFallback = FALSE;
	3123	}
	3124	}
	3125	}
	3126	}
	3127
	3128	if(len != 0) {
	3129	len = 0; /* count output bytes; it must have been abs(len) == 2 */
	3130
	3131	/* write the designation sequence if necessary */
	3132	if(cs != pFromU2022State->cs[g]) {
	3133	if(cs < CNS_11643) {
	3134	uprv_memcpy(buffer, escSeqCharsCN[cs], 4);
	3135	} else {
	3136	U_ASSERT(cs >= CNS_11643_1);
	3137	uprv_memcpy(buffer, escSeqCharsCN[CNS_11643 + (cs - CNS_11643_1)], 4);
	3138	}
	3139	len = 4;
	3140	pFromU2022State->cs[g] = cs;
	3141	if(g == 1) {
	3142	/* changing the SO/G1 charset invalidates the choices[] */
	3143	choiceCount = 0;
	3144	}
	3145	}
	3146
	3147	/* write the shift sequence if necessary */
	3148	if(g != pFromU2022State->g) {
	3149	switch(g) {
	3150	case 1:
	3151	buffer[len++] = UCNV_SO;
	3152
	3153	/* set the new state only if it is the locking shift SO/G1, not for SS2 or SS3 */
	3154	pFromU2022State->g = 1;
	3155	break;
	3156	case 2:
	3157	buffer[len++] = 0x1b;
	3158	buffer[len++] = 0x4e;
	3159	break;
	3160	default: /* case 3 */
	3161	buffer[len++] = 0x1b;
	3162	buffer[len++] = 0x4f;
	3163	break;
	3164	}
	3165	}
	3166
	3167	/* write the two output bytes */
	3168	buffer[len++] = (char)(targetValue >> 8);
	3169	buffer[len++] = (char)targetValue;
	3170	} else {
	3171	/* if we cannot find the character after checking all codepages
	3172	* then this is an error
	3173	*/
	3174	*err = U_INVALID_CHAR_FOUND;
	3175	cnv->fromUChar32=sourceChar;
	3176	break;
	3177	}
	3178	}
	3179
	3180	/* output len>0 bytes in buffer[] */
	3181	if(len == 1) {
	3182	*target++ = buffer[0];
	3183	if(offsets) {
	3184	offsets++ = (int32_t)(source - args->source - 1); / -1: known to be ASCII */
	3185	}
	3186	} else if(len == 2 && (target + 2) <= targetLimit) {
	3187	*target++ = buffer[0];
	3188	*target++ = buffer[1];
	3189	if(offsets) {
	3190	int32_t sourceIndex = (int32_t)(source - args->source - U16_LENGTH(sourceChar));
	3191	*offsets++ = sourceIndex;
	3192	*offsets++ = sourceIndex;
	3193	}
	3194	} else {
	3195	fromUWriteUInt8(
	3196	cnv,
	3197	buffer, len,
	3198	&target, (const char *)targetLimit,
	3199	&offsets, (int32_t)(source - args->source - U16_LENGTH(sourceChar)),
	3200	err);
	3201	if(U_FAILURE(*err)) {
	3202	break;
	3203	}
	3204	}
	3205	} /* end if(myTargetIndex<myTargetLength) */
	3206	else{
	3207	*err =U_BUFFER_OVERFLOW_ERROR;
	3208	break;
	3209	}
	3210
	3211	}/* end while(mySourceIndex<mySourceLength) */
	3212
	3213	/*
	3214	* the end of the input stream and detection of truncated input
	3215	* are handled by the framework, but for ISO-2022-CN conversion
	3216	* we need to be in ASCII mode at the very end
	3217	*
	3218	* conditions:
	3219	* successful
	3220	* not in ASCII mode
	3221	* end of input and no truncated input
	3222	*/
	3223	if( U_SUCCESS(*err) &&
	3224	pFromU2022State->g!=0 &&
	3225	args->flush && source>=sourceLimit && cnv->fromUChar32==0
	3226	) {
	3227	int32_t sourceIndex;
	3228
	3229	/* we are switching to ASCII */
	3230	pFromU2022State->g=0;
	3231
	3232	/* get the source index of the last input character */
	3233	/*
	3234	* TODO this would be simpler and more reliable if we used a pair
	3235	* of sourceIndex/prevSourceIndex like in ucnvmbcs.c
	3236	* so that we could simply use the prevSourceIndex here;
	3237	* this code gives an incorrect result for the rare case of an unmatched
	3238	* trail surrogate that is alone in the last buffer of the text stream
	3239	*/
	3240	sourceIndex=(int32_t)(source-args->source);
	3241	if(sourceIndex>0) {
	3242	--sourceIndex;
	3243	if( U16_IS_TRAIL(args->source[sourceIndex]) &&
	3244	(sourceIndex==0 \|\| U16_IS_LEAD(args->source[sourceIndex-1]))
	3245	) {
	3246	--sourceIndex;
	3247	}
	3248	} else {
	3249	sourceIndex=-1;
	3250	}
	3251
	3252	fromUWriteUInt8(
	3253	cnv,
	3254	SHIFT_IN_STR, 1,
	3255	&target, (const char *)targetLimit,
	3256	&offsets, sourceIndex,
	3257	err);
	3258	}
	3259
	3260	/save the state and return /
	3261	args->source = source;
	3262	args->target = (char*)target;
	3263	}
	3264
	3265
	3266	static void U_CALLCONV
	3267	UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
	3268	UErrorCode* err){
	3269	char tempBuf[3];
	3270	const char mySource = (char ) args->source;
	3271	UChar *myTarget = args->target;
	3272	const char *mySourceLimit = args->sourceLimit;
	3273	uint32_t targetUniChar = 0x0000;
	3274	uint32_t mySourceChar = 0x0000;
	3275	UConverterDataISO2022* myData;
	3276	ISO2022State *pToU2022State;
	3277
	3278	myData=(UConverterDataISO2022*)(args->converter->extraInfo);
	3279	pToU2022State = &myData->toU2022State;
	3280
	3281	if(myData->key != 0) {
	3282	/* continue with a partial escape sequence */
	3283	goto escape;
	3284	} else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) {
	3285	/* continue with a partial double-byte character */
	3286	mySourceChar = args->converter->toUBytes[0];
	3287	args->converter->toULength = 0;
	3288	targetUniChar = missingCharMarker;
	3289	goto getTrailByte;
	3290	}
	3291
	3292	while(mySource < mySourceLimit){
	3293
	3294	targetUniChar =missingCharMarker;
	3295
	3296	if(myTarget < args->targetLimit){
	3297
	3298	mySourceChar= (unsigned char) *mySource++;
	3299
	3300	switch(mySourceChar){
	3301	case UCNV_SI:
	3302	pToU2022State->g=0;
	3303	if (myData->isEmptySegment) {
	3304	myData->isEmptySegment = FALSE; /* we are handling it, reset to avoid future spurious errors */
	3305	*err = U_ILLEGAL_ESCAPE_SEQUENCE;
	3306	args->converter->toUCallbackReason = UCNV_IRREGULAR;
	3307	args->converter->toUBytes[0] = static_cast<uint8_t>(mySourceChar);
	3308	args->converter->toULength = 1;
	3309	args->target = myTarget;
	3310	args->source = mySource;
	3311	return;
	3312	}
	3313	continue;
	3314
	3315	case UCNV_SO:
	3316	if(pToU2022State->cs[1] != 0) {
	3317	pToU2022State->g=1;
	3318	myData->isEmptySegment = TRUE; /* Begin a new segment, empty so far */
	3319	continue;
	3320	} else {
	3321	/* illegal to have SO before a matching designator */
	3322	myData->isEmptySegment = FALSE; /* Handling a different error, reset this to avoid future spurious errs */
	3323	break;
	3324	}
	3325
	3326	case ESC_2022:
	3327	mySource--;
	3328	escape:
	3329	{
	3330	const char * mySourceBefore = mySource;
	3331	int8_t toULengthBefore = args->converter->toULength;
	3332
	3333	changeState_2022(args->converter,&(mySource),
	3334	mySourceLimit, ISO_2022_CN,err);
	3335
	3336	/* After SO there must be at least one character before a designator (designator error handled separately) */
	3337	if(myData->key==0 && U_SUCCESS(*err) && myData->isEmptySegment) {
	3338	*err = U_ILLEGAL_ESCAPE_SEQUENCE;
	3339	args->converter->toUCallbackReason = UCNV_IRREGULAR;
	3340	args->converter->toULength = (int8_t)(toULengthBefore + (mySource - mySourceBefore));
	3341	}
	3342	}
	3343
	3344	/* invalid or illegal escape sequence */
	3345	if(U_FAILURE(*err)){
	3346	args->target = myTarget;
	3347	args->source = mySource;
	3348	myData->isEmptySegment = FALSE; /* Reset to avoid future spurious errors */
	3349	return;
	3350	}
	3351	continue;
	3352
	3353	/* ISO-2022-CN does not use single-byte (C1) SS2 and SS3 */
	3354
	3355	case CR:
	3356	case LF:
	3357	uprv_memset(pToU2022State, 0, sizeof(ISO2022State));
	3358	U_FALLTHROUGH;
	3359	default:
	3360	/* convert one or two bytes */
	3361	myData->isEmptySegment = FALSE;
	3362	if(pToU2022State->g != 0) {
	3363	if(mySource < mySourceLimit) {
	3364	UConverterSharedData *cnv;
	3365	StateEnum tempState;
	3366	int32_t tempBufLen;
	3367	int leadIsOk, trailIsOk;
	3368	uint8_t trailByte;
	3369	getTrailByte:
	3370	trailByte = (uint8_t)*mySource;
	3371	/*
	3372	* Ticket 5691: consistent illegal sequences:
	3373	* - We include at least the first byte in the illegal sequence.
	3374	* - If any of the non-initial bytes could be the start of a character,
	3375	* we stop the illegal sequence before the first one of those.
	3376	*
	3377	* In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
	3378	* an ESC/SO/SI, we report only the first byte as the illegal sequence.
	3379	* Otherwise we convert or report the pair of bytes.
	3380	*/
	3381	leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21);
	3382	trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21);
	3383	if (leadIsOk && trailIsOk) {
	3384	++mySource;
	3385	tempState = (StateEnum)pToU2022State->cs[pToU2022State->g];
	3386	if(tempState >= CNS_11643_0) {
	3387	cnv = myData->myConverterArray[CNS_11643];
	3388	tempBuf[0] = (char) (0x80+(tempState-CNS_11643_0));
	3389	tempBuf[1] = (char) (mySourceChar);
	3390	tempBuf[2] = (char) trailByte;
	3391	tempBufLen = 3;
	3392
	3393	}else{
	3394	U_ASSERT(tempState<UCNV_2022_MAX_CONVERTERS);
	3395	cnv = myData->myConverterArray[tempState];
	3396	tempBuf[0] = (char) (mySourceChar);
	3397	tempBuf[1] = (char) trailByte;
	3398	tempBufLen = 2;
	3399	}
	3400	targetUniChar = ucnv_MBCSSimpleGetNextUChar(cnv, tempBuf, tempBufLen, FALSE);
	3401	mySourceChar = (mySourceChar << 8) \| trailByte;
	3402	} else if (!(trailIsOk \|\| IS_2022_CONTROL(trailByte))) {
	3403	/* report a pair of illegal bytes if the second byte is not a DBCS starter */
	3404	++mySource;
	3405	/* add another bit so that the code below writes 2 bytes in case of error */
	3406	mySourceChar = 0x10000 \| (mySourceChar << 8) \| trailByte;
	3407	}
	3408	if(pToU2022State->g>=2) {
	3409	/* return from a single-shift state to the previous one */
	3410	pToU2022State->g=pToU2022State->prevG;
	3411	}
	3412	} else {
	3413	args->converter->toUBytes[0] = (uint8_t)mySourceChar;
	3414	args->converter->toULength = 1;
	3415	goto endloop;
	3416	}
	3417	}
	3418	else{
	3419	if(mySourceChar <= 0x7f) {
	3420	targetUniChar = (UChar) mySourceChar;
	3421	}
	3422	}
	3423	break;
	3424	}
	3425	if(targetUniChar < (missingCharMarker-1/0xfffe/)){
	3426	if(args->offsets){
	3427	args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
	3428	}
	3429	*(myTarget++)=(UChar)targetUniChar;
	3430	}
	3431	else if(targetUniChar > missingCharMarker){
	3432	/* disassemble the surrogate pair and write to output*/
	3433	targetUniChar-=0x0010000;
	3434	*myTarget = (UChar)(0xd800+(UChar)(targetUniChar>>10));
	3435	if(args->offsets){
	3436	args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
	3437	}
	3438	++myTarget;
	3439	if(myTarget< args->targetLimit){
	3440	*myTarget = (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));
	3441	if(args->offsets){
	3442	args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
	3443	}
	3444	++myTarget;
	3445	}else{
	3446	args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]=
	3447	(UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));
	3448	}
	3449
	3450	}
	3451	else{
	3452	/* Call the callback function*/
	3453	toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err);
	3454	break;
	3455	}
	3456	}
	3457	else{
	3458	*err =U_BUFFER_OVERFLOW_ERROR;
	3459	break;
	3460	}
	3461	}
	3462	endloop:
	3463	args->target = myTarget;
	3464	args->source = mySource;
	3465	}
	3466	#endif /* #if !UCONFIG_ONLY_HTML_CONVERSION */
	3467
	3468	static void U_CALLCONV
	3469	_ISO_2022_WriteSub(UConverterFromUnicodeArgs args, int32_t offsetIndex, UErrorCode err) {
	3470	UConverter *cnv = args->converter;
	3471	UConverterDataISO2022 myConverterData=(UConverterDataISO2022 ) cnv->extraInfo;
	3472	ISO2022State *pFromU2022State=&myConverterData->fromU2022State;
	3473	char p, subchar;
	3474	char buffer[8];
	3475	int32_t length;
	3476
	3477	subchar=(char *)cnv->subChars;
	3478	length=cnv->subCharLen; /* assume length==1 for most variants */
	3479
	3480	p = buffer;
	3481	switch(myConverterData->locale[0]){
	3482	case 'j':
	3483	{
	3484	int8_t cs;
	3485
	3486	if(pFromU2022State->g == 1) {
	3487	/* JIS7: switch from G1 to G0 */
	3488	pFromU2022State->g = 0;
	3489	*p++ = UCNV_SI;
	3490	}
	3491
	3492	cs = pFromU2022State->cs[0];
	3493	if(cs != ASCII && cs != JISX201) {
	3494	/* not in ASCII or JIS X 0201: switch to ASCII */
	3495	pFromU2022State->cs[0] = (int8_t)ASCII;
	3496	*p++ = '\x1b';
	3497	*p++ = '\x28';
	3498	*p++ = '\x42';
	3499	}
	3500
	3501	*p++ = subchar[0];
	3502	break;
	3503	}
	3504	case 'c':
	3505	if(pFromU2022State->g != 0) {
	3506	/* not in ASCII mode: switch to ASCII */
	3507	pFromU2022State->g = 0;
	3508	*p++ = UCNV_SI;
	3509	}
	3510	*p++ = subchar[0];
	3511	break;
	3512	case 'k':
	3513	if(myConverterData->version == 0) {
	3514	if(length == 1) {
	3515	if(args->converter->fromUnicodeStatus) {
	3516	/* in DBCS mode: switch to SBCS */
	3517	args->converter->fromUnicodeStatus = 0;
	3518	*p++ = UCNV_SI;
	3519	}
	3520	*p++ = subchar[0];
	3521	} else /* length == 2*/ {
	3522	if(!args->converter->fromUnicodeStatus) {
	3523	/* in SBCS mode: switch to DBCS */
	3524	args->converter->fromUnicodeStatus = 1;
	3525	*p++ = UCNV_SO;
	3526	}
	3527	*p++ = subchar[0];
	3528	*p++ = subchar[1];
	3529	}
	3530	break;
	3531	} else {
	3532	/* save the subconverter's substitution string */
	3533	uint8_t *currentSubChars = myConverterData->currentConverter->subChars;
	3534	int8_t currentSubCharLen = myConverterData->currentConverter->subCharLen;
	3535
	3536	/* set our substitution string into the subconverter */
	3537	myConverterData->currentConverter->subChars = (uint8_t *)subchar;
	3538	myConverterData->currentConverter->subCharLen = (int8_t)length;
	3539
	3540	/* let the subconverter write the subchar, set/retrieve fromUChar32 state */
	3541	args->converter = myConverterData->currentConverter;
	3542	myConverterData->currentConverter->fromUChar32 = cnv->fromUChar32;
	3543	ucnv_cbFromUWriteSub(args, 0, err);
	3544	cnv->fromUChar32 = myConverterData->currentConverter->fromUChar32;
	3545	args->converter = cnv;
	3546
	3547	/* restore the subconverter's substitution string */
	3548	myConverterData->currentConverter->subChars = currentSubChars;
	3549	myConverterData->currentConverter->subCharLen = currentSubCharLen;
	3550
	3551	if(*err == U_BUFFER_OVERFLOW_ERROR) {
	3552	if(myConverterData->currentConverter->charErrorBufferLength > 0) {
	3553	uprv_memcpy(
	3554	cnv->charErrorBuffer,
	3555	myConverterData->currentConverter->charErrorBuffer,
	3556	myConverterData->currentConverter->charErrorBufferLength);
	3557	}
	3558	cnv->charErrorBufferLength = myConverterData->currentConverter->charErrorBufferLength;
	3559	myConverterData->currentConverter->charErrorBufferLength = 0;
	3560	}
	3561	return;
	3562	}
	3563	default:
	3564	/* not expected */
	3565	break;
	3566	}
	3567	ucnv_cbFromUWriteBytes(args,
	3568	buffer, (int32_t)(p - buffer),
	3569	offsetIndex, err);
	3570	}
	3571
	3572	/*
	3573	* Structure for cloning an ISO 2022 converter into a single memory block.
	3574	* ucnv_safeClone() of the converter will align the entire cloneStruct,
	3575	* and then ucnv_safeClone() of the sub-converter may additionally align
	3576	* currentConverter inside the cloneStruct, for which we need the deadSpace
	3577	* after currentConverter.
	3578	* This is because UAlignedMemory may be larger than the actually
	3579	* necessary alignment size for the platform.
	3580	* The other cloneStruct fields will not be moved around,
	3581	* and are aligned properly with cloneStruct's alignment.
	3582	*/
	3583	struct cloneStruct
	3584	{
	3585	UConverter cnv;
	3586	UConverter currentConverter;
	3587	UAlignedMemory deadSpace;
	3588	UConverterDataISO2022 mydata;
	3589	};
	3590
	3591
	3592	U_CDECL_BEGIN
	3593
	3594	static UConverter * U_CALLCONV
	3595	_ISO_2022_SafeClone(
	3596	const UConverter *cnv,
	3597	void *stackBuffer,
	3598	int32_t *pBufferSize,
	3599	UErrorCode *status)
	3600	{
	3601	struct cloneStruct * localClone;
	3602	UConverterDataISO2022 *cnvData;
	3603	int32_t i, size;
	3604
	3605	if (pBufferSize == 0) { / 'preflighting' request - set needed size into pBufferSize /
	3606	*pBufferSize = (int32_t)sizeof(struct cloneStruct);
	3607	return NULL;
	3608	}
	3609
	3610	cnvData = (UConverterDataISO2022 *)cnv->extraInfo;
	3611	localClone = (struct cloneStruct *)stackBuffer;
	3612
	3613	/* ucnv.c/ucnv_safeClone() copied the main UConverter already */
	3614
	3615	uprv_memcpy(&localClone->mydata, cnvData, sizeof(UConverterDataISO2022));
	3616	localClone->cnv.extraInfo = &localClone->mydata; /* set pointer to extra data */
	3617	localClone->cnv.isExtraLocal = TRUE;
	3618
	3619	/* share the subconverters */
	3620
	3621	if(cnvData->currentConverter != NULL) {
	3622	size = (int32_t)(sizeof(UConverter) + sizeof(UAlignedMemory)); /* include size of padding */
	3623	localClone->mydata.currentConverter =
	3624	ucnv_safeClone(cnvData->currentConverter,
	3625	&localClone->currentConverter,
	3626	&size, status);
	3627	if(U_FAILURE(*status)) {
	3628	return NULL;
	3629	}
	3630	}
	3631
	3632	for(i=0; i<UCNV_2022_MAX_CONVERTERS; ++i) {
	3633	if(cnvData->myConverterArray[i] != NULL) {
	3634	ucnv_incrementRefCount(cnvData->myConverterArray[i]);
	3635	}
	3636	}
	3637
	3638	return &localClone->cnv;
	3639	}
	3640
	3641	U_CDECL_END
	3642
	3643	static void U_CALLCONV
	3644	_ISO_2022_GetUnicodeSet(const UConverter *cnv,
	3645	const USetAdder *sa,
	3646	UConverterUnicodeSet which,
	3647	UErrorCode *pErrorCode)
	3648	{
	3649	int32_t i;
	3650	UConverterDataISO2022* cnvData;
	3651
	3652	if (U_FAILURE(*pErrorCode)) {
	3653	return;
	3654	}
	3655	#ifdef U_ENABLE_GENERIC_ISO_2022
	3656	if (cnv->sharedData == &_ISO2022Data) {
	3657	/* We use UTF-8 in this case */
	3658	sa->addRange(sa->set, 0, 0xd7FF);
	3659	sa->addRange(sa->set, 0xE000, 0x10FFFF);
	3660	return;
	3661	}
	3662	#endif
	3663
	3664	cnvData = (UConverterDataISO2022*)cnv->extraInfo;
	3665
	3666	/* open a set and initialize it with code points that are algorithmically round-tripped */
	3667	switch(cnvData->locale[0]){
	3668	case 'j':
	3669	/* include JIS X 0201 which is hardcoded */
	3670	sa->add(sa->set, 0xa5);
	3671	sa->add(sa->set, 0x203e);
	3672	if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) {
	3673	/* include Latin-1 for some variants of JP */
	3674	sa->addRange(sa->set, 0, 0xff);
	3675	} else {
	3676	/* include ASCII for JP */
	3677	sa->addRange(sa->set, 0, 0x7f);
	3678	}
	3679	if(cnvData->version==3 \|\| cnvData->version==4 \|\| which==UCNV_ROUNDTRIP_AND_FALLBACK_SET) {
	3680	/*
	3681	* Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!=0
	3682	* because the bit is on for all JP versions although only versions 3 & 4 (JIS7 & JIS8)
	3683	* use half-width Katakana.
	3684	* This is because all ISO-2022-JP variants are lenient in that they accept (in toUnicode)
	3685	* half-width Katakana via the ESC ( I sequence.
	3686	* However, we only emit (fromUnicode) half-width Katakana according to the
	3687	* definition of each variant.
	3688	*
	3689	* When including fallbacks,
	3690	* we need to include half-width Katakana Unicode code points for all JP variants because
	3691	* JIS X 0208 has hardcoded fallbacks for them (which map to full-width Katakana).
	3692	*/
	3693	/* include half-width Katakana for JP */
	3694	sa->addRange(sa->set, HWKANA_START, HWKANA_END);
	3695	}
	3696	break;
	3697	#if !UCONFIG_ONLY_HTML_CONVERSION
	3698	case 'c':
	3699	case 'z':
	3700	/* include ASCII for CN */
	3701	sa->addRange(sa->set, 0, 0x7f);
	3702	break;
	3703	case 'k':
	3704	/* there is only one converter for KR, and it is not in the myConverterArray[] */
	3705	cnvData->currentConverter->sharedData->impl->getUnicodeSet(
	3706	cnvData->currentConverter, sa, which, pErrorCode);
	3707	/* the loop over myConverterArray[] will simply not find another converter */
	3708	break;
	3709	#endif
	3710	default:
	3711	break;
	3712	}
	3713
	3714	#if 0 /* Replaced by ucnv_MBCSGetFilteredUnicodeSetForUnicode() until we implement ucnv_getUnicodeSet() with reverse fallbacks. */
	3715	if( (cnvData->locale[0]=='c' \|\| cnvData->locale[0]=='z') &&
	3716	cnvData->version==0 && i==CNS_11643
	3717	) {
	3718	/* special handling for non-EXT ISO-2022-CN: add only code points for CNS planes 1 and 2 */
	3719	ucnv_MBCSGetUnicodeSetForBytes(
	3720	cnvData->myConverterArray[i],
	3721	sa, UCNV_ROUNDTRIP_SET,
	3722	0, 0x81, 0x82,
	3723	pErrorCode);
	3724	}
	3725	#endif
	3726
	3727	for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) {
	3728	UConverterSetFilter filter;
	3729	if(cnvData->myConverterArray[i]!=NULL) {
	3730	if(cnvData->locale[0]=='j' && i==JISX208) {
	3731	/*
	3732	* Only add code points that map to Shift-JIS codes
	3733	* corresponding to JIS X 0208.
	3734	*/
	3735	filter=UCNV_SET_FILTER_SJIS;
	3736	#if !UCONFIG_ONLY_HTML_CONVERSION
	3737	} else if( (cnvData->locale[0]=='c' \|\| cnvData->locale[0]=='z') &&
	3738	cnvData->version==0 && i==CNS_11643) {
	3739	/*
	3740	* Version-specific for CN:
	3741	* CN version 0 does not map CNS planes 3..7 although
	3742	* they are all available in the CNS conversion table;
	3743	* CN version 1 (-EXT) does map them all.
	3744	* The two versions create different Unicode sets.
	3745	*/
	3746	filter=UCNV_SET_FILTER_2022_CN;
	3747	} else if(i==KSC5601) {
	3748	/*
	3749	* Some of the KSC 5601 tables (convrtrs.txt has this aliases on multiple tables)
	3750	* are broader than GR94.
	3751	*/
	3752	filter=UCNV_SET_FILTER_GR94DBCS;
	3753	#endif
	3754	} else {
	3755	filter=UCNV_SET_FILTER_NONE;
	3756	}
	3757	ucnv_MBCSGetFilteredUnicodeSetForUnicode(cnvData->myConverterArray[i], sa, which, filter, pErrorCode);
	3758	}
	3759	}
	3760
	3761	/*
	3762	* ISO 2022 converters must not convert SO/SI/ESC despite what
	3763	* sub-converters do by themselves.
	3764	* Remove these characters from the set.
	3765	*/
	3766	sa->remove(sa->set, 0x0e);
	3767	sa->remove(sa->set, 0x0f);
	3768	sa->remove(sa->set, 0x1b);
	3769
	3770	/* ISO 2022 converters do not convert C1 controls either */
	3771	sa->removeRange(sa->set, 0x80, 0x9f);
	3772	}
	3773
	/*
	 * Function table for the generic "ISO_2022" converter.
	 * The initializer is positional: entry order has to follow the UConverterImpl
	 * declaration, which is not part of this file (presumably ucnv_cnv.h -- confirm
	 * there before adding, removing or reordering entries).
	 * The four conversion entries are only filled in when U_ENABLE_GENERIC_ISO_2022
	 * is defined; otherwise they are NULL. In the enabled case the from-Unicode
	 * direction reuses the UTF-8 functions (ucnv_fromUnicode_UTF8*).
	 */
	3774	static const UConverterImpl _ISO2022Impl={
	3775	UCNV_ISO_2022, /* same type constant that also appears in the static data of this converter */
	3776
	3777	NULL, /* NOTE(review): presumably the load hook -- confirm against UConverterImpl */
	3778	NULL, /* NOTE(review): presumably the unload hook -- confirm against UConverterImpl */
	3779
	3780	_ISO2022Open, /* open/close/reset are the same functions in every table of this file */
	3781	_ISO2022Close,
	3782	_ISO2022Reset,
	3783
	3784	#ifdef U_ENABLE_GENERIC_ISO_2022
	3785	T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC, /* to-Unicode conversion */
	3786	T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC, /* same function again; presumably the with-offsets slot -- confirm */
	3787	ucnv_fromUnicode_UTF8, /* from-Unicode conversion, borrowed from the UTF-8 converter */
	3788	ucnv_fromUnicode_UTF8_OFFSETS_LOGIC, /* offsets-tracking counterpart of the entry above */
	3789	#else
	3790	NULL, /* generic converter not enabled: no conversion functions are provided */
	3791	NULL,
	3792	NULL,
	3793	NULL,
	3794	#endif
	3795	NULL, /* NOTE(review): presumably getNextUChar -- confirm */
	3796
	3797	NULL, /* NOTE(review): presumably getStarters -- confirm */
	3798	_ISO2022getName,
	3799	_ISO_2022_WriteSub,
	3800	_ISO_2022_SafeClone,
	3801	_ISO_2022_GetUnicodeSet,
	3802
	3803	NULL, /* NOTE(review): the last two slots are presumably the direct UTF-8 conversion hooks -- confirm */
	3804	NULL
	3805	};
	/*
	 * Static descriptor for the generic "ISO_2022" converter.
	 * The initializer is positional, so the value order must follow the
	 * UConverterStaticData declaration, which is not visible in this file.
	 * NOTE(review): notes marked "presumably" are inferred from the values and from
	 * the sibling JP/KR/CN descriptors in this file; confirm against the declaration.
	 */
	3806	static const UConverterStaticData _ISO2022StaticData={
	3807	sizeof(UConverterStaticData), /* size of this structure */
	3808	"ISO_2022", /* name string of this converter */
	3809	2022, /* presumably a codepage number; the JP/KR/CN descriptors use 0 in this position */
	3810	UCNV_IBM, /* presumably the platform */
	3811	UCNV_ISO_2022, /* same type constant that heads the UConverterImpl tables in this file */
	3812	1, /* presumably the minimum bytes per character, paired with the maximum on the next line */
	3813	3, /* max 3 bytes per UChar from UTF-8 (4 bytes from surrogate _pair_) */
	3814	{ 0x1a, 0, 0, 0 }, /* presumably the substitution byte sequence; 0x1a is the ASCII SUB control */
	3815	1, /* presumably the number of bytes used from the array above */
	3816	FALSE, /* NOTE(review): the two flags are presumably the to/from-Unicode fallback indicators -- confirm */
	3817	FALSE,
	3818	0,
	3819	0,
	3820	{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
	3821	};
	/*
	 * Shared-data object for the generic "ISO_2022" converter, built by
	 * UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER from _ISO2022StaticData and _ISO2022Impl.
	 * Unlike the JP/KR/CN shared-data objects in this file, it is not wrapped in an
	 * unnamed namespace.
	 * NOTE(review): presumably it is declared extern in a header and referenced from
	 * outside this file -- confirm before changing its linkage.
	 */
	3822	const UConverterSharedData _ISO2022Data=
	3823	UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISO2022StaticData, &_ISO2022Impl);
	3824
	3825	/***********JP**************/
	/*
	 * Function table for the ISO-2022-JP converter.
	 * Positional initializer: entry order has to follow the UConverterImpl declaration,
	 * which is not part of this file (presumably ucnv_cnv.h -- confirm).
	 * It differs from the generic _ISO2022Impl table only in the four conversion
	 * entries, which are the JP-specific *_OFFSETS_LOGIC functions and are always
	 * present (no U_ENABLE_GENERIC_ISO_2022 guard).
	 */
	3826	static const UConverterImpl _ISO2022JPImpl={
	3827	UCNV_ISO_2022, /* same type constant as the generic, KR and CN tables */
	3828
	3829	NULL, /* NOTE(review): presumably load/unload hooks, unused here -- confirm */
	3830	NULL,
	3831
	3832	_ISO2022Open, /* open/close/reset are shared by all tables in this file */
	3833	_ISO2022Close,
	3834	_ISO2022Reset,
	3835
	3836	UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC, /* to-Unicode conversion */
	3837	UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC, /* same function again; presumably the with-offsets slot -- confirm */
	3838	UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC, /* from-Unicode conversion */
	3839	UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC, /* same function again; presumably the with-offsets slot -- confirm */
	3840	NULL, /* NOTE(review): presumably getNextUChar -- confirm */
	3841
	3842	NULL, /* NOTE(review): presumably getStarters -- confirm */
	3843	_ISO2022getName,
	3844	_ISO_2022_WriteSub,
	3845	_ISO_2022_SafeClone,
	3846	_ISO_2022_GetUnicodeSet,
	3847
	3848	NULL, /* NOTE(review): the last two slots are presumably the direct UTF-8 conversion hooks -- confirm */
	3849	NULL
	3850	};
	/*
	 * Static descriptor for the ISO-2022-JP converter.
	 * Positional initializer; the value order must follow the UConverterStaticData
	 * declaration, which is not visible in this file.
	 * It differs from the generic "ISO_2022" descriptor in the name, in the 0 that
	 * follows the name, and in the maximum byte count.
	 * NOTE(review): notes marked "presumably" are inferred from the values; confirm
	 * against the declaration.
	 */
	3851	static const UConverterStaticData _ISO2022JPStaticData={
	3852	sizeof(UConverterStaticData), /* size of this structure */
	3853	"ISO_2022_JP", /* name string of this converter */
	3854	0, /* presumably a codepage number; the generic descriptor uses 2022 in this position */
	3855	UCNV_IBM, /* presumably the platform */
	3856	UCNV_ISO_2022, /* same type constant that heads _ISO2022JPImpl */
	3857	1, /* presumably the minimum bytes per character, paired with the maximum on the next line */
	3858	6, /* max 6 bytes per UChar: 4-byte escape sequence + DBCS */
	3859	{ 0x1a, 0, 0, 0 }, /* presumably the substitution byte sequence; 0x1a is the ASCII SUB control */
	3860	1, /* presumably the number of bytes used from the array above */
	3861	FALSE, /* NOTE(review): the two flags are presumably the to/from-Unicode fallback indicators -- confirm */
	3862	FALSE,
	3863	0,
	3864	0,
	3865	{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
	3866	};
	3867
	/*
	 * Shared-data object for ISO-2022-JP, built by UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER
	 * from _ISO2022JPStaticData and _ISO2022JPImpl.
	 * It lives in an unnamed namespace, so the name has internal linkage and can only
	 * be referenced from this translation unit.
	 */
	3868	namespace {
	3869
	3870	const UConverterSharedData _ISO2022JPData=
	3871	UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISO2022JPStaticData, &_ISO2022JPImpl);
	3872
	3873	} // namespace
	3874
	3875	#if !UCONFIG_ONLY_HTML_CONVERSION
	3876	/*********** KR *************/
	/*
	 * Function table for the ISO-2022-KR converter; compiled only when
	 * UCONFIG_ONLY_HTML_CONVERSION is off (see the enclosing #if).
	 * Positional initializer: entry order has to follow the UConverterImpl declaration,
	 * which is not part of this file (presumably ucnv_cnv.h -- confirm).
	 * Only the four conversion entries are KR-specific; every other entry matches the
	 * JP table.
	 */
	3877	static const UConverterImpl _ISO2022KRImpl={
	3878	UCNV_ISO_2022, /* same type constant as the generic, JP and CN tables */
	3879
	3880	NULL, /* NOTE(review): presumably load/unload hooks, unused here -- confirm */
	3881	NULL,
	3882
	3883	_ISO2022Open, /* open/close/reset are shared by all tables in this file */
	3884	_ISO2022Close,
	3885	_ISO2022Reset,
	3886
	3887	UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC, /* to-Unicode conversion */
	3888	UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC, /* same function again; presumably the with-offsets slot -- confirm */
	3889	UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC, /* from-Unicode conversion */
	3890	UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC, /* same function again; presumably the with-offsets slot -- confirm */
	3891	NULL, /* NOTE(review): presumably getNextUChar -- confirm */
	3892
	3893	NULL, /* NOTE(review): presumably getStarters -- confirm */
	3894	_ISO2022getName,
	3895	_ISO_2022_WriteSub,
	3896	_ISO_2022_SafeClone,
	3897	_ISO_2022_GetUnicodeSet,
	3898
	3899	NULL, /* NOTE(review): the last two slots are presumably the direct UTF-8 conversion hooks -- confirm */
	3900	NULL
	3901	};
	/*
	 * Static descriptor for the ISO-2022-KR converter.
	 * Positional initializer; the value order must follow the UConverterStaticData
	 * declaration, which is not visible in this file.
	 * Apart from the name and the maximum byte count it carries the same values as
	 * the JP descriptor.
	 * NOTE(review): notes marked "presumably" are inferred from the values; confirm
	 * against the declaration.
	 */
	3902	static const UConverterStaticData _ISO2022KRStaticData={
	3903	sizeof(UConverterStaticData), /* size of this structure */
	3904	"ISO_2022_KR", /* name string of this converter */
	3905	0, /* presumably a codepage number; the generic descriptor uses 2022 in this position */
	3906	UCNV_IBM, /* presumably the platform */
	3907	UCNV_ISO_2022, /* same type constant that heads _ISO2022KRImpl */
	3908	1, /* presumably the minimum bytes per character, paired with the maximum on the next line */
	3909	8, /* max 8 bytes per UChar */
	3910	{ 0x1a, 0, 0, 0 }, /* presumably the substitution byte sequence; 0x1a is the ASCII SUB control */
	3911	1, /* presumably the number of bytes used from the array above */
	3912	FALSE, /* NOTE(review): the two flags are presumably the to/from-Unicode fallback indicators -- confirm */
	3913	FALSE,
	3914	0,
	3915	0,
	3916	{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
	3917	};
	3918
	/*
	 * Shared-data object for ISO-2022-KR, built by UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER
	 * from _ISO2022KRStaticData and _ISO2022KRImpl.
	 * It lives in an unnamed namespace, so the name has internal linkage and can only
	 * be referenced from this translation unit.
	 */
	3919	namespace {
	3920
	3921	const UConverterSharedData _ISO2022KRData=
	3922	UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISO2022KRStaticData, &_ISO2022KRImpl);
	3923
	3924	} // namespace
	3925
	3926	/************* CN *************/
	/*
	 * Function table for the ISO-2022-CN converter; compiled only when
	 * UCONFIG_ONLY_HTML_CONVERSION is off (see the enclosing #if).
	 * Positional initializer: entry order has to follow the UConverterImpl declaration,
	 * which is not part of this file (presumably ucnv_cnv.h -- confirm).
	 * Only the four conversion entries are CN-specific; every other entry matches the
	 * JP and KR tables.
	 */
	3927	static const UConverterImpl _ISO2022CNImpl={
	3928
	3929	UCNV_ISO_2022, /* same type constant as the generic, JP and KR tables */
	3930
	3931	NULL, /* NOTE(review): presumably load/unload hooks, unused here -- confirm */
	3932	NULL,
	3933
	3934	_ISO2022Open, /* open/close/reset are shared by all tables in this file */
	3935	_ISO2022Close,
	3936	_ISO2022Reset,
	3937
	3938	UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC, /* to-Unicode conversion */
	3939	UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC, /* same function again; presumably the with-offsets slot -- confirm */
	3940	UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC, /* from-Unicode conversion */
	3941	UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC, /* same function again; presumably the with-offsets slot -- confirm */
	3942	NULL, /* NOTE(review): presumably getNextUChar -- confirm */
	3943
	3944	NULL, /* NOTE(review): presumably getStarters -- confirm */
	3945	_ISO2022getName,
	3946	_ISO_2022_WriteSub,
	3947	_ISO_2022_SafeClone,
	3948	_ISO_2022_GetUnicodeSet,
	3949
	3950	NULL, /* NOTE(review): the last two slots are presumably the direct UTF-8 conversion hooks -- confirm */
	3951	NULL
	3952	};
	/*
	 * Static descriptor for the ISO-2022-CN converter.
	 * Positional initializer; the value order must follow the UConverterStaticData
	 * declaration, which is not visible in this file.
	 * Apart from the name and the comment on the maximum byte count it carries the
	 * same values as the KR descriptor.
	 * NOTE(review): notes marked "presumably" are inferred from the values; confirm
	 * against the declaration.
	 */
	3953	static const UConverterStaticData _ISO2022CNStaticData={
	3954	sizeof(UConverterStaticData), /* size of this structure */
	3955	"ISO_2022_CN", /* name string of this converter */
	3956	0, /* presumably a codepage number; the generic descriptor uses 2022 in this position */
	3957	UCNV_IBM, /* presumably the platform */
	3958	UCNV_ISO_2022, /* same type constant that heads _ISO2022CNImpl */
	3959	1, /* presumably the minimum bytes per character, paired with the maximum on the next line */
	3960	8, /* max 8 bytes per UChar: 4-byte CNS designator + 2 bytes for SS2/SS3 + DBCS */
	3961	{ 0x1a, 0, 0, 0 }, /* presumably the substitution byte sequence; 0x1a is the ASCII SUB control */
	3962	1, /* presumably the number of bytes used from the array above */
	3963	FALSE, /* NOTE(review): the two flags are presumably the to/from-Unicode fallback indicators -- confirm */
	3964	FALSE,
	3965	0,
	3966	0,
	3967	{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
	3968	};
	3969
	/*
	 * Shared-data object for ISO-2022-CN, built by UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER
	 * from _ISO2022CNStaticData and _ISO2022CNImpl.
	 * It lives in an unnamed namespace, so the name has internal linkage and can only
	 * be referenced from this translation unit.
	 */
	3970	namespace {
	3971
	3972	const UConverterSharedData _ISO2022CNData=
	3973	UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISO2022CNStaticData, &_ISO2022CNImpl);
	3974
	3975	} // namespace
	3976	#endif /* #if !UCONFIG_ONLY_HTML_CONVERSION */
	3977
	3978	#endif /* #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION */