git.saurik.com Git - apple/icu.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	******************************************************************************
	3	*
	4	* Copyright (C) 2000-2013, International Business Machines
	5	* Corporation and others. All Rights Reserved.
	6	*
	7	******************************************************************************
	8	* file name: ucnvmbcs.c
	9	* encoding: US-ASCII
	10	* tab size: 8 (not used)
	11	* indentation:4
	12	*
	13	* created on: 2000jul03
	14	* created by: Markus W. Scherer
	15	*
	16	* The current code in this file replaces the previous implementation
	17	* of conversion code from multi-byte codepages to Unicode and back.
	18	* This implementation supports the following:
	19	* - legacy variable-length codepages with up to 4 bytes per character
	20	* - all Unicode code points (up to 0x10ffff)
	21	* - efficient distinction of unassigned vs. illegal byte sequences
	22	* - it is possible in fromUnicode() to directly deal with simple
	23	* stateful encodings (used for EBCDIC_STATEFUL)
	24	* - it is possible to convert Unicode code points
	25	* to a single zero byte (but not as a fallback except for SBCS)
	26	*
	27	* Remaining limitations in fromUnicode:
	28	* - byte sequences must not have leading zero bytes
	29	* - except for SBCS codepages: no fallback mapping from Unicode to a zero byte
	30	* - limitation to up to 4 bytes per character
	31	*
	32	* ICU 2.8 (late 2003) adds a secondary data structure which lifts some of these
	33	* limitations and adds m:n character mappings and other features.
	34	* See ucnv_ext.h for details.
	35	*
	36	* Change history:
	37	*
	38	* 5/6/2001 Ram Moved MBCS_SINGLE_RESULT_FROM_U,MBCS_STAGE_2_FROM_U,
	39	* MBCS_VALUE_2_FROM_STAGE_2, MBCS_VALUE_4_FROM_STAGE_2
	40	* macros to ucnvmbcs.h file
	41	*/
	42
	43	#include "unicode/utypes.h"
	44
	45	#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
	46
	47	#include "unicode/ucnv.h"
	48	#include "unicode/ucnv_cb.h"
	49	#include "unicode/udata.h"
	50	#include "unicode/uset.h"
	51	#include "unicode/utf8.h"
	52	#include "unicode/utf16.h"
	53	#include "ucnv_bld.h"
	54	#include "ucnvmbcs.h"
	55	#include "ucnv_ext.h"
	56	#include "ucnv_cnv.h"
	57	#include "cmemory.h"
	58	#include "cstring.h"
	59	#include "umutex.h"
	60
	61	/* control optimizations according to the platform */
	62	#define MBCS_UNROLL_SINGLE_TO_BMP 1
	63	#define MBCS_UNROLL_SINGLE_FROM_BMP 0
	64
	65	/*
	66	* _MBCSHeader versions 5.3 & 4.3
	67	* (Note that the _MBCSHeader version is in addition to the converter formatVersion.)
	68	*
	69	* This version is optional. Version 5 is used for incompatible data format changes.
	70	* makeconv will continue to generate version 4 files if possible.
	71	*
	72	* Changes from version 4:
	73	*
	74	* The main difference is an additional _MBCSHeader field with
	75	* - the length (number of uint32_t) of the _MBCSHeader
	76	* - flags for further incompatible data format changes
	77	* - flags for further, backward compatible data format changes
	78	*
	79	* The MBCS_OPT_NO_FROM_U flag indicates that most of the fromUnicode data is omitted from
	80	* the file and needs to be reconstituted at load time.
	81	* This requires a utf8Friendly format with an additional mbcsIndex table for fast
	82	* (and UTF-8-friendly) fromUnicode conversion for Unicode code points up to maxFastUChar.
	83	* (For details about these structures see below, and see ucnvmbcs.h.)
	84	*
	85	* utf8Friendly also implies that the fromUnicode mappings are stored in ascending order
	86	* of the Unicode code points. (This requires that the .ucm file has the \|0 etc.
	87	* precision markers for all mappings.)
	88	*
	89	* All fallbacks have been moved to the extension table, leaving only roundtrips in the
	90	* omitted data that can be reconstituted from the toUnicode data.
	91	*
	92	* Of the stage 2 table, the part corresponding to maxFastUChar and below is omitted.
	93	* With only roundtrip mappings in the base fromUnicode data, this part is fully
	94	* redundant with the mbcsIndex and will be reconstituted from that (also using the
	95	* stage 1 table which contains the information about how stage 2 was compacted).
	96	*
	97	* The rest of the stage 2 table, the part for code points above maxFastUChar,
	98	* is stored in the file and will be appended to the reconstituted part.
	99	*
	100	* The entire fromUBytes array is omitted from the file and will be reconstituted.
	101	* This is done by enumerating all toUnicode roundtrip mappings, performing
	102	* each mapping (using the stage 1 and reconstituted stage 2 tables) and
	103	* writing instead of reading the byte values.
	104	*
	105	* _MBCSHeader version 4.3
	106	*
	107	* Change from version 4.2:
	108	* - Optional utf8Friendly data structures, with 64-entry stage 3 block
	109	* allocation for parts of the BMP, and an additional mbcsIndex in non-SBCS
	110	* files which can be used instead of stages 1 & 2.
	111	* Faster lookups for roundtrips from most commonly used characters,
	112	* and lookups from UTF-8 byte sequences with a natural bit distribution.
	113	* See ucnvmbcs.h for more details.
	114	*
	115	* Change from version 4.1:
	116	* - Added an optional extension table structure at the end of the .cnv file.
	117	* It is present if the upper bits of the header flags field contains a non-zero
	118	* byte offset to it.
	119	* Files that contain only a conversion table and no base table
	120	* use the special outputType MBCS_OUTPUT_EXT_ONLY.
	121	* These contain the base table name between the MBCS header and the extension
	122	* data.
	123	*
	124	* Change from version 4.0:
	125	* - Replace header.reserved with header.fromUBytesLength so that all
	126	* fields in the data have length.
	127	*
	128	* Changes from version 3 (for performance improvements):
	129	* - new bit distribution for state table entries
	130	* - reordered action codes
	131	* - new data structure for single-byte fromUnicode
	132	* + stage 2 only contains indexes
	133	* + stage 3 stores 16 bits per character with classification bits 15..8
	134	* - no multiplier for stage 1 entries
	135	* - stage 2 for non-single-byte codepages contains the index and the flags in
	136	* one 32-bit value
	137	* - 2-byte and 4-byte fromUnicode results are stored directly as 16/32-bit integers
	138	*
	139	* For more details about old versions of the MBCS data structure, see
	140	* the corresponding versions of this file.
	141	*
	142	* Converting stateless codepage data ---------------------------------------***
	143	* (or codepage data with simple states) to Unicode.
	144	*
	145	* Data structure and algorithm for converting from complex legacy codepages
	146	* to Unicode. (Designed before 2000-may-22.)
	147	*
	148	* The basic idea is that the structure of legacy codepages can be described
	149	* with state tables.
	150	* When reading a byte stream, each input byte causes a state transition.
	151	* Some transitions result in the output of a code point, some result in
	152	* "unassigned" or "illegal" output.
	153	* This is used here for character conversion.
	154	*
	155	* The data structure begins with a state table consisting of a row
	156	* per state, with 256 entries (columns) per row for each possible input
	157	* byte value.
	158	* Each entry is 32 bits wide, with two formats distinguished by
	159	* the sign bit (bit 31):
	160	*
	161	* One format for transitional entries (bit 31 not set) for non-final bytes, and
	162	* one format for final entries (bit 31 set).
	163	* Both formats contain the number of the next state in the same bit
	164	* positions.
	165	* State 0 is the initial state.
	166	*
	167	* Most of the time, the offset values of subsequent states are added
	168	* up to a scalar value. This value will eventually be the index of
	169	* the Unicode code point in a table that follows the state table.
	170	* The effect is that the code points for final state table rows
	171	* are contiguous. The code points of final state rows follow each other
	172	* in the order of the references to those final states by previous
	173	* states, etc.
	174	*
	175	* For some terminal states, the offset is itself the output Unicode
	176	* code point (16 bits for a BMP code point or 20 bits for a supplementary
	177	* code point, stored as code point minus 0x10000 so that 20 bits are enough).
	178	* For others, the code point in the Unicode table is stored with either
	179	* one or two code units: one for BMP code points, two for a pair of
	180	* surrogates.
	181	* All code points for a final state entry take up the same number of code
	182	* units, regardless of whether they all actually _use_ the same number
	183	* of code units. This is necessary for simple array access.
	184	*
	185	* An additional feature comes in with what in ICU is called "fallback"
	186	* mappings:
	187	*
	188	* In addition to round-trippable, precise, 1:1 mappings, there are often
	189	* mappings defined between similar, though not the same, characters.
	190	* Typically, such mappings occur only in fromUnicode mapping tables because
	191	* Unicode has a superset repertoire of most other codepages. However, it
	192	* is possible to provide such mappings in the toUnicode tables, too.
	193	* In this case, the fallback mappings are partly integrated into the
	194	* general state tables because the structure of the encoding includes their
	195	* byte sequences.
	196	* For final entries in an initial state, fallback mappings are stored in
	197	* the entry itself like with roundtrip mappings.
	198	* For other final entries, they are stored in the code units table if
	199	* the entry is for a pair of code units.
	200	* For single-unit results in the code units table, there is no space to
	201	* alternatively hold a fallback mapping; in this case, the code unit
	202	* is stored as U+fffe (unassigned), and the fallback mapping needs to
	203	* be looked up by the scalar offset value in a separate table.
	204	*
	205	* "Unassigned" state entries really mean "structurally unassigned",
	206	* i.e., such a byte sequence will never have a mapping result.
	207	*
	208	* The interpretation of the bits in each entry is as follows:
	209	*
	210	* Bit 31 not set, not a terminal entry ("transitional"):
	211	* 30..24 next state
	212	* 23..0 offset delta, to be added up
	213	*
	214	* Bit 31 set, terminal ("final") entry:
	215	* 30..24 next state (regardless of action code)
	216	* 23..20 action code:
	217	* action codes 0 and 1 result in precise-mapping Unicode code points
	218	* 0 valid byte sequence
	219	* 19..16 not used, 0
	220	* 15..0 16-bit Unicode BMP code point
	221	* never U+fffe or U+ffff
	222	* 1 valid byte sequence
	223	* 19..0 20-bit Unicode supplementary code point
	224	* never U+fffe or U+ffff
	225	*
	226	* action codes 2 and 3 result in fallback (unidirectional-mapping) Unicode code points
	227	* 2 valid byte sequence (fallback)
	228	* 19..16 not used, 0
	229	* 15..0 16-bit Unicode BMP code point as fallback result
	230	* 3 valid byte sequence (fallback)
	231	* 19..0 20-bit Unicode supplementary code point as fallback result
	232	*
	233	* action codes 4 and 5 may result in roundtrip/fallback/unassigned/illegal results
	234	* depending on the code units they result in
	235	* 4 valid byte sequence
	236	* 19..9 not used, 0
	237	* 8..0 final offset delta
	238	* pointing to one 16-bit code unit which may be
	239	* fffe unassigned -- look for a fallback for this offset
	240	* ffff illegal
	241	* 5 valid byte sequence
	242	* 19..9 not used, 0
	243	* 8..0 final offset delta
	244	* pointing to two 16-bit code units
	245	* (typically UTF-16 surrogates)
	246	* the result depends on the first code unit as follows:
	247	* 0000..d7ff roundtrip BMP code point (1st alone)
	248	* d800..dbff roundtrip surrogate pair (1st, 2nd)
	249	* dc00..dfff fallback surrogate pair (1st-400, 2nd)
	250	* e000 roundtrip BMP code point (2nd alone)
	251	* e001 fallback BMP code point (2nd alone)
	252	* fffe unassigned
	253	* ffff illegal
	254	* (the final offset deltas are at most 255 * 2,
	255	* times 2 because of storing code unit pairs)
	256	*
	257	* 6 unassigned byte sequence
	258	* 19..16 not used, 0
	259	* 15..0 16-bit Unicode BMP code point U+fffe (new with version 2)
	260	* this does not contain a final offset delta because the main
	261	* purpose of this action code is to save scalar offset values;
	262	* therefore, fallback values cannot be assigned to byte
	263	* sequences that result in this action code
	264	* 7 illegal byte sequence
	265	* 19..16 not used, 0
	266	* 15..0 16-bit Unicode BMP code point U+ffff (new with version 2)
	267	* 8 state change only
	268	* 19..0 not used, 0
	269	* useful for state changes in simple stateful encodings,
	270	* at Shift-In/Shift-Out codes
	271	*
	272	*
	273	* 9..15 reserved for future use
	274	* current implementations will only perform a state change
	275	* and ignore bits 19..0
	276	*
	277	* An encoding with contiguous ranges of unassigned byte sequences, like
	278	* Shift-JIS and especially EUC-TW, can be stored efficiently by having
	279	* at least two states for the trail bytes:
	280	* One trail byte state that results in code points, and one that only
	281	* has "unassigned" and "illegal" terminal states.
	282	*
	283	* Note: partly by accident, this data structure supports simple stateful
	284	* encodings without any additional logic.
	285	* Currently, only simple Shift-In/Shift-Out schemes are handled with
	286	* appropriate state tables (especially EBCDIC_STATEFUL!).
	287	*
	288	* MBCS version 2 added:
	289	* unassigned and illegal action codes have U+fffe and U+ffff
	290	* instead of unused bits; this is useful for _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP()
	291	*
	292	* Converting from Unicode to codepage bytes --------------------------------***
	293	*
	294	* The conversion data structure for fromUnicode is designed for the known
	295	* structure of Unicode. It maps from 21-bit code points (0..0x10ffff) to
	296	* a sequence of 1..4 bytes, in addition to a flag that indicates if there is
	297	* a roundtrip mapping.
	298	*
	299	* The lookup is done with a 3-stage trie, using 11/6/4 bits for stage 1/2/3
	300	* like in the character properties table.
	301	* The beginning of the trie is at offsetFromUTable, the beginning of stage 3
	302	* with the resulting bytes is at offsetFromUBytes.
	303	*
	304	* Beginning with version 4, single-byte codepages have a significantly different
	305	* trie compared to other codepages.
	306	* In all cases, the entry in stage 1 is directly the index of the block of
	307	* 64 entries in stage 2.
	308	*
	309	* Single-byte lookup:
	310	*
	311	* Stage 2 only contains 16-bit indexes directly to the 16-blocks in stage 3.
	312	* Stage 3 contains one 16-bit word per result:
	313	* Bits 15..8 indicate the kind of result:
	314	* f roundtrip result
	315	* c fallback result from private-use code point
	316	* 8 fallback result from other code points
	317	* 0 unassigned
	318	* Bits 7..0 contain the codepage byte. A zero byte is always possible.
	319	*
	320	* In version 4.3, the runtime code can build an sbcsIndex for a utf8Friendly
	321	* file. For 2-byte UTF-8 byte sequences and some 3-byte sequences the lookup
	322	* becomes a 2-stage (single-index) trie lookup with 6 bits for stage 3.
	323	* ASCII code points can be looked up with a linear array access into stage 3.
	324	* See maxFastUChar and other details in ucnvmbcs.h.
	325	*
	326	* Multi-byte lookup:
	327	*
	328	* Stage 2 contains a 32-bit word for each 16-block in stage 3:
	329	* Bits 31..16 contain flags for which stage 3 entries contain roundtrip results
	330	* test: MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c)
	331	* If this test is false, then a non-zero result will be interpreted as
	332	* a fallback mapping.
	333	* Bits 15..0 contain the index to stage 3, which must be multiplied by 16*(bytes per char)
	334	*
	335	* Stage 3 contains 2, 3, or 4 bytes per result.
	336	* 2 or 4 bytes are stored as uint16_t/uint32_t in platform endianness,
	337	* while 3 bytes are stored as bytes in big-endian order.
	338	* Leading zero bytes are ignored, and the number of bytes is counted.
	339	* A zero byte mapping result is possible as a roundtrip result.
	340	* For some output types, the actual result is processed from this;
	341	* see ucnv_MBCSFromUnicodeWithOffsets().
	342	*
	343	* Note that stage 1 always contains 0x440=1088 entries (0x440==0x110000>>10),
	344	* or (version 3 and up) for BMP-only codepages, it contains 64 entries.
	345	*
	346	* In version 4.3, a utf8Friendly file contains an mbcsIndex table.
	347	* For 2-byte UTF-8 byte sequences and most 3-byte sequences the lookup
	348	* becomes a 2-stage (single-index) trie lookup with 6 bits for stage 3.
	349	* ASCII code points can be looked up with a linear array access into stage 3.
	350	* See maxFastUChar, mbcsIndex and other details in ucnvmbcs.h.
	351	*
	352	* In version 3, stage 2 blocks may overlap by multiples of the multiplier
	353	* for compaction.
	354	* In version 4, stage 2 blocks (and for single-byte codepages, stage 3 blocks)
	355	* may overlap by any number of entries.
	356	*
	357	* MBCS version 2 added:
	358	* the converter checks for known output types, which allows
	359	* adding new ones without crashing an unaware converter
	360	*/
	361
	362	static const UConverterImpl _SBCSUTF8Impl;
	363	static const UConverterImpl _DBCSUTF8Impl;
	364
	365	/* GB 18030 data ------------------------------------------------------------ */
	366
	367	/* helper macros for linear values for GB 18030 four-byte sequences */
	368	#define LINEAR_18030(a, b, c, d) ((((a)10+(b))126L+(c))*10L+(d))
	369
	370	#define LINEAR_18030_BASE LINEAR_18030(0x81, 0x30, 0x81, 0x30)
	371
	372	#define LINEAR(x) LINEAR_18030(x>>24, (x>>16)&0xff, (x>>8)&0xff, x&0xff)
	373
	374	/*
	375	* Some ranges of GB 18030 where both the Unicode code points and the
	376	* GB four-byte sequences are contiguous and are handled algorithmically by
	377	* the special callback functions below.
	378	* The values are start & end of Unicode & GB codes.
	379	*
	380	* Note that single surrogates are not mapped by GB 18030
	381	* as of the re-released mapping tables from 2000-nov-30.
	382	*/
	383	static const uint32_t
	384	gb18030Ranges[14][4]={
	385	{0x10000, 0x10FFFF, LINEAR(0x90308130), LINEAR(0xE3329A35)},
	386	{0x9FA6, 0xD7FF, LINEAR(0x82358F33), LINEAR(0x8336C738)},
	387	{0x0452, 0x1E3E, LINEAR(0x8130D330), LINEAR(0x8135F436)},
	388	{0x1E40, 0x200F, LINEAR(0x8135F438), LINEAR(0x8136A531)},
	389	{0xE865, 0xF92B, LINEAR(0x8336D030), LINEAR(0x84308534)},
	390	{0x2643, 0x2E80, LINEAR(0x8137A839), LINEAR(0x8138FD38)},
	391	{0xFA2A, 0xFE2F, LINEAR(0x84309C38), LINEAR(0x84318537)},
	392	{0x3CE1, 0x4055, LINEAR(0x8231D438), LINEAR(0x8232AF32)},
	393	{0x361B, 0x3917, LINEAR(0x8230A633), LINEAR(0x8230F237)},
	394	{0x49B8, 0x4C76, LINEAR(0x8234A131), LINEAR(0x8234E733)},
	395	{0x4160, 0x4336, LINEAR(0x8232C937), LINEAR(0x8232F837)},
	396	{0x478E, 0x4946, LINEAR(0x8233E838), LINEAR(0x82349638)},
	397	{0x44D7, 0x464B, LINEAR(0x8233A339), LINEAR(0x8233C931)},
	398	{0xFFE6, 0xFFFF, LINEAR(0x8431A234), LINEAR(0x8431A439)}
	399	};
	400
	401	/* bit flag for UConverter.options indicating GB 18030 special handling */
	402	#define _MBCS_OPTION_GB18030 0x8000
	403
	404	/* bit flag for UConverter.options indicating KEIS,JEF,JIF special handling */
	405	#define _MBCS_OPTION_KEIS 0x01000
	406	#define _MBCS_OPTION_JEF 0x02000
	407	#define _MBCS_OPTION_JIPS 0x04000
	408
	409	#define KEIS_SO_CHAR_1 0x0A
	410	#define KEIS_SO_CHAR_2 0x42
	411	#define KEIS_SI_CHAR_1 0x0A
	412	#define KEIS_SI_CHAR_2 0x41
	413
	414	#define JEF_SO_CHAR 0x28
	415	#define JEF_SI_CHAR 0x29
	416
	417	#define JIPS_SO_CHAR_1 0x1A
	418	#define JIPS_SO_CHAR_2 0x70
	419	#define JIPS_SI_CHAR_1 0x1A
	420	#define JIPS_SI_CHAR_2 0x71
	421
	422	enum SISO_Option {
	423	SI,
	424	SO
	425	};
	426	typedef enum SISO_Option SISO_Option;
	427
	428	static int32_t getSISOBytes(SISO_Option option, uint32_t cnvOption, uint8_t *value) {
	429	int32_t SISOLength = 0;
	430
	431	switch (option) {
	432	case SI:
	433	if ((cnvOption&_MBCS_OPTION_KEIS)!=0) {
	434	value[0] = KEIS_SI_CHAR_1;
	435	value[1] = KEIS_SI_CHAR_2;
	436	SISOLength = 2;
	437	} else if ((cnvOption&_MBCS_OPTION_JEF)!=0) {
	438	value[0] = JEF_SI_CHAR;
	439	SISOLength = 1;
	440	} else if ((cnvOption&_MBCS_OPTION_JIPS)!=0) {
	441	value[0] = JIPS_SI_CHAR_1;
	442	value[1] = JIPS_SI_CHAR_2;
	443	SISOLength = 2;
	444	} else {
	445	value[0] = UCNV_SI;
	446	SISOLength = 1;
	447	}
	448	break;
	449	case SO:
	450	if ((cnvOption&_MBCS_OPTION_KEIS)!=0) {
	451	value[0] = KEIS_SO_CHAR_1;
	452	value[1] = KEIS_SO_CHAR_2;
	453	SISOLength = 2;
	454	} else if ((cnvOption&_MBCS_OPTION_JEF)!=0) {
	455	value[0] = JEF_SO_CHAR;
	456	SISOLength = 1;
	457	} else if ((cnvOption&_MBCS_OPTION_JIPS)!=0) {
	458	value[0] = JIPS_SO_CHAR_1;
	459	value[1] = JIPS_SO_CHAR_2;
	460	SISOLength = 2;
	461	} else {
	462	value[0] = UCNV_SO;
	463	SISOLength = 1;
	464	}
	465	break;
	466	default:
	467	/* Should never happen. */
	468	break;
	469	}
	470
	471	return SISOLength;
	472	}
	473
	474	/* Miscellaneous ------------------------------------------------------------ */
	475
	476	/**
	477	* Callback from ucnv_MBCSEnumToUnicode(), takes 32 mappings from
	478	* consecutive sequences of bytes, starting from the one encoded in value,
	479	* to Unicode code points. (Multiple mappings to reduce per-function call overhead.)
	480	* Does not currently support m:n mappings or reverse fallbacks.
	481	* This function will not be called for sequences of bytes with leading zeros.
	482	*
	483	* @param context an opaque pointer, as passed into ucnv_MBCSEnumToUnicode()
	484	* @param value contains 1..4 bytes of the first byte sequence, right-aligned
	485	* @param codePoints resulting Unicode code points, or negative if a byte sequence does
	486	* not map to anything
	487	* @return TRUE to continue enumeration, FALSE to stop
	488	*/
	489	typedef UBool U_CALLCONV
	490	UConverterEnumToUCallback(const void *context, uint32_t value, UChar32 codePoints[32]);
	491
	492	/* similar to ucnv_MBCSGetNextUChar() but recursive */
	493	static UBool
	494	enumToU(UConverterMBCSTable *mbcsTable, int8_t stateProps[],
	495	int32_t state, uint32_t offset,
	496	uint32_t value,
	497	UConverterEnumToUCallback callback, const void context,
	498	UErrorCode *pErrorCode) {
	499	UChar32 codePoints[32];
	500	const int32_t *row;
	501	const uint16_t *unicodeCodeUnits;
	502	UChar32 anyCodePoints;
	503	int32_t b, limit;
	504
	505	row=mbcsTable->stateTable[state];
	506	unicodeCodeUnits=mbcsTable->unicodeCodeUnits;
	507
	508	value<<=8;
	509	anyCodePoints=-1; /* becomes non-negative if there is a mapping */
	510
	511	b=(stateProps[state]&0x38)<<2;
	512	if(b==0 && stateProps[state]>=0x40) {
	513	/* skip byte sequences with leading zeros because they are not stored in the fromUnicode table */
	514	codePoints[0]=U_SENTINEL;
	515	b=1;
	516	}
	517	limit=((stateProps[state]&7)+1)<<5;
	518	while(b<limit) {
	519	int32_t entry=row[b];
	520	if(MBCS_ENTRY_IS_TRANSITION(entry)) {
	521	int32_t nextState=MBCS_ENTRY_TRANSITION_STATE(entry);
	522	if(stateProps[nextState]>=0) {
	523	/* recurse to a state with non-ignorable actions */
	524	if(!enumToU(
	525	mbcsTable, stateProps, nextState,
	526	offset+MBCS_ENTRY_TRANSITION_OFFSET(entry),
	527	value\|(uint32_t)b,
	528	callback, context,
	529	pErrorCode)) {
	530	return FALSE;
	531	}
	532	}
	533	codePoints[b&0x1f]=U_SENTINEL;
	534	} else {
	535	UChar32 c;
	536	int32_t action;
	537
	538	/*
	539	* An if-else-if chain provides more reliable performance for
	540	* the most common cases compared to a switch.
	541	*/
	542	action=MBCS_ENTRY_FINAL_ACTION(entry);
	543	if(action==MBCS_STATE_VALID_DIRECT_16) {
	544	/* output BMP code point */
	545	c=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	546	} else if(action==MBCS_STATE_VALID_16) {
	547	int32_t finalOffset=offset+MBCS_ENTRY_FINAL_VALUE_16(entry);
	548	c=unicodeCodeUnits[finalOffset];
	549	if(c<0xfffe) {
	550	/* output BMP code point */
	551	} else {
	552	c=U_SENTINEL;
	553	}
	554	} else if(action==MBCS_STATE_VALID_16_PAIR) {
	555	int32_t finalOffset=offset+MBCS_ENTRY_FINAL_VALUE_16(entry);
	556	c=unicodeCodeUnits[finalOffset++];
	557	if(c<0xd800) {
	558	/* output BMP code point below 0xd800 */
	559	} else if(c<=0xdbff) {
	560	/* output roundtrip or fallback supplementary code point */
	561	c=((c&0x3ff)<<10)+unicodeCodeUnits[finalOffset]+(0x10000-0xdc00);
	562	} else if(c==0xe000) {
	563	/* output roundtrip BMP code point above 0xd800 or fallback BMP code point */
	564	c=unicodeCodeUnits[finalOffset];
	565	} else {
	566	c=U_SENTINEL;
	567	}
	568	} else if(action==MBCS_STATE_VALID_DIRECT_20) {
	569	/* output supplementary code point */
	570	c=(UChar32)(MBCS_ENTRY_FINAL_VALUE(entry)+0x10000);
	571	} else {
	572	c=U_SENTINEL;
	573	}
	574
	575	codePoints[b&0x1f]=c;
	576	anyCodePoints&=c;
	577	}
	578	if(((++b)&0x1f)==0) {
	579	if(anyCodePoints>=0) {
	580	if(!callback(context, value\|(uint32_t)(b-0x20), codePoints)) {
	581	return FALSE;
	582	}
	583	anyCodePoints=-1;
	584	}
	585	}
	586	}
	587	return TRUE;
	588	}
	589
	590	/*
	591	* Only called if stateProps[state]==-1.
	592	* A recursive call may do stateProps[state]\|=0x40 if this state is the target of an
	593	* MBCS_STATE_CHANGE_ONLY.
	594	*/
	595	static int8_t
	596	getStateProp(const int32_t (*stateTable)[256], int8_t stateProps[], int state) {
	597	const int32_t *row;
	598	int32_t min, max, entry, nextState;
	599
	600	row=stateTable[state];
	601	stateProps[state]=0;
	602
	603	/* find first non-ignorable state */
	604	for(min=0;; ++min) {
	605	entry=row[min];
	606	nextState=MBCS_ENTRY_STATE(entry);
	607	if(stateProps[nextState]==-1) {
	608	getStateProp(stateTable, stateProps, nextState);
	609	}
	610	if(MBCS_ENTRY_IS_TRANSITION(entry)) {
	611	if(stateProps[nextState]>=0) {
	612	break;
	613	}
	614	} else if(MBCS_ENTRY_FINAL_ACTION(entry)<MBCS_STATE_UNASSIGNED) {
	615	break;
	616	}
	617	if(min==0xff) {
	618	stateProps[state]=-0x40; /* (int8_t)0xc0 */
	619	return stateProps[state];
	620	}
	621	}
	622	stateProps[state]\|=(int8_t)((min>>5)<<3);
	623
	624	/* find last non-ignorable state */
	625	for(max=0xff; min<max; --max) {
	626	entry=row[max];
	627	nextState=MBCS_ENTRY_STATE(entry);
	628	if(stateProps[nextState]==-1) {
	629	getStateProp(stateTable, stateProps, nextState);
	630	}
	631	if(MBCS_ENTRY_IS_TRANSITION(entry)) {
	632	if(stateProps[nextState]>=0) {
	633	break;
	634	}
	635	} else if(MBCS_ENTRY_FINAL_ACTION(entry)<MBCS_STATE_UNASSIGNED) {
	636	break;
	637	}
	638	}
	639	stateProps[state]\|=(int8_t)(max>>5);
	640
	641	/* recurse further and collect direct-state information */
	642	while(min<=max) {
	643	entry=row[min];
	644	nextState=MBCS_ENTRY_STATE(entry);
	645	if(stateProps[nextState]==-1) {
	646	getStateProp(stateTable, stateProps, nextState);
	647	}
	648	if(MBCS_ENTRY_IS_FINAL(entry)) {
	649	stateProps[nextState]\|=0x40;
	650	if(MBCS_ENTRY_FINAL_ACTION(entry)<=MBCS_STATE_FALLBACK_DIRECT_20) {
	651	stateProps[state]\|=0x40;
	652	}
	653	}
	654	++min;
	655	}
	656	return stateProps[state];
	657	}
	658
	659	/*
	660	* Internal function enumerating the toUnicode data of an MBCS converter.
	661	* Currently only used for reconstituting data for a MBCS_OPT_NO_FROM_U
	662	* table, but could also be used for a future ucnv_getUnicodeSet() option
	663	* that includes reverse fallbacks (after updating this function's implementation).
	664	* Currently only handles roundtrip mappings.
	665	* Does not currently handle extensions.
	666	*/
	667	static void
	668	ucnv_MBCSEnumToUnicode(UConverterMBCSTable *mbcsTable,
	669	UConverterEnumToUCallback callback, const void context,
	670	UErrorCode *pErrorCode) {
	671	/*
	672	* Properties for each state, to speed up the enumeration.
	673	* Ignorable actions are unassigned/illegal/state-change-only:
	674	* They do not lead to mappings.
	675	*
	676	* Bits 7..6:
	677	* 1 direct/initial state (stateful converters have multiple)
	678	* 0 non-initial state with transitions or with non-ignorable result actions
	679	* -1 final state with only ignorable actions
	680	*
	681	* Bits 5..3:
	682	* The lowest byte value with non-ignorable actions is
	683	* value<<5 (rounded down).
	684	*
	685	* Bits 2..0:
	686	* The highest byte value with non-ignorable actions is
	687	* (value<<5)&0x1f (rounded up).
	688	*/
	689	int8_t stateProps[MBCS_MAX_STATE_COUNT];
	690	int32_t state;
	691
	692	uprv_memset(stateProps, -1, sizeof(stateProps));
	693
	694	/* recurse from state 0 and set all stateProps */
	695	getStateProp(mbcsTable->stateTable, stateProps, 0);
	696
	697	for(state=0; state<mbcsTable->countStates; ++state) {
	698	/*if(stateProps[state]==-1) {
	699	printf("unused/unreachable <icu:state> %d\n", state);
	700	}*/
	701	if(stateProps[state]>=0x40) {
	702	/* start from each direct state */
	703	enumToU(
	704	mbcsTable, stateProps, state, 0, 0,
	705	callback, context,
	706	pErrorCode);
	707	}
	708	}
	709	}
	710
	711	U_CFUNC void
	712	ucnv_MBCSGetFilteredUnicodeSetForUnicode(const UConverterSharedData *sharedData,
	713	const USetAdder *sa,
	714	UConverterUnicodeSet which,
	715	UConverterSetFilter filter,
	716	UErrorCode *pErrorCode) {
	717	const UConverterMBCSTable *mbcsTable;
	718	const uint16_t *table;
	719
	720	uint32_t st3;
	721	uint16_t st1, maxStage1, st2;
	722
	723	UChar32 c;
	724
	725	/* enumerate the from-Unicode trie table */
	726	mbcsTable=&sharedData->mbcs;
	727	table=mbcsTable->fromUnicodeTable;
	728	if(mbcsTable->unicodeMask&UCNV_HAS_SUPPLEMENTARY) {
	729	maxStage1=0x440;
	730	} else {
	731	maxStage1=0x40;
	732	}
	733
	734	c=0; /* keep track of the current code point while enumerating */
	735
	736	if(mbcsTable->outputType==MBCS_OUTPUT_1) {
	737	const uint16_t stage2, stage3, *results;
	738	uint16_t minValue;
	739
	740	results=(const uint16_t *)mbcsTable->fromUnicodeBytes;
	741
	742	/*
	743	* Set a threshold variable for selecting which mappings to use.
	744	* See ucnv_MBCSSingleFromBMPWithOffsets() and
	745	* MBCS_SINGLE_RESULT_FROM_U() for details.
	746	*/
	747	if(which==UCNV_ROUNDTRIP_SET) {
	748	/* use only roundtrips */
	749	minValue=0xf00;
	750	} else /* UCNV_ROUNDTRIP_AND_FALLBACK_SET */ {
	751	/* use all roundtrip and fallback results */
	752	minValue=0x800;
	753	}
	754
	755	for(st1=0; st1<maxStage1; ++st1) {
	756	st2=table[st1];
	757	if(st2>maxStage1) {
	758	stage2=table+st2;
	759	for(st2=0; st2<64; ++st2) {
	760	if((st3=stage2[st2])!=0) {
	761	/* read the stage 3 block */
	762	stage3=results+st3;
	763
	764	do {
	765	if(*stage3++>=minValue) {
	766	sa->add(sa->set, c);
	767	}
	768	} while((++c&0xf)!=0);
	769	} else {
	770	c+=16; /* empty stage 3 block */
	771	}
	772	}
	773	} else {
	774	c+=1024; /* empty stage 2 block */
	775	}
	776	}
	777	} else {
	778	const uint32_t *stage2;
	779	const uint8_t stage3, bytes;
	780	uint32_t st3Multiplier;
	781	uint32_t value;
	782	UBool useFallback;
	783
	784	bytes=mbcsTable->fromUnicodeBytes;
	785
	786	useFallback=(UBool)(which==UCNV_ROUNDTRIP_AND_FALLBACK_SET);
	787
	788	switch(mbcsTable->outputType) {
	789	case MBCS_OUTPUT_3:
	790	case MBCS_OUTPUT_4_EUC:
	791	st3Multiplier=3;
	792	break;
	793	case MBCS_OUTPUT_4:
	794	st3Multiplier=4;
	795	break;
	796	default:
	797	st3Multiplier=2;
	798	break;
	799	}
	800
	801	for(st1=0; st1<maxStage1; ++st1) {
	802	st2=table[st1];
	803	if(st2>(maxStage1>>1)) {
	804	stage2=(const uint32_t *)table+st2;
	805	for(st2=0; st2<64; ++st2) {
	806	if((st3=stage2[st2])!=0) {
	807	/* read the stage 3 block */
	808	stage3=bytes+st3Multiplier16(uint32_t)(uint16_t)st3;
	809
	810	/* get the roundtrip flags for the stage 3 block */
	811	st3>>=16;
	812
	813	/*
	814	* Add code points for which the roundtrip flag is set,
	815	* or which map to non-zero bytes if we use fallbacks.
	816	* See ucnv_MBCSFromUnicodeWithOffsets() for details.
	817	*/
	818	switch(filter) {
	819	case UCNV_SET_FILTER_NONE:
	820	do {
	821	if(st3&1) {
	822	sa->add(sa->set, c);
	823	stage3+=st3Multiplier;
	824	} else if(useFallback) {
	825	uint8_t b=0;
	826	switch(st3Multiplier) {
	827	case 4:
	828	b\|=*stage3++;
	829	case 3: /fall through/
	830	b\|=*stage3++;
	831	case 2: /fall through/
	832	b\|=stage3[0]\|stage3[1];
	833	stage3+=2;
	834	default:
	835	break;
	836	}
	837	if(b!=0) {
	838	sa->add(sa->set, c);
	839	}
	840	}
	841	st3>>=1;
	842	} while((++c&0xf)!=0);
	843	break;
	844	case UCNV_SET_FILTER_DBCS_ONLY:
	845	/* Ignore single-byte results (<0x100). */
	846	do {
	847	if(((st3&1)!=0 \|\| useFallback) && ((const uint16_t )stage3)>=0x100) {
	848	sa->add(sa->set, c);
	849	}
	850	st3>>=1;
	851	stage3+=2; /* +=st3Multiplier */
	852	} while((++c&0xf)!=0);
	853	break;
	854	case UCNV_SET_FILTER_2022_CN:
	855	/* Only add code points that map to CNS 11643 planes 1 & 2 for non-EXT ISO-2022-CN. */
	856	do {
	857	if(((st3&1)!=0 \|\| useFallback) && ((value=*stage3)==0x81 \|\| value==0x82)) {
	858	sa->add(sa->set, c);
	859	}
	860	st3>>=1;
	861	stage3+=3; /* +=st3Multiplier */
	862	} while((++c&0xf)!=0);
	863	break;
	864	case UCNV_SET_FILTER_SJIS:
	865	/* Only add code points that map to Shift-JIS codes corresponding to JIS X 0208. */
	866	do {
	867	if(((st3&1)!=0 \|\| useFallback) && (value=((const uint16_t )stage3))>=0x8140 && value<=0xeffc) {
	868	sa->add(sa->set, c);
	869	}
	870	st3>>=1;
	871	stage3+=2; /* +=st3Multiplier */
	872	} while((++c&0xf)!=0);
	873	break;
	874	case UCNV_SET_FILTER_GR94DBCS:
	875	/* Only add code points that map to ISO 2022 GR 94 DBCS codes (each byte A1..FE). */
	876	do {
	877	if( ((st3&1)!=0 \|\| useFallback) &&
	878	(uint16_t)((value=((const uint16_t )stage3)) - 0xa1a1)<=(0xfefe - 0xa1a1) &&
	879	(uint8_t)(value-0xa1)<=(0xfe - 0xa1)
	880	) {
	881	sa->add(sa->set, c);
	882	}
	883	st3>>=1;
	884	stage3+=2; /* +=st3Multiplier */
	885	} while((++c&0xf)!=0);
	886	break;
	887	case UCNV_SET_FILTER_HZ:
	888	/* Only add code points that are suitable for HZ DBCS (lead byte A1..FD). */
	889	do {
	890	if( ((st3&1)!=0 \|\| useFallback) &&
	891	(uint16_t)((value=((const uint16_t )stage3))-0xa1a1)<=(0xfdfe - 0xa1a1) &&
	892	(uint8_t)(value-0xa1)<=(0xfe - 0xa1)
	893	) {
	894	sa->add(sa->set, c);
	895	}
	896	st3>>=1;
	897	stage3+=2; /* +=st3Multiplier */
	898	} while((++c&0xf)!=0);
	899	break;
	900	default:
	901	*pErrorCode=U_INTERNAL_PROGRAM_ERROR;
	902	return;
	903	}
	904	} else {
	905	c+=16; /* empty stage 3 block */
	906	}
	907	}
	908	} else {
	909	c+=1024; /* empty stage 2 block */
	910	}
	911	}
	912	}
	913
	914	ucnv_extGetUnicodeSet(sharedData, sa, which, filter, pErrorCode);
	915	}
	916
	917	U_CFUNC void
	918	ucnv_MBCSGetUnicodeSetForUnicode(const UConverterSharedData *sharedData,
	919	const USetAdder *sa,
	920	UConverterUnicodeSet which,
	921	UErrorCode *pErrorCode) {
	922	ucnv_MBCSGetFilteredUnicodeSetForUnicode(
	923	sharedData, sa, which,
	924	sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY ?
	925	UCNV_SET_FILTER_DBCS_ONLY :
	926	UCNV_SET_FILTER_NONE,
	927	pErrorCode);
	928	}
	929
	930	static void
	931	ucnv_MBCSGetUnicodeSet(const UConverter *cnv,
	932	const USetAdder *sa,
	933	UConverterUnicodeSet which,
	934	UErrorCode *pErrorCode) {
	935	if(cnv->options&_MBCS_OPTION_GB18030) {
	936	sa->addRange(sa->set, 0, 0xd7ff);
	937	sa->addRange(sa->set, 0xe000, 0x10ffff);
	938	} else {
	939	ucnv_MBCSGetUnicodeSetForUnicode(cnv->sharedData, sa, which, pErrorCode);
	940	}
	941	}
	942
	943	/* conversion extensions for input not in the main table -------------------- */
	944
	945	/*
	946	* Hardcoded extension handling for GB 18030.
	947	* Definition of LINEAR macros and gb18030Ranges see near the beginning of the file.
	948	*
	949	* In the future, conversion extensions may handle m:n mappings and delta tables,
	950	* see http://source.icu-project.org/repos/icu/icuhtml/trunk/design/conversion/conversion_extensions.html
	951	*
	952	* If an input character cannot be mapped, then these functions set an error
	953	* code. The framework will then call the callback function.
	954	*/
	955
	956	/*
	957	* @return if(U_FAILURE) return the code point for cnv->fromUChar32
	958	* else return 0 after output has been written to the target
	959	*/
	960	static UChar32
	961	_extFromU(UConverter cnv, const UConverterSharedData sharedData,
	962	UChar32 cp,
	963	const UChar *source, const UChar sourceLimit,
	964	uint8_t *target, const uint8_t targetLimit,
	965	int32_t **offsets, int32_t sourceIndex,
	966	UBool flush,
	967	UErrorCode *pErrorCode) {
	968	const int32_t *cx;
	969
	970	cnv->useSubChar1=FALSE;
	971
	972	if( (cx=sharedData->mbcs.extIndexes)!=NULL &&
	973	ucnv_extInitialMatchFromU(
	974	cnv, cx,
	975	cp, source, sourceLimit,
	976	(char *)target, (char )targetLimit,
	977	offsets, sourceIndex,
	978	flush,
	979	pErrorCode)
	980	) {
	981	return 0; /* an extension mapping handled the input */
	982	}
	983
	984	/* GB 18030 */
	985	if((cnv->options&_MBCS_OPTION_GB18030)!=0) {
	986	const uint32_t *range;
	987	int32_t i;
	988
	989	range=gb18030Ranges[0];
	990	for(i=0; i<sizeof(gb18030Ranges)/sizeof(gb18030Ranges[0]); range+=4, ++i) {
	991	if(range[0]<=(uint32_t)cp && (uint32_t)cp<=range[1]) {
	992	/* found the Unicode code point, output the four-byte sequence for it */
	993	uint32_t linear;
	994	char bytes[4];
	995
	996	/* get the linear value of the first GB 18030 code in this range */
	997	linear=range[2]-LINEAR_18030_BASE;
	998
	999	/* add the offset from the beginning of the range */
	1000	linear+=((uint32_t)cp-range[0]);
	1001
	1002	/* turn this into a four-byte sequence */
	1003	bytes[3]=(char)(0x30+linear%10); linear/=10;
	1004	bytes[2]=(char)(0x81+linear%126); linear/=126;
	1005	bytes[1]=(char)(0x30+linear%10); linear/=10;
	1006	bytes[0]=(char)(0x81+linear);
	1007
	1008	/* output this sequence */
	1009	ucnv_fromUWriteBytes(cnv,
	1010	bytes, 4, (char *)target, (char )targetLimit,
	1011	offsets, sourceIndex, pErrorCode);
	1012	return 0;
	1013	}
	1014	}
	1015	}
	1016
	1017	/* no mapping */
	1018	*pErrorCode=U_INVALID_CHAR_FOUND;
	1019	return cp;
	1020	}
	1021
	1022	/*
	1023	* Input sequence: cnv->toUBytes[0..length[
	1024	* @return if(U_FAILURE) return the length (toULength, byteIndex) for the input
	1025	* else return 0 after output has been written to the target
	1026	*/
	1027	static int8_t
	1028	_extToU(UConverter cnv, const UConverterSharedData sharedData,
	1029	int8_t length,
	1030	const uint8_t *source, const uint8_t sourceLimit,
	1031	UChar *target, const UChar targetLimit,
	1032	int32_t **offsets, int32_t sourceIndex,
	1033	UBool flush,
	1034	UErrorCode *pErrorCode) {
	1035	const int32_t *cx;
	1036
	1037	if( (cx=sharedData->mbcs.extIndexes)!=NULL &&
	1038	ucnv_extInitialMatchToU(
	1039	cnv, cx,
	1040	length, (const char *)source, (const char )sourceLimit,
	1041	target, targetLimit,
	1042	offsets, sourceIndex,
	1043	flush,
	1044	pErrorCode)
	1045	) {
	1046	return 0; /* an extension mapping handled the input */
	1047	}
	1048
	1049	/* GB 18030 */
	1050	if(length==4 && (cnv->options&_MBCS_OPTION_GB18030)!=0) {
	1051	const uint32_t *range;
	1052	uint32_t linear;
	1053	int32_t i;
	1054
	1055	linear=LINEAR_18030(cnv->toUBytes[0], cnv->toUBytes[1], cnv->toUBytes[2], cnv->toUBytes[3]);
	1056	range=gb18030Ranges[0];
	1057	for(i=0; i<sizeof(gb18030Ranges)/sizeof(gb18030Ranges[0]); range+=4, ++i) {
	1058	if(range[2]<=linear && linear<=range[3]) {
	1059	/* found the sequence, output the Unicode code point for it */
	1060	*pErrorCode=U_ZERO_ERROR;
	1061
	1062	/* add the linear difference between the input and start sequences to the start code point */
	1063	linear=range[0]+(linear-range[2]);
	1064
	1065	/* output this code point */
	1066	ucnv_toUWriteCodePoint(cnv, linear, target, targetLimit, offsets, sourceIndex, pErrorCode);
	1067
	1068	return 0;
	1069	}
	1070	}
	1071	}
	1072
	1073	/* no mapping */
	1074	*pErrorCode=U_INVALID_CHAR_FOUND;
	1075	return length;
	1076	}
	1077
	1078	/* EBCDIC swap LF<->NL ------------------------------------------------------ */
	1079
	1080	/*
	1081	* This code modifies a standard EBCDIC<->Unicode mapping table for
	1082	* OS/390 (z/OS) Unix System Services (Open Edition).
	1083	* The difference is in the mapping of Line Feed and New Line control codes:
	1084	* Standard EBCDIC maps
	1085	*
	1086	* <U000A> \x25 \|0
	1087	* <U0085> \x15 \|0
	1088	*
	1089	* but OS/390 USS EBCDIC swaps the control codes for LF and NL,
	1090	* mapping
	1091	*
	1092	* <U000A> \x15 \|0
	1093	* <U0085> \x25 \|0
	1094	*
	1095	* This code modifies a loaded standard EBCDIC<->Unicode mapping table
	1096	* by copying it into allocated memory and swapping the LF and NL values.
	1097	* It allows to support the same EBCDIC charset in both versions without
	1098	* duplicating the entire installed table.
	1099	*/
	1100
	1101	/* standard EBCDIC codes */
	1102	#define EBCDIC_LF 0x25
	1103	#define EBCDIC_NL 0x15
	1104
	1105	/* standard EBCDIC codes with roundtrip flag as stored in Unicode-to-single-byte tables */
	1106	#define EBCDIC_RT_LF 0xf25
	1107	#define EBCDIC_RT_NL 0xf15
	1108
	1109	/* Unicode code points */
	1110	#define U_LF 0x0a
	1111	#define U_NL 0x85
	1112
	1113	static UBool
	1114	_EBCDICSwapLFNL(UConverterSharedData sharedData, UErrorCode pErrorCode) {
	1115	UConverterMBCSTable *mbcsTable;
	1116
	1117	const uint16_t table, results;
	1118	const uint8_t *bytes;
	1119
	1120	int32_t (*newStateTable)[256];
	1121	uint16_t *newResults;
	1122	uint8_t *p;
	1123	char *name;
	1124
	1125	uint32_t stage2Entry;
	1126	uint32_t size, sizeofFromUBytes;
	1127
	1128	mbcsTable=&sharedData->mbcs;
	1129
	1130	table=mbcsTable->fromUnicodeTable;
	1131	bytes=mbcsTable->fromUnicodeBytes;
	1132	results=(const uint16_t *)bytes;
	1133
	1134	/*
	1135	* Check that this is an EBCDIC table with SBCS portion -
	1136	* SBCS or EBCDIC_STATEFUL with standard EBCDIC LF and NL mappings.
	1137	*
	1138	* If not, ignore the option. Options are always ignored if they do not apply.
	1139	*/
	1140	if(!(
	1141	(mbcsTable->outputType==MBCS_OUTPUT_1 \|\| mbcsTable->outputType==MBCS_OUTPUT_2_SISO) &&
	1142	mbcsTable->stateTable[0][EBCDIC_LF]==MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_LF) &&
	1143	mbcsTable->stateTable[0][EBCDIC_NL]==MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_NL)
	1144	)) {
	1145	return FALSE;
	1146	}
	1147
	1148	if(mbcsTable->outputType==MBCS_OUTPUT_1) {
	1149	if(!(
	1150	EBCDIC_RT_LF==MBCS_SINGLE_RESULT_FROM_U(table, results, U_LF) &&
	1151	EBCDIC_RT_NL==MBCS_SINGLE_RESULT_FROM_U(table, results, U_NL)
	1152	)) {
	1153	return FALSE;
	1154	}
	1155	} else /* MBCS_OUTPUT_2_SISO */ {
	1156	stage2Entry=MBCS_STAGE_2_FROM_U(table, U_LF);
	1157	if(!(
	1158	MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, U_LF)!=0 &&
	1159	EBCDIC_LF==MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, U_LF)
	1160	)) {
	1161	return FALSE;
	1162	}
	1163
	1164	stage2Entry=MBCS_STAGE_2_FROM_U(table, U_NL);
	1165	if(!(
	1166	MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, U_NL)!=0 &&
	1167	EBCDIC_NL==MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, U_NL)
	1168	)) {
	1169	return FALSE;
	1170	}
	1171	}
	1172
	1173	if(mbcsTable->fromUBytesLength>0) {
	1174	/*
	1175	* We _know_ the number of bytes in the fromUnicodeBytes array
	1176	* starting with header.version 4.1.
	1177	*/
	1178	sizeofFromUBytes=mbcsTable->fromUBytesLength;
	1179	} else {
	1180	/*
	1181	* Otherwise:
	1182	* There used to be code to enumerate the fromUnicode
	1183	* trie and find the highest entry, but it was removed in ICU 3.2
	1184	* because it was not tested and caused a low code coverage number.
	1185	* See Jitterbug 3674.
	1186	* This affects only some .cnv file formats with a header.version
	1187	* below 4.1, and only when swaplfnl is requested.
	1188	*
	1189	* ucnvmbcs.c revision 1.99 is the last one with the
	1190	* ucnv_MBCSSizeofFromUBytes() function.
	1191	*/
	1192	*pErrorCode=U_INVALID_FORMAT_ERROR;
	1193	return FALSE;
	1194	}
	1195
	1196	/*
	1197	* The table has an appropriate format.
	1198	* Allocate and build
	1199	* - a modified to-Unicode state table
	1200	* - a modified from-Unicode output array
	1201	* - a converter name string with the swap option appended
	1202	*/
	1203	size=
	1204	mbcsTable->countStates*1024+
	1205	sizeofFromUBytes+
	1206	UCNV_MAX_CONVERTER_NAME_LENGTH+20;
	1207	p=(uint8_t *)uprv_malloc(size);
	1208	if(p==NULL) {
	1209	*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
	1210	return FALSE;
	1211	}
	1212
	1213	/* copy and modify the to-Unicode state table */
	1214	newStateTable=(int32_t (*)[256])p;
	1215	uprv_memcpy(newStateTable, mbcsTable->stateTable, mbcsTable->countStates*1024);
	1216
	1217	newStateTable[0][EBCDIC_LF]=MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_NL);
	1218	newStateTable[0][EBCDIC_NL]=MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_LF);
	1219
	1220	/* copy and modify the from-Unicode result table */
	1221	newResults=(uint16_t *)newStateTable[mbcsTable->countStates];
	1222	uprv_memcpy(newResults, bytes, sizeofFromUBytes);
	1223
	1224	/* conveniently, the table access macros work on the left side of expressions */
	1225	if(mbcsTable->outputType==MBCS_OUTPUT_1) {
	1226	MBCS_SINGLE_RESULT_FROM_U(table, newResults, U_LF)=EBCDIC_RT_NL;
	1227	MBCS_SINGLE_RESULT_FROM_U(table, newResults, U_NL)=EBCDIC_RT_LF;
	1228	} else /* MBCS_OUTPUT_2_SISO */ {
	1229	stage2Entry=MBCS_STAGE_2_FROM_U(table, U_LF);
	1230	MBCS_VALUE_2_FROM_STAGE_2(newResults, stage2Entry, U_LF)=EBCDIC_NL;
	1231
	1232	stage2Entry=MBCS_STAGE_2_FROM_U(table, U_NL);
	1233	MBCS_VALUE_2_FROM_STAGE_2(newResults, stage2Entry, U_NL)=EBCDIC_LF;
	1234	}
	1235
	1236	/* set the canonical converter name */
	1237	name=(char *)newResults+sizeofFromUBytes;
	1238	uprv_strcpy(name, sharedData->staticData->name);
	1239	uprv_strcat(name, UCNV_SWAP_LFNL_OPTION_STRING);
	1240
	1241	/* set the pointers */
	1242	umtx_lock(NULL);
	1243	if(mbcsTable->swapLFNLStateTable==NULL) {
	1244	mbcsTable->swapLFNLStateTable=newStateTable;
	1245	mbcsTable->swapLFNLFromUnicodeBytes=(uint8_t *)newResults;
	1246	mbcsTable->swapLFNLName=name;
	1247
	1248	newStateTable=NULL;
	1249	}
	1250	umtx_unlock(NULL);
	1251
	1252	/* release the allocated memory if another thread beat us to it */
	1253	if(newStateTable!=NULL) {
	1254	uprv_free(newStateTable);
	1255	}
	1256	return TRUE;
	1257	}
	1258
	1259	/* reconstitute omitted fromUnicode data ------------------------------------ */
	1260
	1261	/* for details, compare with genmbcs.c MBCSAddFromUnicode() and transformEUC() */
	1262	static UBool U_CALLCONV
	1263	writeStage3Roundtrip(const void *context, uint32_t value, UChar32 codePoints[32]) {
	1264	UConverterMBCSTable mbcsTable=(UConverterMBCSTable )context;
	1265	const uint16_t *table;
	1266	uint32_t *stage2;
	1267	uint8_t bytes, p;
	1268	UChar32 c;
	1269	int32_t i, st3;
	1270
	1271	table=mbcsTable->fromUnicodeTable;
	1272	bytes=(uint8_t *)mbcsTable->fromUnicodeBytes;
	1273
	1274	/* for EUC outputTypes, modify the value like genmbcs.c's transformEUC() */
	1275	switch(mbcsTable->outputType) {
	1276	case MBCS_OUTPUT_3_EUC:
	1277	if(value<=0xffff) {
	1278	/* short sequences are stored directly */
	1279	/* code set 0 or 1 */
	1280	} else if(value<=0x8effff) {
	1281	/* code set 2 */
	1282	value&=0x7fff;
	1283	} else /* first byte is 0x8f */ {
	1284	/* code set 3 */
	1285	value&=0xff7f;
	1286	}
	1287	break;
	1288	case MBCS_OUTPUT_4_EUC:
	1289	if(value<=0xffffff) {
	1290	/* short sequences are stored directly */
	1291	/* code set 0 or 1 */
	1292	} else if(value<=0x8effffff) {
	1293	/* code set 2 */
	1294	value&=0x7fffff;
	1295	} else /* first byte is 0x8f */ {
	1296	/* code set 3 */
	1297	value&=0xff7fff;
	1298	}
	1299	break;
	1300	default:
	1301	break;
	1302	}
	1303
	1304	for(i=0; i<=0x1f; ++value, ++i) {
	1305	c=codePoints[i];
	1306	if(c<0) {
	1307	continue;
	1308	}
	1309
	1310	/* locate the stage 2 & 3 data */
	1311	stage2=((uint32_t *)table)+table[c>>10]+((c>>4)&0x3f);
	1312	p=bytes;
	1313	st3=(int32_t)(uint16_t)stage216+(c&0xf);
	1314
	1315	/* write the codepage bytes into stage 3 */
	1316	switch(mbcsTable->outputType) {
	1317	case MBCS_OUTPUT_3:
	1318	case MBCS_OUTPUT_4_EUC:
	1319	p+=st3*3;
	1320	p[0]=(uint8_t)(value>>16);
	1321	p[1]=(uint8_t)(value>>8);
	1322	p[2]=(uint8_t)value;
	1323	break;
	1324	case MBCS_OUTPUT_4:
	1325	((uint32_t *)p)[st3]=value;
	1326	break;
	1327	default:
	1328	/* 2 bytes per character */
	1329	((uint16_t *)p)[st3]=(uint16_t)value;
	1330	break;
	1331	}
	1332
	1333	/* set the roundtrip flag */
	1334	*stage2\|=(1UL<<(16+(c&0xf)));
	1335	}
	1336	return TRUE;
	1337	}
	1338
	1339	static void
	1340	reconstituteData(UConverterMBCSTable *mbcsTable,
	1341	uint32_t stage1Length, uint32_t stage2Length,
	1342	uint32_t fullStage2Length, /* lengths are numbers of units, not bytes */
	1343	UErrorCode *pErrorCode) {
	1344	uint16_t *stage1;
	1345	uint32_t *stage2;
	1346	uint32_t dataLength=stage1Length2+fullStage2Length4+mbcsTable->fromUBytesLength;
	1347	mbcsTable->reconstitutedData=(uint8_t *)uprv_malloc(dataLength);
	1348	if(mbcsTable->reconstitutedData==NULL) {
	1349	*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
	1350	return;
	1351	}
	1352	uprv_memset(mbcsTable->reconstitutedData, 0, dataLength);
	1353
	1354	/* copy existing data and reroute the pointers */
	1355	stage1=(uint16_t *)mbcsTable->reconstitutedData;
	1356	uprv_memcpy(stage1, mbcsTable->fromUnicodeTable, stage1Length*2);
	1357
	1358	stage2=(uint32_t *)(stage1+stage1Length);
	1359	uprv_memcpy(stage2+(fullStage2Length-stage2Length),
	1360	mbcsTable->fromUnicodeTable+stage1Length,
	1361	stage2Length*4);
	1362
	1363	mbcsTable->fromUnicodeTable=stage1;
	1364	mbcsTable->fromUnicodeBytes=(uint8_t *)(stage2+fullStage2Length);
	1365
	1366	/* indexes into stage 2 count from the bottom of the fromUnicodeTable */
	1367	stage2=(uint32_t *)stage1;
	1368
	1369	/* reconstitute the initial part of stage 2 from the mbcsIndex */
	1370	{
	1371	int32_t stageUTF8Length=((int32_t)mbcsTable->maxFastUChar+1)>>6;
	1372	int32_t stageUTF8Index=0;
	1373	int32_t st1, st2, st3, i;
	1374
	1375	for(st1=0; stageUTF8Index<stageUTF8Length; ++st1) {
	1376	st2=stage1[st1];
	1377	if(st2!=stage1Length/2) {
	1378	/* each stage 2 block has 64 entries corresponding to 16 entries in the mbcsIndex */
	1379	for(i=0; i<16; ++i) {
	1380	st3=mbcsTable->mbcsIndex[stageUTF8Index++];
	1381	if(st3!=0) {
	1382	/* an stage 2 entry's index is per stage 3 16-block, not per stage 3 entry */
	1383	st3>>=4;
	1384	/*
	1385	* 4 stage 2 entries point to 4 consecutive stage 3 16-blocks which are
	1386	* allocated together as a single 64-block for access from the mbcsIndex
	1387	*/
	1388	stage2[st2++]=st3++;
	1389	stage2[st2++]=st3++;
	1390	stage2[st2++]=st3++;
	1391	stage2[st2++]=st3;
	1392	} else {
	1393	/* no stage 3 block, skip */
	1394	st2+=4;
	1395	}
	1396	}
	1397	} else {
	1398	/* no stage 2 block, skip */
	1399	stageUTF8Index+=16;
	1400	}
	1401	}
	1402	}
	1403
	1404	/* reconstitute fromUnicodeBytes with roundtrips from toUnicode data */
	1405	ucnv_MBCSEnumToUnicode(mbcsTable, writeStage3Roundtrip, mbcsTable, pErrorCode);
	1406	}
	1407
	1408	/* MBCS setup functions ----------------------------------------------------- */
	1409
	1410	static void
	1411	ucnv_MBCSLoad(UConverterSharedData *sharedData,
	1412	UConverterLoadArgs *pArgs,
	1413	const uint8_t *raw,
	1414	UErrorCode *pErrorCode) {
	1415	UDataInfo info;
	1416	UConverterMBCSTable *mbcsTable=&sharedData->mbcs;
	1417	_MBCSHeader header=(_MBCSHeader )raw;
	1418	uint32_t offset;
	1419	uint32_t headerLength;
	1420	UBool noFromU=FALSE;
	1421
	1422	if(header->version[0]==4) {
	1423	headerLength=MBCS_HEADER_V4_LENGTH;
	1424	} else if(header->version[0]==5 && header->version[1]>=3 &&
	1425	(header->options&MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK)==0) {
	1426	headerLength=header->options&MBCS_OPT_LENGTH_MASK;
	1427	noFromU=(UBool)((header->options&MBCS_OPT_NO_FROM_U)!=0);
	1428	} else {
	1429	*pErrorCode=U_INVALID_TABLE_FORMAT;
	1430	return;
	1431	}
	1432
	1433	mbcsTable->outputType=(uint8_t)header->flags;
	1434	if(noFromU && mbcsTable->outputType==MBCS_OUTPUT_1) {
	1435	*pErrorCode=U_INVALID_TABLE_FORMAT;
	1436	return;
	1437	}
	1438
	1439	/* extension data, header version 4.2 and higher */
	1440	offset=header->flags>>8;
	1441	if(offset!=0) {
	1442	mbcsTable->extIndexes=(const int32_t *)(raw+offset);
	1443	}
	1444
	1445	if(mbcsTable->outputType==MBCS_OUTPUT_EXT_ONLY) {
	1446	UConverterLoadArgs args={ 0 };
	1447	UConverterSharedData *baseSharedData;
	1448	const int32_t *extIndexes;
	1449	const char *baseName;
	1450
	1451	/* extension-only file, load the base table and set values appropriately */
	1452	if((extIndexes=mbcsTable->extIndexes)==NULL) {
	1453	/* extension-only file without extension */
	1454	*pErrorCode=U_INVALID_TABLE_FORMAT;
	1455	return;
	1456	}
	1457
	1458	if(pArgs->nestedLoads!=1) {
	1459	/* an extension table must not be loaded as a base table */
	1460	*pErrorCode=U_INVALID_TABLE_FILE;
	1461	return;
	1462	}
	1463
	1464	/* load the base table */
	1465	baseName=(const char )header+headerLength4;
	1466	if(0==uprv_strcmp(baseName, sharedData->staticData->name)) {
	1467	/* forbid loading this same extension-only file */
	1468	*pErrorCode=U_INVALID_TABLE_FORMAT;
	1469	return;
	1470	}
	1471
	1472	/* TODO parse package name out of the prefix of the base name in the extension .cnv file? */
	1473	args.size=sizeof(UConverterLoadArgs);
	1474	args.nestedLoads=2;
	1475	args.onlyTestIsLoadable=pArgs->onlyTestIsLoadable;
	1476	args.reserved=pArgs->reserved;
	1477	args.options=pArgs->options;
	1478	args.pkg=pArgs->pkg;
	1479	args.name=baseName;
	1480	baseSharedData=ucnv_load(&args, pErrorCode);
	1481	if(U_FAILURE(*pErrorCode)) {
	1482	return;
	1483	}
	1484	if( baseSharedData->staticData->conversionType!=UCNV_MBCS \|\|
	1485	baseSharedData->mbcs.baseSharedData!=NULL
	1486	) {
	1487	ucnv_unload(baseSharedData);
	1488	*pErrorCode=U_INVALID_TABLE_FORMAT;
	1489	return;
	1490	}
	1491	if(pArgs->onlyTestIsLoadable) {
	1492	/*
	1493	* Exit as soon as we know that we can load the converter
	1494	* and the format is valid and supported.
	1495	* The worst that can happen in the following code is a memory
	1496	* allocation error.
	1497	*/
	1498	ucnv_unload(baseSharedData);
	1499	return;
	1500	}
	1501
	1502	/* copy the base table data */
	1503	uprv_memcpy(mbcsTable, &baseSharedData->mbcs, sizeof(UConverterMBCSTable));
	1504
	1505	/* overwrite values with relevant ones for the extension converter */
	1506	mbcsTable->baseSharedData=baseSharedData;
	1507	mbcsTable->extIndexes=extIndexes;
	1508
	1509	/*
	1510	* It would be possible to share the swapLFNL data with a base converter,
	1511	* but the generated name would have to be different, and the memory
	1512	* would have to be free'd only once.
	1513	* It is easier to just create the data for the extension converter
	1514	* separately when it is requested.
	1515	*/
	1516	mbcsTable->swapLFNLStateTable=NULL;
	1517	mbcsTable->swapLFNLFromUnicodeBytes=NULL;
	1518	mbcsTable->swapLFNLName=NULL;
	1519
	1520	/*
	1521	* The reconstitutedData must be deleted only when the base converter
	1522	* is unloaded.
	1523	*/
	1524	mbcsTable->reconstitutedData=NULL;
	1525
	1526	/*
	1527	* Set a special, runtime-only outputType if the extension converter
	1528	* is a DBCS version of a base converter that also maps single bytes.
	1529	*/
	1530	if( sharedData->staticData->conversionType==UCNV_DBCS \|\|
	1531	(sharedData->staticData->conversionType==UCNV_MBCS &&
	1532	sharedData->staticData->minBytesPerChar>=2)
	1533	) {
	1534	if(baseSharedData->mbcs.outputType==MBCS_OUTPUT_2_SISO) {
	1535	/* the base converter is SI/SO-stateful */
	1536	int32_t entry;
	1537
	1538	/* get the dbcs state from the state table entry for SO=0x0e */
	1539	entry=mbcsTable->stateTable[0][0xe];
	1540	if( MBCS_ENTRY_IS_FINAL(entry) &&
	1541	MBCS_ENTRY_FINAL_ACTION(entry)==MBCS_STATE_CHANGE_ONLY &&
	1542	MBCS_ENTRY_FINAL_STATE(entry)!=0
	1543	) {
	1544	mbcsTable->dbcsOnlyState=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry);
	1545
	1546	mbcsTable->outputType=MBCS_OUTPUT_DBCS_ONLY;
	1547	}
	1548	} else if(
	1549	baseSharedData->staticData->conversionType==UCNV_MBCS &&
	1550	baseSharedData->staticData->minBytesPerChar==1 &&
	1551	baseSharedData->staticData->maxBytesPerChar==2 &&
	1552	mbcsTable->countStates<=127
	1553	) {
	1554	/* non-stateful base converter, need to modify the state table */
	1555	int32_t (*newStateTable)[256];
	1556	int32_t *state;
	1557	int32_t i, count;
	1558
	1559	/* allocate a new state table and copy the base state table contents */
	1560	count=mbcsTable->countStates;
	1561	newStateTable=(int32_t ()[256])uprv_malloc((count+1)1024);
	1562	if(newStateTable==NULL) {
	1563	ucnv_unload(baseSharedData);
	1564	*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
	1565	return;
	1566	}
	1567
	1568	uprv_memcpy(newStateTable, mbcsTable->stateTable, count*1024);
	1569
	1570	/* change all final single-byte entries to go to a new all-illegal state */
	1571	state=newStateTable[0];
	1572	for(i=0; i<256; ++i) {
	1573	if(MBCS_ENTRY_IS_FINAL(state[i])) {
	1574	state[i]=MBCS_ENTRY_TRANSITION(count, 0);
	1575	}
	1576	}
	1577
	1578	/* build the new all-illegal state */
	1579	state=newStateTable[count];
	1580	for(i=0; i<256; ++i) {
	1581	state[i]=MBCS_ENTRY_FINAL(0, MBCS_STATE_ILLEGAL, 0);
	1582	}
	1583	mbcsTable->stateTable=(const int32_t (*)[256])newStateTable;
	1584	mbcsTable->countStates=(uint8_t)(count+1);
	1585	mbcsTable->stateTableOwned=TRUE;
	1586
	1587	mbcsTable->outputType=MBCS_OUTPUT_DBCS_ONLY;
	1588	}
	1589	}
	1590
	1591	/*
	1592	* unlike below for files with base tables, do not get the unicodeMask
	1593	* from the sharedData; instead, use the base table's unicodeMask,
	1594	* which we copied in the memcpy above;
	1595	* this is necessary because the static data unicodeMask, especially
	1596	* the UCNV_HAS_SUPPLEMENTARY flag, is part of the base table data
	1597	*/
	1598	} else {
	1599	/* conversion file with a base table; an additional extension table is optional */
	1600	/* make sure that the output type is known */
	1601	switch(mbcsTable->outputType) {
	1602	case MBCS_OUTPUT_1:
	1603	case MBCS_OUTPUT_2:
	1604	case MBCS_OUTPUT_3:
	1605	case MBCS_OUTPUT_4:
	1606	case MBCS_OUTPUT_3_EUC:
	1607	case MBCS_OUTPUT_4_EUC:
	1608	case MBCS_OUTPUT_2_SISO:
	1609	/* OK */
	1610	break;
	1611	default:
	1612	*pErrorCode=U_INVALID_TABLE_FORMAT;
	1613	return;
	1614	}
	1615	if(pArgs->onlyTestIsLoadable) {
	1616	/*
	1617	* Exit as soon as we know that we can load the converter
	1618	* and the format is valid and supported.
	1619	* The worst that can happen in the following code is a memory
	1620	* allocation error.
	1621	*/
	1622	return;
	1623	}
	1624
	1625	mbcsTable->countStates=(uint8_t)header->countStates;
	1626	mbcsTable->countToUFallbacks=header->countToUFallbacks;
	1627	mbcsTable->stateTable=(const int32_t ()[256])(raw+headerLength4);
	1628	mbcsTable->toUFallbacks=(const _MBCSToUFallback *)(mbcsTable->stateTable+header->countStates);
	1629	mbcsTable->unicodeCodeUnits=(const uint16_t *)(raw+header->offsetToUCodeUnits);
	1630
	1631	mbcsTable->fromUnicodeTable=(const uint16_t *)(raw+header->offsetFromUTable);
	1632	mbcsTable->fromUnicodeBytes=(const uint8_t *)(raw+header->offsetFromUBytes);
	1633	mbcsTable->fromUBytesLength=header->fromUBytesLength;
	1634
	1635	/*
	1636	* converter versions 6.1 and up contain a unicodeMask that is
	1637	* used here to select the most efficient function implementations
	1638	*/
	1639	info.size=sizeof(UDataInfo);
	1640	udata_getInfo((UDataMemory *)sharedData->dataMemory, &info);
	1641	if(info.formatVersion[0]>6 \|\| (info.formatVersion[0]==6 && info.formatVersion[1]>=1)) {
	1642	/* mask off possible future extensions to be safe */
	1643	mbcsTable->unicodeMask=(uint8_t)(sharedData->staticData->unicodeMask&3);
	1644	} else {
	1645	/* for older versions, assume worst case: contains anything possible (prevent over-optimizations) */
	1646	mbcsTable->unicodeMask=UCNV_HAS_SUPPLEMENTARY\|UCNV_HAS_SURROGATES;
	1647	}
	1648
	1649	/*
	1650	* _MBCSHeader.version 4.3 adds utf8Friendly data structures.
	1651	* Check for the header version, SBCS vs. MBCS, and for whether the
	1652	* data structures are optimized for code points as high as what the
	1653	* runtime code is designed for.
	1654	* The implementation does not handle mapping tables with entries for
	1655	* unpaired surrogates.
	1656	*/
	1657	if( header->version[1]>=3 &&
	1658	(mbcsTable->unicodeMask&UCNV_HAS_SURROGATES)==0 &&
	1659	(mbcsTable->countStates==1 ?
	1660	(header->version[2]>=(SBCS_FAST_MAX>>8)) :
	1661	(header->version[2]>=(MBCS_FAST_MAX>>8))
	1662	)
	1663	) {
	1664	mbcsTable->utf8Friendly=TRUE;
	1665
	1666	if(mbcsTable->countStates==1) {
	1667	/*
	1668	* SBCS: Stage 3 is allocated in 64-entry blocks for U+0000..SBCS_FAST_MAX or higher.
	1669	* Build a table with indexes to each block, to be used instead of
	1670	* the regular stage 1/2 table.
	1671	*/
	1672	int32_t i;
	1673	for(i=0; i<(SBCS_FAST_LIMIT>>6); ++i) {
	1674	mbcsTable->sbcsIndex[i]=mbcsTable->fromUnicodeTable[mbcsTable->fromUnicodeTable[i>>4]+((i<<2)&0x3c)];
	1675	}
	1676	/* set SBCS_FAST_MAX to reflect the reach of sbcsIndex[] even if header->version[2]>(SBCS_FAST_MAX>>8) */
	1677	mbcsTable->maxFastUChar=SBCS_FAST_MAX;
	1678	} else {
	1679	/*
	1680	* MBCS: Stage 3 is allocated in 64-entry blocks for U+0000..MBCS_FAST_MAX or higher.
	1681	* The .cnv file is prebuilt with an additional stage table with indexes
	1682	* to each block.
	1683	*/
	1684	mbcsTable->mbcsIndex=(const uint16_t *)
	1685	(mbcsTable->fromUnicodeBytes+
	1686	(noFromU ? 0 : mbcsTable->fromUBytesLength));
	1687	mbcsTable->maxFastUChar=(((UChar)header->version[2])<<8)\|0xff;
	1688	}
	1689	}
	1690
	1691	/* calculate a bit set of 4 ASCII characters per bit that round-trip to ASCII bytes */
	1692	{
	1693	uint32_t asciiRoundtrips=0xffffffff;
	1694	int32_t i;
	1695
	1696	for(i=0; i<0x80; ++i) {
	1697	if(mbcsTable->stateTable[0][i]!=MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, i)) {
	1698	asciiRoundtrips&=~((uint32_t)1<<(i>>2));
	1699	}
	1700	}
	1701	mbcsTable->asciiRoundtrips=asciiRoundtrips;
	1702	}
	1703
	1704	if(noFromU) {
	1705	uint32_t stage1Length=
	1706	mbcsTable->unicodeMask&UCNV_HAS_SUPPLEMENTARY ?
	1707	0x440 : 0x40;
	1708	uint32_t stage2Length=
	1709	(header->offsetFromUBytes-header->offsetFromUTable)/4-
	1710	stage1Length/2;
	1711	reconstituteData(mbcsTable, stage1Length, stage2Length, header->fullStage2Length, pErrorCode);
	1712	}
	1713	}
	1714
	1715	/* Set the impl pointer here so that it is set for both extension-only and base tables. */
	1716	if(mbcsTable->utf8Friendly) {
	1717	if(mbcsTable->countStates==1) {
	1718	sharedData->impl=&_SBCSUTF8Impl;
	1719	} else {
	1720	if(mbcsTable->outputType==MBCS_OUTPUT_2) {
	1721	sharedData->impl=&_DBCSUTF8Impl;
	1722	}
	1723	}
	1724	}
	1725
	1726	if(mbcsTable->outputType==MBCS_OUTPUT_DBCS_ONLY \|\| mbcsTable->outputType==MBCS_OUTPUT_2_SISO) {
	1727	/*
	1728	* MBCS_OUTPUT_DBCS_ONLY: No SBCS mappings, therefore ASCII does not roundtrip.
	1729	* MBCS_OUTPUT_2_SISO: Bypass the ASCII fastpath to handle prevLength correctly.
	1730	*/
	1731	mbcsTable->asciiRoundtrips=0;
	1732	}
	1733	}
	1734
	1735	static void
	1736	ucnv_MBCSUnload(UConverterSharedData *sharedData) {
	1737	UConverterMBCSTable *mbcsTable=&sharedData->mbcs;
	1738
	1739	if(mbcsTable->swapLFNLStateTable!=NULL) {
	1740	uprv_free(mbcsTable->swapLFNLStateTable);
	1741	}
	1742	if(mbcsTable->stateTableOwned) {
	1743	uprv_free((void *)mbcsTable->stateTable);
	1744	}
	1745	if(mbcsTable->baseSharedData!=NULL) {
	1746	ucnv_unload(mbcsTable->baseSharedData);
	1747	}
	1748	if(mbcsTable->reconstitutedData!=NULL) {
	1749	uprv_free(mbcsTable->reconstitutedData);
	1750	}
	1751	}
	1752
	1753	static void
	1754	ucnv_MBCSOpen(UConverter *cnv,
	1755	UConverterLoadArgs *pArgs,
	1756	UErrorCode *pErrorCode) {
	1757	UConverterMBCSTable *mbcsTable;
	1758	const int32_t *extIndexes;
	1759	uint8_t outputType;
	1760	int8_t maxBytesPerUChar;
	1761
	1762	if(pArgs->onlyTestIsLoadable) {
	1763	return;
	1764	}
	1765
	1766	mbcsTable=&cnv->sharedData->mbcs;
	1767	outputType=mbcsTable->outputType;
	1768
	1769	if(outputType==MBCS_OUTPUT_DBCS_ONLY) {
	1770	/* the swaplfnl option does not apply, remove it */
	1771	cnv->options=pArgs->options&=~UCNV_OPTION_SWAP_LFNL;
	1772	}
	1773
	1774	if((pArgs->options&UCNV_OPTION_SWAP_LFNL)!=0) {
	1775	/* do this because double-checked locking is broken */
	1776	UBool isCached;
	1777
	1778	umtx_lock(NULL);
	1779	isCached=mbcsTable->swapLFNLStateTable!=NULL;
	1780	umtx_unlock(NULL);
	1781
	1782	if(!isCached) {
	1783	if(!_EBCDICSwapLFNL(cnv->sharedData, pErrorCode)) {
	1784	if(U_FAILURE(*pErrorCode)) {
	1785	return; /* something went wrong */
	1786	}
	1787
	1788	/* the option does not apply, remove it */
	1789	cnv->options=pArgs->options&=~UCNV_OPTION_SWAP_LFNL;
	1790	}
	1791	}
	1792	}
	1793
	1794	if(uprv_strstr(pArgs->name, "18030")!=NULL) {
	1795	if(uprv_strstr(pArgs->name, "gb18030")!=NULL \|\| uprv_strstr(pArgs->name, "GB18030")!=NULL) {
	1796	/* set a flag for GB 18030 mode, which changes the callback behavior */
	1797	cnv->options\|=_MBCS_OPTION_GB18030;
	1798	}
	1799	} else if((uprv_strstr(pArgs->name, "KEIS")!=NULL) \|\| (uprv_strstr(pArgs->name, "keis")!=NULL)) {
	1800	/* set a flag for KEIS converter, which changes the SI/SO character sequence */
	1801	cnv->options\|=_MBCS_OPTION_KEIS;
	1802	} else if((uprv_strstr(pArgs->name, "JEF")!=NULL) \|\| (uprv_strstr(pArgs->name, "jef")!=NULL)) {
	1803	/* set a flag for JEF converter, which changes the SI/SO character sequence */
	1804	cnv->options\|=_MBCS_OPTION_JEF;
	1805	} else if((uprv_strstr(pArgs->name, "JIPS")!=NULL) \|\| (uprv_strstr(pArgs->name, "jips")!=NULL)) {
	1806	/* set a flag for JIPS converter, which changes the SI/SO character sequence */
	1807	cnv->options\|=_MBCS_OPTION_JIPS;
	1808	}
	1809
	1810	/* fix maxBytesPerUChar depending on outputType and options etc. */
	1811	if(outputType==MBCS_OUTPUT_2_SISO) {
	1812	cnv->maxBytesPerUChar=3; /* SO+DBCS */
	1813	}
	1814
	1815	extIndexes=mbcsTable->extIndexes;
	1816	if(extIndexes!=NULL) {
	1817	maxBytesPerUChar=(int8_t)UCNV_GET_MAX_BYTES_PER_UCHAR(extIndexes);
	1818	if(outputType==MBCS_OUTPUT_2_SISO) {
	1819	++maxBytesPerUChar; /* SO + multiple DBCS */
	1820	}
	1821
	1822	if(maxBytesPerUChar>cnv->maxBytesPerUChar) {
	1823	cnv->maxBytesPerUChar=maxBytesPerUChar;
	1824	}
	1825	}
	1826
	1827	#if 0
	1828	/*
	1829	* documentation of UConverter fields used for status
	1830	* all of these fields are (re)set to 0 by ucnv_bld.c and ucnv_reset()
	1831	*/
	1832
	1833	/* toUnicode */
	1834	cnv->toUnicodeStatus=0; /* offset */
	1835	cnv->mode=0; /* state */
	1836	cnv->toULength=0; /* byteIndex */
	1837
	1838	/* fromUnicode */
	1839	cnv->fromUChar32=0;
	1840	cnv->fromUnicodeStatus=1; /* prevLength */
	1841	#endif
	1842	}
	1843
	1844	static const char *
	1845	ucnv_MBCSGetName(const UConverter *cnv) {
	1846	if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0 && cnv->sharedData->mbcs.swapLFNLName!=NULL) {
	1847	return cnv->sharedData->mbcs.swapLFNLName;
	1848	} else {
	1849	return cnv->sharedData->staticData->name;
	1850	}
	1851	}
	1852
	1853	/* MBCS-to-Unicode conversion functions ------------------------------------- */
	1854
	1855	static UChar32
	1856	ucnv_MBCSGetFallback(UConverterMBCSTable *mbcsTable, uint32_t offset) {
	1857	const _MBCSToUFallback *toUFallbacks;
	1858	uint32_t i, start, limit;
	1859
	1860	limit=mbcsTable->countToUFallbacks;
	1861	if(limit>0) {
	1862	/* do a binary search for the fallback mapping */
	1863	toUFallbacks=mbcsTable->toUFallbacks;
	1864	start=0;
	1865	while(start<limit-1) {
	1866	i=(start+limit)/2;
	1867	if(offset<toUFallbacks[i].offset) {
	1868	limit=i;
	1869	} else {
	1870	start=i;
	1871	}
	1872	}
	1873
	1874	/* did we really find it? */
	1875	if(offset==toUFallbacks[start].offset) {
	1876	return toUFallbacks[start].codePoint;
	1877	}
	1878	}
	1879
	1880	return 0xfffe;
	1881	}
	1882
	1883	/* This version of ucnv_MBCSToUnicodeWithOffsets() is optimized for single-byte, single-state codepages. */
	1884	static void
	1885	ucnv_MBCSSingleToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
	1886	UErrorCode *pErrorCode) {
	1887	UConverter *cnv;
	1888	const uint8_t source, sourceLimit;
	1889	UChar *target;
	1890	const UChar *targetLimit;
	1891	int32_t *offsets;
	1892
	1893	const int32_t (*stateTable)[256];
	1894
	1895	int32_t sourceIndex;
	1896
	1897	int32_t entry;
	1898	UChar c;
	1899	uint8_t action;
	1900
	1901	/* set up the local pointers */
	1902	cnv=pArgs->converter;
	1903	source=(const uint8_t *)pArgs->source;
	1904	sourceLimit=(const uint8_t *)pArgs->sourceLimit;
	1905	target=pArgs->target;
	1906	targetLimit=pArgs->targetLimit;
	1907	offsets=pArgs->offsets;
	1908
	1909	if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
	1910	stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable;
	1911	} else {
	1912	stateTable=cnv->sharedData->mbcs.stateTable;
	1913	}
	1914
	1915	/* sourceIndex=-1 if the current character began in the previous buffer */
	1916	sourceIndex=0;
	1917
	1918	/* conversion loop */
	1919	while(source<sourceLimit) {
	1920	/*
	1921	* This following test is to see if available input would overflow the output.
	1922	* It does not catch output of more than one code unit that
	1923	* overflows as a result of a surrogate pair or callback output
	1924	* from the last source byte.
	1925	* Therefore, those situations also test for overflows and will
	1926	* then break the loop, too.
	1927	*/
	1928	if(target>=targetLimit) {
	1929	/* target is full */
	1930	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
	1931	break;
	1932	}
	1933
	1934	entry=stateTable[0][*source++];
	1935	/* MBCS_ENTRY_IS_FINAL(entry) */
	1936
	1937	/* test the most common case first */
	1938	if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
	1939	/* output BMP code point */
	1940	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	1941	if(offsets!=NULL) {
	1942	*offsets++=sourceIndex;
	1943	}
	1944
	1945	/* normal end of action codes: prepare for a new character */
	1946	++sourceIndex;
	1947	continue;
	1948	}
	1949
	1950	/*
	1951	* An if-else-if chain provides more reliable performance for
	1952	* the most common cases compared to a switch.
	1953	*/
	1954	action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry));
	1955	if(action==MBCS_STATE_VALID_DIRECT_20 \|\|
	1956	(action==MBCS_STATE_FALLBACK_DIRECT_20 && UCNV_TO_U_USE_FALLBACK(cnv))
	1957	) {
	1958	entry=MBCS_ENTRY_FINAL_VALUE(entry);
	1959	/* output surrogate pair */
	1960	*target++=(UChar)(0xd800\|(UChar)(entry>>10));
	1961	if(offsets!=NULL) {
	1962	*offsets++=sourceIndex;
	1963	}
	1964	c=(UChar)(0xdc00\|(UChar)(entry&0x3ff));
	1965	if(target<targetLimit) {
	1966	*target++=c;
	1967	if(offsets!=NULL) {
	1968	*offsets++=sourceIndex;
	1969	}
	1970	} else {
	1971	/* target overflow */
	1972	cnv->UCharErrorBuffer[0]=c;
	1973	cnv->UCharErrorBufferLength=1;
	1974	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
	1975	break;
	1976	}
	1977
	1978	++sourceIndex;
	1979	continue;
	1980	} else if(action==MBCS_STATE_FALLBACK_DIRECT_16) {
	1981	if(UCNV_TO_U_USE_FALLBACK(cnv)) {
	1982	/* output BMP code point */
	1983	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	1984	if(offsets!=NULL) {
	1985	*offsets++=sourceIndex;
	1986	}
	1987
	1988	++sourceIndex;
	1989	continue;
	1990	}
	1991	} else if(action==MBCS_STATE_UNASSIGNED) {
	1992	/* just fall through */
	1993	} else if(action==MBCS_STATE_ILLEGAL) {
	1994	/* callback(illegal) */
	1995	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
	1996	} else {
	1997	/* reserved, must never occur */
	1998	++sourceIndex;
	1999	continue;
	2000	}
	2001
	2002	if(U_FAILURE(*pErrorCode)) {
	2003	/* callback(illegal) */
	2004	break;
	2005	} else /* unassigned sequences indicated with byteIndex>0 */ {
	2006	/* try an extension mapping */
	2007	pArgs->source=(const char *)source;
	2008	cnv->toUBytes[0]=*(source-1);
	2009	cnv->toULength=_extToU(cnv, cnv->sharedData,
	2010	1, &source, sourceLimit,
	2011	&target, targetLimit,
	2012	&offsets, sourceIndex,
	2013	pArgs->flush,
	2014	pErrorCode);
	2015	sourceIndex+=1+(int32_t)(source-(const uint8_t *)pArgs->source);
	2016
	2017	if(U_FAILURE(*pErrorCode)) {
	2018	/* not mappable or buffer overflow */
	2019	break;
	2020	}
	2021	}
	2022	}
	2023
	2024	/* write back the updated pointers */
	2025	pArgs->source=(const char *)source;
	2026	pArgs->target=target;
	2027	pArgs->offsets=offsets;
	2028	}
	2029
	2030	/*
	2031	* This version of ucnv_MBCSSingleToUnicodeWithOffsets() is optimized for single-byte, single-state codepages
	2032	* that only map to and from the BMP.
	2033	* In addition to single-byte optimizations, the offset calculations
	2034	* become much easier.
	2035	*/
	2036	static void
	2037	ucnv_MBCSSingleToBMPWithOffsets(UConverterToUnicodeArgs *pArgs,
	2038	UErrorCode *pErrorCode) {
	2039	UConverter *cnv;
	2040	const uint8_t source, sourceLimit, *lastSource;
	2041	UChar *target;
	2042	int32_t targetCapacity, length;
	2043	int32_t *offsets;
	2044
	2045	const int32_t (*stateTable)[256];
	2046
	2047	int32_t sourceIndex;
	2048
	2049	int32_t entry;
	2050	uint8_t action;
	2051
	2052	/* set up the local pointers */
	2053	cnv=pArgs->converter;
	2054	source=(const uint8_t *)pArgs->source;
	2055	sourceLimit=(const uint8_t *)pArgs->sourceLimit;
	2056	target=pArgs->target;
	2057	targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
	2058	offsets=pArgs->offsets;
	2059
	2060	if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
	2061	stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable;
	2062	} else {
	2063	stateTable=cnv->sharedData->mbcs.stateTable;
	2064	}
	2065
	2066	/* sourceIndex=-1 if the current character began in the previous buffer */
	2067	sourceIndex=0;
	2068	lastSource=source;
	2069
	2070	/*
	2071	* since the conversion here is 1:1 UChar:uint8_t, we need only one counter
	2072	* for the minimum of the sourceLength and targetCapacity
	2073	*/
	2074	length=(int32_t)(sourceLimit-source);
	2075	if(length<targetCapacity) {
	2076	targetCapacity=length;
	2077	}
	2078
	2079	#if MBCS_UNROLL_SINGLE_TO_BMP
	2080	/* unrolling makes it faster on Pentium III/Windows 2000 */
	2081	/* unroll the loop with the most common case */
	2082	unrolled:
	2083	if(targetCapacity>=16) {
	2084	int32_t count, loops, oredEntries;
	2085
	2086	loops=count=targetCapacity>>4;
	2087	do {
	2088	oredEntries=entry=stateTable[0][*source++];
	2089	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	2090	oredEntries\|=entry=stateTable[0][*source++];
	2091	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	2092	oredEntries\|=entry=stateTable[0][*source++];
	2093	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	2094	oredEntries\|=entry=stateTable[0][*source++];
	2095	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	2096	oredEntries\|=entry=stateTable[0][*source++];
	2097	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	2098	oredEntries\|=entry=stateTable[0][*source++];
	2099	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	2100	oredEntries\|=entry=stateTable[0][*source++];
	2101	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	2102	oredEntries\|=entry=stateTable[0][*source++];
	2103	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	2104	oredEntries\|=entry=stateTable[0][*source++];
	2105	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	2106	oredEntries\|=entry=stateTable[0][*source++];
	2107	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	2108	oredEntries\|=entry=stateTable[0][*source++];
	2109	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	2110	oredEntries\|=entry=stateTable[0][*source++];
	2111	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	2112	oredEntries\|=entry=stateTable[0][*source++];
	2113	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	2114	oredEntries\|=entry=stateTable[0][*source++];
	2115	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	2116	oredEntries\|=entry=stateTable[0][*source++];
	2117	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	2118	oredEntries\|=entry=stateTable[0][*source++];
	2119	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	2120
	2121	/* were all 16 entries really valid? */
	2122	if(!MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(oredEntries)) {
	2123	/* no, return to the first of these 16 */
	2124	source-=16;
	2125	target-=16;
	2126	break;
	2127	}
	2128	} while(--count>0);
	2129	count=loops-count;
	2130	targetCapacity-=16*count;
	2131
	2132	if(offsets!=NULL) {
	2133	lastSource+=16*count;
	2134	while(count>0) {
	2135	*offsets++=sourceIndex++;
	2136	*offsets++=sourceIndex++;
	2137	*offsets++=sourceIndex++;
	2138	*offsets++=sourceIndex++;
	2139	*offsets++=sourceIndex++;
	2140	*offsets++=sourceIndex++;
	2141	*offsets++=sourceIndex++;
	2142	*offsets++=sourceIndex++;
	2143	*offsets++=sourceIndex++;
	2144	*offsets++=sourceIndex++;
	2145	*offsets++=sourceIndex++;
	2146	*offsets++=sourceIndex++;
	2147	*offsets++=sourceIndex++;
	2148	*offsets++=sourceIndex++;
	2149	*offsets++=sourceIndex++;
	2150	*offsets++=sourceIndex++;
	2151	--count;
	2152	}
	2153	}
	2154	}
	2155	#endif
	2156
	2157	/* conversion loop */
	2158	while(targetCapacity > 0 && source < sourceLimit) {
	2159	entry=stateTable[0][*source++];
	2160	/* MBCS_ENTRY_IS_FINAL(entry) */
	2161
	2162	/* test the most common case first */
	2163	if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
	2164	/* output BMP code point */
	2165	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	2166	--targetCapacity;
	2167	continue;
	2168	}
	2169
	2170	/*
	2171	* An if-else-if chain provides more reliable performance for
	2172	* the most common cases compared to a switch.
	2173	*/
	2174	action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry));
	2175	if(action==MBCS_STATE_FALLBACK_DIRECT_16) {
	2176	if(UCNV_TO_U_USE_FALLBACK(cnv)) {
	2177	/* output BMP code point */
	2178	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	2179	--targetCapacity;
	2180	continue;
	2181	}
	2182	} else if(action==MBCS_STATE_UNASSIGNED) {
	2183	/* just fall through */
	2184	} else if(action==MBCS_STATE_ILLEGAL) {
	2185	/* callback(illegal) */
	2186	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
	2187	} else {
	2188	/* reserved, must never occur */
	2189	continue;
	2190	}
	2191
	2192	/* set offsets since the start or the last extension */
	2193	if(offsets!=NULL) {
	2194	int32_t count=(int32_t)(source-lastSource);
	2195
	2196	/* predecrement: do not set the offset for the callback-causing character */
	2197	while(--count>0) {
	2198	*offsets++=sourceIndex++;
	2199	}
	2200	/* offset and sourceIndex are now set for the current character */
	2201	}
	2202
	2203	if(U_FAILURE(*pErrorCode)) {
	2204	/* callback(illegal) */
	2205	break;
	2206	} else /* unassigned sequences indicated with byteIndex>0 */ {
	2207	/* try an extension mapping */
	2208	lastSource=source;
	2209	cnv->toUBytes[0]=*(source-1);
	2210	cnv->toULength=_extToU(cnv, cnv->sharedData,
	2211	1, &source, sourceLimit,
	2212	&target, pArgs->targetLimit,
	2213	&offsets, sourceIndex,
	2214	pArgs->flush,
	2215	pErrorCode);
	2216	sourceIndex+=1+(int32_t)(source-lastSource);
	2217
	2218	if(U_FAILURE(*pErrorCode)) {
	2219	/* not mappable or buffer overflow */
	2220	break;
	2221	}
	2222
	2223	/* recalculate the targetCapacity after an extension mapping */
	2224	targetCapacity=(int32_t)(pArgs->targetLimit-target);
	2225	length=(int32_t)(sourceLimit-source);
	2226	if(length<targetCapacity) {
	2227	targetCapacity=length;
	2228	}
	2229	}
	2230
	2231	#if MBCS_UNROLL_SINGLE_TO_BMP
	2232	/* unrolling makes it faster on Pentium III/Windows 2000 */
	2233	goto unrolled;
	2234	#endif
	2235	}
	2236
	2237	if(U_SUCCESS(*pErrorCode) && source<sourceLimit && target>=pArgs->targetLimit) {
	2238	/* target is full */
	2239	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
	2240	}
	2241
	2242	/* set offsets since the start or the last callback */
	2243	if(offsets!=NULL) {
	2244	size_t count=source-lastSource;
	2245	while(count>0) {
	2246	*offsets++=sourceIndex++;
	2247	--count;
	2248	}
	2249	}
	2250
	2251	/* write back the updated pointers */
	2252	pArgs->source=(const char *)source;
	2253	pArgs->target=target;
	2254	pArgs->offsets=offsets;
	2255	}
	2256
	2257	static UBool
	2258	hasValidTrailBytes(const int32_t (*stateTable)[256], uint8_t state) {
	2259	const int32_t *row=stateTable[state];
	2260	int32_t b, entry;
	2261	/* First test for final entries in this state for some commonly valid byte values. */
	2262	entry=row[0xa1];
	2263	if( !MBCS_ENTRY_IS_TRANSITION(entry) &&
	2264	MBCS_ENTRY_FINAL_ACTION(entry)!=MBCS_STATE_ILLEGAL
	2265	) {
	2266	return TRUE;
	2267	}
	2268	entry=row[0x41];
	2269	if( !MBCS_ENTRY_IS_TRANSITION(entry) &&
	2270	MBCS_ENTRY_FINAL_ACTION(entry)!=MBCS_STATE_ILLEGAL
	2271	) {
	2272	return TRUE;
	2273	}
	2274	/* Then test for final entries in this state. */
	2275	for(b=0; b<=0xff; ++b) {
	2276	entry=row[b];
	2277	if( !MBCS_ENTRY_IS_TRANSITION(entry) &&
	2278	MBCS_ENTRY_FINAL_ACTION(entry)!=MBCS_STATE_ILLEGAL
	2279	) {
	2280	return TRUE;
	2281	}
	2282	}
	2283	/* Then recurse for transition entries. */
	2284	for(b=0; b<=0xff; ++b) {
	2285	entry=row[b];
	2286	if( MBCS_ENTRY_IS_TRANSITION(entry) &&
	2287	hasValidTrailBytes(stateTable, (uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry))
	2288	) {
	2289	return TRUE;
	2290	}
	2291	}
	2292	return FALSE;
	2293	}
	2294
	2295	/*
	2296	* Is byte b a single/lead byte in this state?
	2297	* Recurse for transition states, because here we don't want to say that
	2298	* b is a lead byte if all byte sequences that start with b are illegal.
	2299	*/
	2300	static UBool
	2301	isSingleOrLead(const int32_t (*stateTable)[256], uint8_t state, UBool isDBCSOnly, uint8_t b) {
	2302	const int32_t *row=stateTable[state];
	2303	int32_t entry=row[b];
	2304	if(MBCS_ENTRY_IS_TRANSITION(entry)) { /* lead byte */
	2305	return hasValidTrailBytes(stateTable, (uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry));
	2306	} else {
	2307	uint8_t action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry));
	2308	if(action==MBCS_STATE_CHANGE_ONLY && isDBCSOnly) {
	2309	return FALSE; /* SI/SO are illegal for DBCS-only conversion */
	2310	} else {
	2311	return action!=MBCS_STATE_ILLEGAL;
	2312	}
	2313	}
	2314	}
	2315
	2316	U_CFUNC void
	2317	ucnv_MBCSToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
	2318	UErrorCode *pErrorCode) {
	2319	UConverter *cnv;
	2320	const uint8_t source, sourceLimit;
	2321	UChar *target;
	2322	const UChar *targetLimit;
	2323	int32_t *offsets;
	2324
	2325	const int32_t (*stateTable)[256];
	2326	const uint16_t *unicodeCodeUnits;
	2327
	2328	uint32_t offset;
	2329	uint8_t state;
	2330	int8_t byteIndex;
	2331	uint8_t *bytes;
	2332
	2333	int32_t sourceIndex, nextSourceIndex;
	2334
	2335	int32_t entry;
	2336	UChar c;
	2337	uint8_t action;
	2338
	2339	/* use optimized function if possible */
	2340	cnv=pArgs->converter;
	2341
	2342	if(cnv->preToULength>0) {
	2343	/*
	2344	* pass sourceIndex=-1 because we continue from an earlier buffer
	2345	* in the future, this may change with continuous offsets
	2346	*/
	2347	ucnv_extContinueMatchToU(cnv, pArgs, -1, pErrorCode);
	2348
	2349	if(U_FAILURE(*pErrorCode) \|\| cnv->preToULength<0) {
	2350	return;
	2351	}
	2352	}
	2353
	2354	if(cnv->sharedData->mbcs.countStates==1) {
	2355	if(!(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
	2356	ucnv_MBCSSingleToBMPWithOffsets(pArgs, pErrorCode);
	2357	} else {
	2358	ucnv_MBCSSingleToUnicodeWithOffsets(pArgs, pErrorCode);
	2359	}
	2360	return;
	2361	}
	2362
	2363	/* set up the local pointers */
	2364	source=(const uint8_t *)pArgs->source;
	2365	sourceLimit=(const uint8_t *)pArgs->sourceLimit;
	2366	target=pArgs->target;
	2367	targetLimit=pArgs->targetLimit;
	2368	offsets=pArgs->offsets;
	2369
	2370	if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
	2371	stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable;
	2372	} else {
	2373	stateTable=cnv->sharedData->mbcs.stateTable;
	2374	}
	2375	unicodeCodeUnits=cnv->sharedData->mbcs.unicodeCodeUnits;
	2376
	2377	/* get the converter state from UConverter */
	2378	offset=cnv->toUnicodeStatus;
	2379	byteIndex=cnv->toULength;
	2380	bytes=cnv->toUBytes;
	2381
	2382	/*
	2383	* if we are in the SBCS state for a DBCS-only converter,
	2384	* then load the DBCS state from the MBCS data
	2385	* (dbcsOnlyState==0 if it is not a DBCS-only converter)
	2386	*/
	2387	if((state=(uint8_t)(cnv->mode))==0) {
	2388	state=cnv->sharedData->mbcs.dbcsOnlyState;
	2389	}
	2390
	2391	/* sourceIndex=-1 if the current character began in the previous buffer */
	2392	sourceIndex=byteIndex==0 ? 0 : -1;
	2393	nextSourceIndex=0;
	2394
	2395	/* conversion loop */
	2396	while(source<sourceLimit) {
	2397	/*
	2398	* This following test is to see if available input would overflow the output.
	2399	* It does not catch output of more than one code unit that
	2400	* overflows as a result of a surrogate pair or callback output
	2401	* from the last source byte.
	2402	* Therefore, those situations also test for overflows and will
	2403	* then break the loop, too.
	2404	*/
	2405	if(target>=targetLimit) {
	2406	/* target is full */
	2407	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
	2408	break;
	2409	}
	2410
	2411	if(byteIndex==0) {
	2412	/* optimized loop for 1/2-byte input and BMP output */
	2413	if(offsets==NULL) {
	2414	do {
	2415	entry=stateTable[state][*source];
	2416	if(MBCS_ENTRY_IS_TRANSITION(entry)) {
	2417	state=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry);
	2418	offset=MBCS_ENTRY_TRANSITION_OFFSET(entry);
	2419
	2420	++source;
	2421	if( source<sourceLimit &&
	2422	MBCS_ENTRY_IS_FINAL(entry=stateTable[state][*source]) &&
	2423	MBCS_ENTRY_FINAL_ACTION(entry)==MBCS_STATE_VALID_16 &&
	2424	(c=unicodeCodeUnits[offset+MBCS_ENTRY_FINAL_VALUE_16(entry)])<0xfffe
	2425	) {
	2426	++source;
	2427	*target++=c;
	2428	state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
	2429	offset=0;
	2430	} else {
	2431	/* set the state and leave the optimized loop */
	2432	bytes[0]=*(source-1);
	2433	byteIndex=1;
	2434	break;
	2435	}
	2436	} else {
	2437	if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
	2438	/* output BMP code point */
	2439	++source;
	2440	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	2441	state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
	2442	} else {
	2443	/* leave the optimized loop */
	2444	break;
	2445	}
	2446	}
	2447	} while(source<sourceLimit && target<targetLimit);
	2448	} else /* offsets!=NULL */ {
	2449	do {
	2450	entry=stateTable[state][*source];
	2451	if(MBCS_ENTRY_IS_TRANSITION(entry)) {
	2452	state=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry);
	2453	offset=MBCS_ENTRY_TRANSITION_OFFSET(entry);
	2454
	2455	++source;
	2456	if( source<sourceLimit &&
	2457	MBCS_ENTRY_IS_FINAL(entry=stateTable[state][*source]) &&
	2458	MBCS_ENTRY_FINAL_ACTION(entry)==MBCS_STATE_VALID_16 &&
	2459	(c=unicodeCodeUnits[offset+MBCS_ENTRY_FINAL_VALUE_16(entry)])<0xfffe
	2460	) {
	2461	++source;
	2462	*target++=c;
	2463	if(offsets!=NULL) {
	2464	*offsets++=sourceIndex;
	2465	sourceIndex=(nextSourceIndex+=2);
	2466	}
	2467	state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
	2468	offset=0;
	2469	} else {
	2470	/* set the state and leave the optimized loop */
	2471	++nextSourceIndex;
	2472	bytes[0]=*(source-1);
	2473	byteIndex=1;
	2474	break;
	2475	}
	2476	} else {
	2477	if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
	2478	/* output BMP code point */
	2479	++source;
	2480	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	2481	if(offsets!=NULL) {
	2482	*offsets++=sourceIndex;
	2483	sourceIndex=++nextSourceIndex;
	2484	}
	2485	state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
	2486	} else {
	2487	/* leave the optimized loop */
	2488	break;
	2489	}
	2490	}
	2491	} while(source<sourceLimit && target<targetLimit);
	2492	}
	2493
	2494	/*
	2495	* these tests and break statements could be put inside the loop
	2496	* if C had "break outerLoop" like Java
	2497	*/
	2498	if(source>=sourceLimit) {
	2499	break;
	2500	}
	2501	if(target>=targetLimit) {
	2502	/* target is full */
	2503	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
	2504	break;
	2505	}
	2506
	2507	++nextSourceIndex;
	2508	bytes[byteIndex++]=*source++;
	2509	} else /* byteIndex>0 */ {
	2510	++nextSourceIndex;
	2511	entry=stateTable[state][bytes[byteIndex++]=*source++];
	2512	}
	2513
	2514	if(MBCS_ENTRY_IS_TRANSITION(entry)) {
	2515	state=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry);
	2516	offset+=MBCS_ENTRY_TRANSITION_OFFSET(entry);
	2517	continue;
	2518	}
	2519
	2520	/* save the previous state for proper extension mapping with SI/SO-stateful converters */
	2521	cnv->mode=state;
	2522
	2523	/* set the next state early so that we can reuse the entry variable */
	2524	state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
	2525
	2526	/*
	2527	* An if-else-if chain provides more reliable performance for
	2528	* the most common cases compared to a switch.
	2529	*/
	2530	action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry));
	2531	if(action==MBCS_STATE_VALID_16) {
	2532	offset+=MBCS_ENTRY_FINAL_VALUE_16(entry);
	2533	c=unicodeCodeUnits[offset];
	2534	if(c<0xfffe) {
	2535	/* output BMP code point */
	2536	*target++=c;
	2537	if(offsets!=NULL) {
	2538	*offsets++=sourceIndex;
	2539	}
	2540	byteIndex=0;
	2541	} else if(c==0xfffe) {
	2542	if(UCNV_TO_U_USE_FALLBACK(cnv) && (entry=(int32_t)ucnv_MBCSGetFallback(&cnv->sharedData->mbcs, offset))!=0xfffe) {
	2543	/* output fallback BMP code point */
	2544	*target++=(UChar)entry;
	2545	if(offsets!=NULL) {
	2546	*offsets++=sourceIndex;
	2547	}
	2548	byteIndex=0;
	2549	}
	2550	} else {
	2551	/* callback(illegal) */
	2552	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
	2553	}
	2554	} else if(action==MBCS_STATE_VALID_DIRECT_16) {
	2555	/* output BMP code point */
	2556	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	2557	if(offsets!=NULL) {
	2558	*offsets++=sourceIndex;
	2559	}
	2560	byteIndex=0;
	2561	} else if(action==MBCS_STATE_VALID_16_PAIR) {
	2562	offset+=MBCS_ENTRY_FINAL_VALUE_16(entry);
	2563	c=unicodeCodeUnits[offset++];
	2564	if(c<0xd800) {
	2565	/* output BMP code point below 0xd800 */
	2566	*target++=c;
	2567	if(offsets!=NULL) {
	2568	*offsets++=sourceIndex;
	2569	}
	2570	byteIndex=0;
	2571	} else if(UCNV_TO_U_USE_FALLBACK(cnv) ? c<=0xdfff : c<=0xdbff) {
	2572	/* output roundtrip or fallback surrogate pair */
	2573	*target++=(UChar)(c&0xdbff);
	2574	if(offsets!=NULL) {
	2575	*offsets++=sourceIndex;
	2576	}
	2577	byteIndex=0;
	2578	if(target<targetLimit) {
	2579	*target++=unicodeCodeUnits[offset];
	2580	if(offsets!=NULL) {
	2581	*offsets++=sourceIndex;
	2582	}
	2583	} else {
	2584	/* target overflow */
	2585	cnv->UCharErrorBuffer[0]=unicodeCodeUnits[offset];
	2586	cnv->UCharErrorBufferLength=1;
	2587	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
	2588
	2589	offset=0;
	2590	break;
	2591	}
	2592	} else if(UCNV_TO_U_USE_FALLBACK(cnv) ? (c&0xfffe)==0xe000 : c==0xe000) {
	2593	/* output roundtrip BMP code point above 0xd800 or fallback BMP code point */
	2594	*target++=unicodeCodeUnits[offset];
	2595	if(offsets!=NULL) {
	2596	*offsets++=sourceIndex;
	2597	}
	2598	byteIndex=0;
	2599	} else if(c==0xffff) {
	2600	/* callback(illegal) */
	2601	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
	2602	}
	2603	} else if(action==MBCS_STATE_VALID_DIRECT_20 \|\|
	2604	(action==MBCS_STATE_FALLBACK_DIRECT_20 && UCNV_TO_U_USE_FALLBACK(cnv))
	2605	) {
	2606	entry=MBCS_ENTRY_FINAL_VALUE(entry);
	2607	/* output surrogate pair */
	2608	*target++=(UChar)(0xd800\|(UChar)(entry>>10));
	2609	if(offsets!=NULL) {
	2610	*offsets++=sourceIndex;
	2611	}
	2612	byteIndex=0;
	2613	c=(UChar)(0xdc00\|(UChar)(entry&0x3ff));
	2614	if(target<targetLimit) {
	2615	*target++=c;
	2616	if(offsets!=NULL) {
	2617	*offsets++=sourceIndex;
	2618	}
	2619	} else {
	2620	/* target overflow */
	2621	cnv->UCharErrorBuffer[0]=c;
	2622	cnv->UCharErrorBufferLength=1;
	2623	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
	2624
	2625	offset=0;
	2626	break;
	2627	}
	2628	} else if(action==MBCS_STATE_CHANGE_ONLY) {
	2629	/*
	2630	* This serves as a state change without any output.
	2631	* It is useful for reading simple stateful encodings,
	2632	* for example using just Shift-In/Shift-Out codes.
	2633	* The 21 unused bits may later be used for more sophisticated
	2634	* state transitions.
	2635	*/
	2636	if(cnv->sharedData->mbcs.dbcsOnlyState==0) {
	2637	byteIndex=0;
	2638	} else {
	2639	/* SI/SO are illegal for DBCS-only conversion */
	2640	state=(uint8_t)(cnv->mode); /* restore the previous state */
	2641
	2642	/* callback(illegal) */
	2643	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
	2644	}
	2645	} else if(action==MBCS_STATE_FALLBACK_DIRECT_16) {
	2646	if(UCNV_TO_U_USE_FALLBACK(cnv)) {
	2647	/* output BMP code point */
	2648	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	2649	if(offsets!=NULL) {
	2650	*offsets++=sourceIndex;
	2651	}
	2652	byteIndex=0;
	2653	}
	2654	} else if(action==MBCS_STATE_UNASSIGNED) {
	2655	/* just fall through */
	2656	} else if(action==MBCS_STATE_ILLEGAL) {
	2657	/* callback(illegal) */
	2658	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
	2659	} else {
	2660	/* reserved, must never occur */
	2661	byteIndex=0;
	2662	}
	2663
	2664	/* end of action codes: prepare for a new character */
	2665	offset=0;
	2666
	2667	if(byteIndex==0) {
	2668	sourceIndex=nextSourceIndex;
	2669	} else if(U_FAILURE(*pErrorCode)) {
	2670	/* callback(illegal) */
	2671	if(byteIndex>1) {
	2672	/*
	2673	* Ticket 5691: consistent illegal sequences:
	2674	* - We include at least the first byte in the illegal sequence.
	2675	* - If any of the non-initial bytes could be the start of a character,
	2676	* we stop the illegal sequence before the first one of those.
	2677	*/
	2678	UBool isDBCSOnly=(UBool)(cnv->sharedData->mbcs.dbcsOnlyState!=0);
	2679	int8_t i;
	2680	for(i=1;
	2681	i<byteIndex && !isSingleOrLead(stateTable, state, isDBCSOnly, bytes[i]);
	2682	++i) {}
	2683	if(i<byteIndex) {
	2684	/* Back out some bytes. */
	2685	int8_t backOutDistance=byteIndex-i;
	2686	int32_t bytesFromThisBuffer=(int32_t)(source-(const uint8_t *)pArgs->source);
	2687	byteIndex=i; /* length of reported illegal byte sequence */
	2688	if(backOutDistance<=bytesFromThisBuffer) {
	2689	source-=backOutDistance;
	2690	} else {
	2691	/* Back out bytes from the previous buffer: Need to replay them. */
	2692	cnv->preToULength=(int8_t)(bytesFromThisBuffer-backOutDistance);
	2693	/* preToULength is negative! */
	2694	uprv_memcpy(cnv->preToU, bytes+i, -cnv->preToULength);
	2695	source=(const uint8_t *)pArgs->source;
	2696	}
	2697	}
	2698	}
	2699	break;
	2700	} else /* unassigned sequences indicated with byteIndex>0 */ {
	2701	/* try an extension mapping */
	2702	pArgs->source=(const char *)source;
	2703	byteIndex=_extToU(cnv, cnv->sharedData,
	2704	byteIndex, &source, sourceLimit,
	2705	&target, targetLimit,
	2706	&offsets, sourceIndex,
	2707	pArgs->flush,
	2708	pErrorCode);
	2709	sourceIndex=nextSourceIndex+=(int32_t)(source-(const uint8_t *)pArgs->source);
	2710
	2711	if(U_FAILURE(*pErrorCode)) {
	2712	/* not mappable or buffer overflow */
	2713	break;
	2714	}
	2715	}
	2716	}
	2717
	2718	/* set the converter state back into UConverter */
	2719	cnv->toUnicodeStatus=offset;
	2720	cnv->mode=state;
	2721	cnv->toULength=byteIndex;
	2722
	2723	/* write back the updated pointers */
	2724	pArgs->source=(const char *)source;
	2725	pArgs->target=target;
	2726	pArgs->offsets=offsets;
	2727	}
	2728
	2729	/*
	2730	* This version of ucnv_MBCSGetNextUChar() is optimized for single-byte, single-state codepages.
	2731	* We still need a conversion loop in case we find reserved action codes, which are to be ignored.
	2732	*/
	2733	static UChar32
	2734	ucnv_MBCSSingleGetNextUChar(UConverterToUnicodeArgs *pArgs,
	2735	UErrorCode *pErrorCode) {
	2736	UConverter *cnv;
	2737	const int32_t (*stateTable)[256];
	2738	const uint8_t source, sourceLimit;
	2739
	2740	int32_t entry;
	2741	uint8_t action;
	2742
	2743	/* set up the local pointers */
	2744	cnv=pArgs->converter;
	2745	source=(const uint8_t *)pArgs->source;
	2746	sourceLimit=(const uint8_t *)pArgs->sourceLimit;
	2747	if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
	2748	stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable;
	2749	} else {
	2750	stateTable=cnv->sharedData->mbcs.stateTable;
	2751	}
	2752
	2753	/* conversion loop */
	2754	while(source<sourceLimit) {
	2755	entry=stateTable[0][*source++];
	2756	/* MBCS_ENTRY_IS_FINAL(entry) */
	2757
	2758	/* write back the updated pointer early so that we can return directly */
	2759	pArgs->source=(const char *)source;
	2760
	2761	if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
	2762	/* output BMP code point */
	2763	return (UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	2764	}
	2765
	2766	/*
	2767	* An if-else-if chain provides more reliable performance for
	2768	* the most common cases compared to a switch.
	2769	*/
	2770	action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry));
	2771	if( action==MBCS_STATE_VALID_DIRECT_20 \|\|
	2772	(action==MBCS_STATE_FALLBACK_DIRECT_20 && UCNV_TO_U_USE_FALLBACK(cnv))
	2773	) {
	2774	/* output supplementary code point */
	2775	return (UChar32)(MBCS_ENTRY_FINAL_VALUE(entry)+0x10000);
	2776	} else if(action==MBCS_STATE_FALLBACK_DIRECT_16) {
	2777	if(UCNV_TO_U_USE_FALLBACK(cnv)) {
	2778	/* output BMP code point */
	2779	return (UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	2780	}
	2781	} else if(action==MBCS_STATE_UNASSIGNED) {
	2782	/* just fall through */
	2783	} else if(action==MBCS_STATE_ILLEGAL) {
	2784	/* callback(illegal) */
	2785	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
	2786	} else {
	2787	/* reserved, must never occur */
	2788	continue;
	2789	}
	2790
	2791	if(U_FAILURE(*pErrorCode)) {
	2792	/* callback(illegal) */
	2793	break;
	2794	} else /* unassigned sequence */ {
	2795	/* defer to the generic implementation */
	2796	pArgs->source=(const char *)source-1;
	2797	return UCNV_GET_NEXT_UCHAR_USE_TO_U;
	2798	}
	2799	}
	2800
	2801	/* no output because of empty input or only state changes */
	2802	*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
	2803	return 0xffff;
	2804	}
	2805
	2806	/*
	2807	* Version of _MBCSToUnicodeWithOffsets() optimized for single-character
	2808	* conversion without offset handling.
	2809	*
	2810	* When a character does not have a mapping to Unicode, then we return to the
	2811	* generic ucnv_getNextUChar() code for extension/GB 18030 and error/callback
	2812	* handling.
	2813	* We also defer to the generic code in other complicated cases and have them
	2814	* ultimately handled by _MBCSToUnicodeWithOffsets() itself.
	2815	*
	2816	* All normal mappings and errors are handled here.
	2817	*/
	2818	static UChar32
	2819	ucnv_MBCSGetNextUChar(UConverterToUnicodeArgs *pArgs,
	2820	UErrorCode *pErrorCode) {
	2821	UConverter *cnv;
	2822	const uint8_t source, sourceLimit, *lastSource;
	2823
	2824	const int32_t (*stateTable)[256];
	2825	const uint16_t *unicodeCodeUnits;
	2826
	2827	uint32_t offset;
	2828	uint8_t state;
	2829
	2830	int32_t entry;
	2831	UChar32 c;
	2832	uint8_t action;
	2833
	2834	/* use optimized function if possible */
	2835	cnv=pArgs->converter;
	2836
	2837	if(cnv->preToULength>0) {
	2838	/* use the generic code in ucnv_getNextUChar() to continue with a partial match */
	2839	return UCNV_GET_NEXT_UCHAR_USE_TO_U;
	2840	}
	2841
	2842	if(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SURROGATES) {
	2843	/*
	2844	* Using the generic ucnv_getNextUChar() code lets us deal correctly
	2845	* with the rare case of a codepage that maps single surrogates
	2846	* without adding the complexity to this already complicated function here.
	2847	*/
	2848	return UCNV_GET_NEXT_UCHAR_USE_TO_U;
	2849	} else if(cnv->sharedData->mbcs.countStates==1) {
	2850	return ucnv_MBCSSingleGetNextUChar(pArgs, pErrorCode);
	2851	}
	2852
	2853	/* set up the local pointers */
	2854	source=lastSource=(const uint8_t *)pArgs->source;
	2855	sourceLimit=(const uint8_t *)pArgs->sourceLimit;
	2856
	2857	if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
	2858	stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable;
	2859	} else {
	2860	stateTable=cnv->sharedData->mbcs.stateTable;
	2861	}
	2862	unicodeCodeUnits=cnv->sharedData->mbcs.unicodeCodeUnits;
	2863
	2864	/* get the converter state from UConverter */
	2865	offset=cnv->toUnicodeStatus;
	2866
	2867	/*
	2868	* if we are in the SBCS state for a DBCS-only converter,
	2869	* then load the DBCS state from the MBCS data
	2870	* (dbcsOnlyState==0 if it is not a DBCS-only converter)
	2871	*/
	2872	if((state=(uint8_t)(cnv->mode))==0) {
	2873	state=cnv->sharedData->mbcs.dbcsOnlyState;
	2874	}
	2875
	2876	/* conversion loop */
	2877	c=U_SENTINEL;
	2878	while(source<sourceLimit) {
	2879	entry=stateTable[state][*source++];
	2880	if(MBCS_ENTRY_IS_TRANSITION(entry)) {
	2881	state=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry);
	2882	offset+=MBCS_ENTRY_TRANSITION_OFFSET(entry);
	2883
	2884	/* optimization for 1/2-byte input and BMP output */
	2885	if( source<sourceLimit &&
	2886	MBCS_ENTRY_IS_FINAL(entry=stateTable[state][*source]) &&
	2887	MBCS_ENTRY_FINAL_ACTION(entry)==MBCS_STATE_VALID_16 &&
	2888	(c=unicodeCodeUnits[offset+MBCS_ENTRY_FINAL_VALUE_16(entry)])<0xfffe
	2889	) {
	2890	++source;
	2891	state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
	2892	/* output BMP code point */
	2893	break;
	2894	}
	2895	} else {
	2896	/* save the previous state for proper extension mapping with SI/SO-stateful converters */
	2897	cnv->mode=state;
	2898
	2899	/* set the next state early so that we can reuse the entry variable */
	2900	state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
	2901
	2902	/*
	2903	* An if-else-if chain provides more reliable performance for
	2904	* the most common cases compared to a switch.
	2905	*/
	2906	action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry));
	2907	if(action==MBCS_STATE_VALID_DIRECT_16) {
	2908	/* output BMP code point */
	2909	c=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	2910	break;
	2911	} else if(action==MBCS_STATE_VALID_16) {
	2912	offset+=MBCS_ENTRY_FINAL_VALUE_16(entry);
	2913	c=unicodeCodeUnits[offset];
	2914	if(c<0xfffe) {
	2915	/* output BMP code point */
	2916	break;
	2917	} else if(c==0xfffe) {
	2918	if(UCNV_TO_U_USE_FALLBACK(cnv) && (c=ucnv_MBCSGetFallback(&cnv->sharedData->mbcs, offset))!=0xfffe) {
	2919	break;
	2920	}
	2921	} else {
	2922	/* callback(illegal) */
	2923	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
	2924	}
	2925	} else if(action==MBCS_STATE_VALID_16_PAIR) {
	2926	offset+=MBCS_ENTRY_FINAL_VALUE_16(entry);
	2927	c=unicodeCodeUnits[offset++];
	2928	if(c<0xd800) {
	2929	/* output BMP code point below 0xd800 */
	2930	break;
	2931	} else if(UCNV_TO_U_USE_FALLBACK(cnv) ? c<=0xdfff : c<=0xdbff) {
	2932	/* output roundtrip or fallback supplementary code point */
	2933	c=((c&0x3ff)<<10)+unicodeCodeUnits[offset]+(0x10000-0xdc00);
	2934	break;
	2935	} else if(UCNV_TO_U_USE_FALLBACK(cnv) ? (c&0xfffe)==0xe000 : c==0xe000) {
	2936	/* output roundtrip BMP code point above 0xd800 or fallback BMP code point */
	2937	c=unicodeCodeUnits[offset];
	2938	break;
	2939	} else if(c==0xffff) {
	2940	/* callback(illegal) */
	2941	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
	2942	}
	2943	} else if(action==MBCS_STATE_VALID_DIRECT_20 \|\|
	2944	(action==MBCS_STATE_FALLBACK_DIRECT_20 && UCNV_TO_U_USE_FALLBACK(cnv))
	2945	) {
	2946	/* output supplementary code point */
	2947	c=(UChar32)(MBCS_ENTRY_FINAL_VALUE(entry)+0x10000);
	2948	break;
	2949	} else if(action==MBCS_STATE_CHANGE_ONLY) {
	2950	/*
	2951	* This serves as a state change without any output.
	2952	* It is useful for reading simple stateful encodings,
	2953	* for example using just Shift-In/Shift-Out codes.
	2954	* The 21 unused bits may later be used for more sophisticated
	2955	* state transitions.
	2956	*/
	2957	if(cnv->sharedData->mbcs.dbcsOnlyState!=0) {
	2958	/* SI/SO are illegal for DBCS-only conversion */
	2959	state=(uint8_t)(cnv->mode); /* restore the previous state */
	2960
	2961	/* callback(illegal) */
	2962	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
	2963	}
	2964	} else if(action==MBCS_STATE_FALLBACK_DIRECT_16) {
	2965	if(UCNV_TO_U_USE_FALLBACK(cnv)) {
	2966	/* output BMP code point */
	2967	c=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	2968	break;
	2969	}
	2970	} else if(action==MBCS_STATE_UNASSIGNED) {
	2971	/* just fall through */
	2972	} else if(action==MBCS_STATE_ILLEGAL) {
	2973	/* callback(illegal) */
	2974	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
	2975	} else {
	2976	/* reserved (must never occur), or only state change */
	2977	offset=0;
	2978	lastSource=source;
	2979	continue;
	2980	}
	2981
	2982	/* end of action codes: prepare for a new character */
	2983	offset=0;
	2984
	2985	if(U_FAILURE(*pErrorCode)) {
	2986	/* callback(illegal) */
	2987	break;
	2988	} else /* unassigned sequence */ {
	2989	/* defer to the generic implementation */
	2990	cnv->toUnicodeStatus=0;
	2991	cnv->mode=state;
	2992	pArgs->source=(const char *)lastSource;
	2993	return UCNV_GET_NEXT_UCHAR_USE_TO_U;
	2994	}
	2995	}
	2996	}
	2997
	2998	if(c<0) {
	2999	if(U_SUCCESS(*pErrorCode) && source==sourceLimit && lastSource<source) {
	3000	/* incomplete character byte sequence */
	3001	uint8_t *bytes=cnv->toUBytes;
	3002	cnv->toULength=(int8_t)(source-lastSource);
	3003	do {
	3004	bytes++=lastSource++;
	3005	} while(lastSource<source);
	3006	*pErrorCode=U_TRUNCATED_CHAR_FOUND;
	3007	} else if(U_FAILURE(*pErrorCode)) {
	3008	/* callback(illegal) */
	3009	/*
	3010	* Ticket 5691: consistent illegal sequences:
	3011	* - We include at least the first byte in the illegal sequence.
	3012	* - If any of the non-initial bytes could be the start of a character,
	3013	* we stop the illegal sequence before the first one of those.
	3014	*/
	3015	UBool isDBCSOnly=(UBool)(cnv->sharedData->mbcs.dbcsOnlyState!=0);
	3016	uint8_t *bytes=cnv->toUBytes;
	3017	bytes++=lastSource++; /* first byte */
	3018	if(lastSource==source) {
	3019	cnv->toULength=1;
	3020	} else /* lastSource<source: multi-byte character */ {
	3021	int8_t i;
	3022	for(i=1;
	3023	lastSource<source && !isSingleOrLead(stateTable, state, isDBCSOnly, *lastSource);
	3024	++i
	3025	) {
	3026	bytes++=lastSource++;
	3027	}
	3028	cnv->toULength=i;
	3029	source=lastSource;
	3030	}
	3031	} else {
	3032	/* no output because of empty input or only state changes */
	3033	*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
	3034	}
	3035	c=0xffff;
	3036	}
	3037
	3038	/* set the converter state back into UConverter, ready for a new character */
	3039	cnv->toUnicodeStatus=0;
	3040	cnv->mode=state;
	3041
	3042	/* write back the updated pointer */
	3043	pArgs->source=(const char *)source;
	3044	return c;
	3045	}
	3046
	3047	#if 0
	3048	/*
	3049	* Code disabled 2002dec09 (ICU 2.4) because it is not currently used in ICU. markus
	3050	* Removal improves code coverage.
	3051	*/
	3052	/**
	3053	* This version of ucnv_MBCSSimpleGetNextUChar() is optimized for single-byte, single-state codepages.
	3054	* It does not handle the EBCDIC swaplfnl option (set in UConverter).
	3055	* It does not handle conversion extensions (_extToU()).
	3056	*/
	3057	U_CFUNC UChar32
	3058	ucnv_MBCSSingleSimpleGetNextUChar(UConverterSharedData *sharedData,
	3059	uint8_t b, UBool useFallback) {
	3060	int32_t entry;
	3061	uint8_t action;
	3062
	3063	entry=sharedData->mbcs.stateTable[0][b];
	3064	/* MBCS_ENTRY_IS_FINAL(entry) */
	3065
	3066	if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
	3067	/* output BMP code point */
	3068	return (UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	3069	}
	3070
	3071	/*
	3072	* An if-else-if chain provides more reliable performance for
	3073	* the most common cases compared to a switch.
	3074	*/
	3075	action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry));
	3076	if(action==MBCS_STATE_VALID_DIRECT_20) {
	3077	/* output supplementary code point */
	3078	return 0x10000+MBCS_ENTRY_FINAL_VALUE(entry);
	3079	} else if(action==MBCS_STATE_FALLBACK_DIRECT_16) {
	3080	if(!TO_U_USE_FALLBACK(useFallback)) {
	3081	return 0xfffe;
	3082	}
	3083	/* output BMP code point */
	3084	return (UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	3085	} else if(action==MBCS_STATE_FALLBACK_DIRECT_20) {
	3086	if(!TO_U_USE_FALLBACK(useFallback)) {
	3087	return 0xfffe;
	3088	}
	3089	/* output supplementary code point */
	3090	return 0x10000+MBCS_ENTRY_FINAL_VALUE(entry);
	3091	} else if(action==MBCS_STATE_UNASSIGNED) {
	3092	return 0xfffe;
	3093	} else if(action==MBCS_STATE_ILLEGAL) {
	3094	return 0xffff;
	3095	} else {
	3096	/* reserved, must never occur */
	3097	return 0xffff;
	3098	}
	3099	}
	3100	#endif
	3101
	3102	/*
	3103	* This is a simple version of _MBCSGetNextUChar() that is used
	3104	* by other converter implementations.
	3105	* It only returns an "assigned" result if it consumes the entire input.
	3106	* It does not use state from the converter, nor error codes.
	3107	* It does not handle the EBCDIC swaplfnl option (set in UConverter).
	3108	* It handles conversion extensions but not GB 18030.
	3109	*
	3110	* Return value:
	3111	* U+fffe unassigned
	3112	* U+ffff illegal
	3113	* otherwise the Unicode code point
	3114	*/
	3115	U_CFUNC UChar32
	3116	ucnv_MBCSSimpleGetNextUChar(UConverterSharedData *sharedData,
	3117	const char *source, int32_t length,
	3118	UBool useFallback) {
	3119	const int32_t (*stateTable)[256];
	3120	const uint16_t *unicodeCodeUnits;
	3121
	3122	uint32_t offset;
	3123	uint8_t state, action;
	3124
	3125	UChar32 c;
	3126	int32_t i, entry;
	3127
	3128	if(length<=0) {
	3129	/* no input at all: "illegal" */
	3130	return 0xffff;
	3131	}
	3132
	3133	#if 0
	3134	/*
	3135	* Code disabled 2002dec09 (ICU 2.4) because it is not currently used in ICU. markus
	3136	* TODO In future releases, verify that this function is never called for SBCS
	3137	* conversions, i.e., that sharedData->mbcs.countStates==1 is still true.
	3138	* Removal improves code coverage.
	3139	*/
	3140	/* use optimized function if possible */
	3141	if(sharedData->mbcs.countStates==1) {
	3142	if(length==1) {
	3143	return ucnv_MBCSSingleSimpleGetNextUChar(sharedData, (uint8_t)*source, useFallback);
	3144	} else {
	3145	return 0xffff; /* illegal: more than a single byte for an SBCS converter */
	3146	}
	3147	}
	3148	#endif
	3149
	3150	/* set up the local pointers */
	3151	stateTable=sharedData->mbcs.stateTable;
	3152	unicodeCodeUnits=sharedData->mbcs.unicodeCodeUnits;
	3153
	3154	/* converter state */
	3155	offset=0;
	3156	state=sharedData->mbcs.dbcsOnlyState;
	3157
	3158	/* conversion loop */
	3159	for(i=0;;) {
	3160	entry=stateTable[state][(uint8_t)source[i++]];
	3161	if(MBCS_ENTRY_IS_TRANSITION(entry)) {
	3162	state=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry);
	3163	offset+=MBCS_ENTRY_TRANSITION_OFFSET(entry);
	3164
	3165	if(i==length) {
	3166	return 0xffff; /* truncated character */
	3167	}
	3168	} else {
	3169	/*
	3170	* An if-else-if chain provides more reliable performance for
	3171	* the most common cases compared to a switch.
	3172	*/
	3173	action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry));
	3174	if(action==MBCS_STATE_VALID_16) {
	3175	offset+=MBCS_ENTRY_FINAL_VALUE_16(entry);
	3176	c=unicodeCodeUnits[offset];
	3177	if(c!=0xfffe) {
	3178	/* done */
	3179	} else if(UCNV_TO_U_USE_FALLBACK(cnv)) {
	3180	c=ucnv_MBCSGetFallback(&sharedData->mbcs, offset);
	3181	/* else done with 0xfffe */
	3182	}
	3183	break;
	3184	} else if(action==MBCS_STATE_VALID_DIRECT_16) {
	3185	/* output BMP code point */
	3186	c=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	3187	break;
	3188	} else if(action==MBCS_STATE_VALID_16_PAIR) {
	3189	offset+=MBCS_ENTRY_FINAL_VALUE_16(entry);
	3190	c=unicodeCodeUnits[offset++];
	3191	if(c<0xd800) {
	3192	/* output BMP code point below 0xd800 */
	3193	} else if(UCNV_TO_U_USE_FALLBACK(cnv) ? c<=0xdfff : c<=0xdbff) {
	3194	/* output roundtrip or fallback supplementary code point */
	3195	c=(UChar32)(((c&0x3ff)<<10)+unicodeCodeUnits[offset]+(0x10000-0xdc00));
	3196	} else if(UCNV_TO_U_USE_FALLBACK(cnv) ? (c&0xfffe)==0xe000 : c==0xe000) {
	3197	/* output roundtrip BMP code point above 0xd800 or fallback BMP code point */
	3198	c=unicodeCodeUnits[offset];
	3199	} else if(c==0xffff) {
	3200	return 0xffff;
	3201	} else {
	3202	c=0xfffe;
	3203	}
	3204	break;
	3205	} else if(action==MBCS_STATE_VALID_DIRECT_20) {
	3206	/* output supplementary code point */
	3207	c=0x10000+MBCS_ENTRY_FINAL_VALUE(entry);
	3208	break;
	3209	} else if(action==MBCS_STATE_FALLBACK_DIRECT_16) {
	3210	if(!TO_U_USE_FALLBACK(useFallback)) {
	3211	c=0xfffe;
	3212	break;
	3213	}
	3214	/* output BMP code point */
	3215	c=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	3216	break;
	3217	} else if(action==MBCS_STATE_FALLBACK_DIRECT_20) {
	3218	if(!TO_U_USE_FALLBACK(useFallback)) {
	3219	c=0xfffe;
	3220	break;
	3221	}
	3222	/* output supplementary code point */
	3223	c=0x10000+MBCS_ENTRY_FINAL_VALUE(entry);
	3224	break;
	3225	} else if(action==MBCS_STATE_UNASSIGNED) {
	3226	c=0xfffe;
	3227	break;
	3228	}
	3229
	3230	/*
	3231	* forbid MBCS_STATE_CHANGE_ONLY for this function,
	3232	* and MBCS_STATE_ILLEGAL and reserved action codes
	3233	*/
	3234	return 0xffff;
	3235	}
	3236	}
	3237
	3238	if(i!=length) {
	3239	/* illegal for this function: not all input consumed */
	3240	return 0xffff;
	3241	}
	3242
	3243	if(c==0xfffe) {
	3244	/* try an extension mapping */
	3245	const int32_t *cx=sharedData->mbcs.extIndexes;
	3246	if(cx!=NULL) {
	3247	return ucnv_extSimpleMatchToU(cx, source, length, useFallback);
	3248	}
	3249	}
	3250
	3251	return c;
	3252	}
	3253
	3254	/* MBCS-from-Unicode conversion functions ----------------------------------- */
	3255
	3256	/* This version of ucnv_MBCSFromUnicodeWithOffsets() is optimized for double-byte codepages. */
	3257	static void
	3258	ucnv_MBCSDoubleFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
	3259	UErrorCode *pErrorCode) {
	3260	UConverter *cnv;
	3261	const UChar source, sourceLimit;
	3262	uint8_t *target;
	3263	int32_t targetCapacity;
	3264	int32_t *offsets;
	3265
	3266	const uint16_t *table;
	3267	const uint16_t *mbcsIndex;
	3268	const uint8_t *bytes;
	3269
	3270	UChar32 c;
	3271
	3272	int32_t sourceIndex, nextSourceIndex;
	3273
	3274	uint32_t stage2Entry;
	3275	uint32_t asciiRoundtrips;
	3276	uint32_t value;
	3277	uint8_t unicodeMask;
	3278
	3279	/* use optimized function if possible */
	3280	cnv=pArgs->converter;
	3281	unicodeMask=cnv->sharedData->mbcs.unicodeMask;
	3282
	3283	/* set up the local pointers */
	3284	source=pArgs->source;
	3285	sourceLimit=pArgs->sourceLimit;
	3286	target=(uint8_t *)pArgs->target;
	3287	targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
	3288	offsets=pArgs->offsets;
	3289
	3290	table=cnv->sharedData->mbcs.fromUnicodeTable;
	3291	mbcsIndex=cnv->sharedData->mbcs.mbcsIndex;
	3292	if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
	3293	bytes=cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes;
	3294	} else {
	3295	bytes=cnv->sharedData->mbcs.fromUnicodeBytes;
	3296	}
	3297	asciiRoundtrips=cnv->sharedData->mbcs.asciiRoundtrips;
	3298
	3299	/* get the converter state from UConverter */
	3300	c=cnv->fromUChar32;
	3301
	3302	/* sourceIndex=-1 if the current character began in the previous buffer */
	3303	sourceIndex= c==0 ? 0 : -1;
	3304	nextSourceIndex=0;
	3305
	3306	/* conversion loop */
	3307	if(c!=0 && targetCapacity>0) {
	3308	goto getTrail;
	3309	}
	3310
	3311	while(source<sourceLimit) {
	3312	/*
	3313	* This following test is to see if available input would overflow the output.
	3314	* It does not catch output of more than one byte that
	3315	* overflows as a result of a multi-byte character or callback output
	3316	* from the last source character.
	3317	* Therefore, those situations also test for overflows and will
	3318	* then break the loop, too.
	3319	*/
	3320	if(targetCapacity>0) {
	3321	/*
	3322	* Get a correct Unicode code point:
	3323	* a single UChar for a BMP code point or
	3324	* a matched surrogate pair for a "supplementary code point".
	3325	*/
	3326	c=*source++;
	3327	++nextSourceIndex;
	3328	if(c<=0x7f && IS_ASCII_ROUNDTRIP(c, asciiRoundtrips)) {
	3329	*target++=(uint8_t)c;
	3330	if(offsets!=NULL) {
	3331	*offsets++=sourceIndex;
	3332	sourceIndex=nextSourceIndex;
	3333	}
	3334	--targetCapacity;
	3335	c=0;
	3336	continue;
	3337	}
	3338	/*
	3339	* utf8Friendly table: Test for <=0xd7ff rather than <=MBCS_FAST_MAX
	3340	* to avoid dealing with surrogates.
	3341	* MBCS_FAST_MAX must be >=0xd7ff.
	3342	*/
	3343	if(c<=0xd7ff) {
	3344	value=DBCS_RESULT_FROM_MOST_BMP(mbcsIndex, (const uint16_t *)bytes, c);
	3345	/* There are only roundtrips (!=0) and no-mapping (==0) entries. */
	3346	if(value==0) {
	3347	goto unassigned;
	3348	}
	3349	/* output the value */
	3350	} else {
	3351	/*
	3352	* This also tests if the codepage maps single surrogates.
	3353	* If it does, then surrogates are not paired but mapped separately.
	3354	* Note that in this case unmatched surrogates are not detected.
	3355	*/
	3356	if(U16_IS_SURROGATE(c) && !(unicodeMask&UCNV_HAS_SURROGATES)) {
	3357	if(U16_IS_SURROGATE_LEAD(c)) {
	3358	getTrail:
	3359	if(source<sourceLimit) {
	3360	/* test the following code unit */
	3361	UChar trail=*source;
	3362	if(U16_IS_TRAIL(trail)) {
	3363	++source;
	3364	++nextSourceIndex;
	3365	c=U16_GET_SUPPLEMENTARY(c, trail);
	3366	if(!(unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
	3367	/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
	3368	/* callback(unassigned) */
	3369	goto unassigned;
	3370	}
	3371	/* convert this supplementary code point */
	3372	/* exit this condition tree */
	3373	} else {
	3374	/* this is an unmatched lead code unit (1st surrogate) */
	3375	/* callback(illegal) */
	3376	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
	3377	break;
	3378	}
	3379	} else {
	3380	/* no more input */
	3381	break;
	3382	}
	3383	} else {
	3384	/* this is an unmatched trail code unit (2nd surrogate) */
	3385	/* callback(illegal) */
	3386	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
	3387	break;
	3388	}
	3389	}
	3390
	3391	/* convert the Unicode code point in c into codepage bytes */
	3392	stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
	3393
	3394	/* get the bytes and the length for the output */
	3395	/* MBCS_OUTPUT_2 */
	3396	value=MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
	3397
	3398	/* is this code point assigned, or do we use fallbacks? */
	3399	if(!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) \|\|
	3400	(UCNV_FROM_U_USE_FALLBACK(cnv, c) && value!=0))
	3401	) {
	3402	/*
	3403	* We allow a 0 byte output if the "assigned" bit is set for this entry.
	3404	* There is no way with this data structure for fallback output
	3405	* to be a zero byte.
	3406	*/
	3407
	3408	unassigned:
	3409	/* try an extension mapping */
	3410	pArgs->source=source;
	3411	c=_extFromU(cnv, cnv->sharedData,
	3412	c, &source, sourceLimit,
	3413	&target, target+targetCapacity,
	3414	&offsets, sourceIndex,
	3415	pArgs->flush,
	3416	pErrorCode);
	3417	nextSourceIndex+=(int32_t)(source-pArgs->source);
	3418
	3419	if(U_FAILURE(*pErrorCode)) {
	3420	/* not mappable or buffer overflow */
	3421	break;
	3422	} else {
	3423	/* a mapping was written to the target, continue */
	3424
	3425	/* recalculate the targetCapacity after an extension mapping */
	3426	targetCapacity=(int32_t)(pArgs->targetLimit-(char *)target);
	3427
	3428	/* normal end of conversion: prepare for a new character */
	3429	sourceIndex=nextSourceIndex;
	3430	continue;
	3431	}
	3432	}
	3433	}
	3434
	3435	/* write the output character bytes from value and length */
	3436	/* from the first if in the loop we know that targetCapacity>0 */
	3437	if(value<=0xff) {
	3438	/* this is easy because we know that there is enough space */
	3439	*target++=(uint8_t)value;
	3440	if(offsets!=NULL) {
	3441	*offsets++=sourceIndex;
	3442	}
	3443	--targetCapacity;
	3444	} else /* length==2 */ {
	3445	*target++=(uint8_t)(value>>8);
	3446	if(2<=targetCapacity) {
	3447	*target++=(uint8_t)value;
	3448	if(offsets!=NULL) {
	3449	*offsets++=sourceIndex;
	3450	*offsets++=sourceIndex;
	3451	}
	3452	targetCapacity-=2;
	3453	} else {
	3454	if(offsets!=NULL) {
	3455	*offsets++=sourceIndex;
	3456	}
	3457	cnv->charErrorBuffer[0]=(char)value;
	3458	cnv->charErrorBufferLength=1;
	3459
	3460	/* target overflow */
	3461	targetCapacity=0;
	3462	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
	3463	c=0;
	3464	break;
	3465	}
	3466	}
	3467
	3468	/* normal end of conversion: prepare for a new character */
	3469	c=0;
	3470	sourceIndex=nextSourceIndex;
	3471	continue;
	3472	} else {
	3473	/* target is full */
	3474	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
	3475	break;
	3476	}
	3477	}
	3478
	3479	/* set the converter state back into UConverter */
	3480	cnv->fromUChar32=c;
	3481
	3482	/* write back the updated pointers */
	3483	pArgs->source=source;
	3484	pArgs->target=(char *)target;
	3485	pArgs->offsets=offsets;
	3486	}
	3487
	3488	/* This version of ucnv_MBCSFromUnicodeWithOffsets() is optimized for single-byte codepages. */
	3489	static void
	3490	ucnv_MBCSSingleFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
	3491	UErrorCode *pErrorCode) {
	3492	UConverter *cnv;
	3493	const UChar source, sourceLimit;
	3494	uint8_t *target;
	3495	int32_t targetCapacity;
	3496	int32_t *offsets;
	3497
	3498	const uint16_t *table;
	3499	const uint16_t *results;
	3500
	3501	UChar32 c;
	3502
	3503	int32_t sourceIndex, nextSourceIndex;
	3504
	3505	uint16_t value, minValue;
	3506	UBool hasSupplementary;
	3507
	3508	/* set up the local pointers */
	3509	cnv=pArgs->converter;
	3510	source=pArgs->source;
	3511	sourceLimit=pArgs->sourceLimit;
	3512	target=(uint8_t *)pArgs->target;
	3513	targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
	3514	offsets=pArgs->offsets;
	3515
	3516	table=cnv->sharedData->mbcs.fromUnicodeTable;
	3517	if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
	3518	results=(uint16_t *)cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes;
	3519	} else {
	3520	results=(uint16_t *)cnv->sharedData->mbcs.fromUnicodeBytes;
	3521	}
	3522
	3523	if(cnv->useFallback) {
	3524	/* use all roundtrip and fallback results */
	3525	minValue=0x800;
	3526	} else {
	3527	/* use only roundtrips and fallbacks from private-use characters */
	3528	minValue=0xc00;
	3529	}
	3530	hasSupplementary=(UBool)(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY);
	3531
	3532	/* get the converter state from UConverter */
	3533	c=cnv->fromUChar32;
	3534
	3535	/* sourceIndex=-1 if the current character began in the previous buffer */
	3536	sourceIndex= c==0 ? 0 : -1;
	3537	nextSourceIndex=0;
	3538
	3539	/* conversion loop */
	3540	if(c!=0 && targetCapacity>0) {
	3541	goto getTrail;
	3542	}
	3543
	3544	while(source<sourceLimit) {
	3545	/*
	3546	* This following test is to see if available input would overflow the output.
	3547	* It does not catch output of more than one byte that
	3548	* overflows as a result of a multi-byte character or callback output
	3549	* from the last source character.
	3550	* Therefore, those situations also test for overflows and will
	3551	* then break the loop, too.
	3552	*/
	3553	if(targetCapacity>0) {
	3554	/*
	3555	* Get a correct Unicode code point:
	3556	* a single UChar for a BMP code point or
	3557	* a matched surrogate pair for a "supplementary code point".
	3558	*/
	3559	c=*source++;
	3560	++nextSourceIndex;
	3561	if(U16_IS_SURROGATE(c)) {
	3562	if(U16_IS_SURROGATE_LEAD(c)) {
	3563	getTrail:
	3564	if(source<sourceLimit) {
	3565	/* test the following code unit */
	3566	UChar trail=*source;
	3567	if(U16_IS_TRAIL(trail)) {
	3568	++source;
	3569	++nextSourceIndex;
	3570	c=U16_GET_SUPPLEMENTARY(c, trail);
	3571	if(!hasSupplementary) {
	3572	/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
	3573	/* callback(unassigned) */
	3574	goto unassigned;
	3575	}
	3576	/* convert this supplementary code point */
	3577	/* exit this condition tree */
	3578	} else {
	3579	/* this is an unmatched lead code unit (1st surrogate) */
	3580	/* callback(illegal) */
	3581	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
	3582	break;
	3583	}
	3584	} else {
	3585	/* no more input */
	3586	break;
	3587	}
	3588	} else {
	3589	/* this is an unmatched trail code unit (2nd surrogate) */
	3590	/* callback(illegal) */
	3591	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
	3592	break;
	3593	}
	3594	}
	3595
	3596	/* convert the Unicode code point in c into codepage bytes */
	3597	value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
	3598
	3599	/* is this code point assigned, or do we use fallbacks? */
	3600	if(value>=minValue) {
	3601	/* assigned, write the output character bytes from value and length */
	3602	/* length==1 */
	3603	/* this is easy because we know that there is enough space */
	3604	*target++=(uint8_t)value;
	3605	if(offsets!=NULL) {
	3606	*offsets++=sourceIndex;
	3607	}
	3608	--targetCapacity;
	3609
	3610	/* normal end of conversion: prepare for a new character */
	3611	c=0;
	3612	sourceIndex=nextSourceIndex;
	3613	} else { /* unassigned */
	3614	unassigned:
	3615	/* try an extension mapping */
	3616	pArgs->source=source;
	3617	c=_extFromU(cnv, cnv->sharedData,
	3618	c, &source, sourceLimit,
	3619	&target, target+targetCapacity,
	3620	&offsets, sourceIndex,
	3621	pArgs->flush,
	3622	pErrorCode);
	3623	nextSourceIndex+=(int32_t)(source-pArgs->source);
	3624
	3625	if(U_FAILURE(*pErrorCode)) {
	3626	/* not mappable or buffer overflow */
	3627	break;
	3628	} else {
	3629	/* a mapping was written to the target, continue */
	3630
	3631	/* recalculate the targetCapacity after an extension mapping */
	3632	targetCapacity=(int32_t)(pArgs->targetLimit-(char *)target);
	3633
	3634	/* normal end of conversion: prepare for a new character */
	3635	sourceIndex=nextSourceIndex;
	3636	}
	3637	}
	3638	} else {
	3639	/* target is full */
	3640	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
	3641	break;
	3642	}
	3643	}
	3644
	3645	/* set the converter state back into UConverter */
	3646	cnv->fromUChar32=c;
	3647
	3648	/* write back the updated pointers */
	3649	pArgs->source=source;
	3650	pArgs->target=(char *)target;
	3651	pArgs->offsets=offsets;
	3652	}
	3653
	3654	/*
	3655	* This version of ucnv_MBCSFromUnicode() is optimized for single-byte codepages
	3656	* that map only to and from the BMP.
	3657	* In addition to single-byte/state optimizations, the offset calculations
	3658	* become much easier.
	3659	* It would be possible to use the sbcsIndex for UTF-8-friendly tables,
	3660	* but measurements have shown that this diminishes performance
	3661	* in more cases than it improves it.
	3662	* See SVN revision 21013 (2007-feb-06) for the last version with #if switches
	3663	* for various MBCS and SBCS optimizations.
	3664	*/
	3665	static void
	3666	ucnv_MBCSSingleFromBMPWithOffsets(UConverterFromUnicodeArgs *pArgs,
	3667	UErrorCode *pErrorCode) {
	3668	UConverter *cnv;
	3669	const UChar source, sourceLimit, *lastSource;
	3670	uint8_t *target;
	3671	int32_t targetCapacity, length;
	3672	int32_t *offsets;
	3673
	3674	const uint16_t *table;
	3675	const uint16_t *results;
	3676
	3677	UChar32 c;
	3678
	3679	int32_t sourceIndex;
	3680
	3681	uint32_t asciiRoundtrips;
	3682	uint16_t value, minValue;
	3683
	3684	/* set up the local pointers */
	3685	cnv=pArgs->converter;
	3686	source=pArgs->source;
	3687	sourceLimit=pArgs->sourceLimit;
	3688	target=(uint8_t *)pArgs->target;
	3689	targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
	3690	offsets=pArgs->offsets;
	3691
	3692	table=cnv->sharedData->mbcs.fromUnicodeTable;
	3693	if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
	3694	results=(uint16_t *)cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes;
	3695	} else {
	3696	results=(uint16_t *)cnv->sharedData->mbcs.fromUnicodeBytes;
	3697	}
	3698	asciiRoundtrips=cnv->sharedData->mbcs.asciiRoundtrips;
	3699
	3700	if(cnv->useFallback) {
	3701	/* use all roundtrip and fallback results */
	3702	minValue=0x800;
	3703	} else {
	3704	/* use only roundtrips and fallbacks from private-use characters */
	3705	minValue=0xc00;
	3706	}
	3707
	3708	/* get the converter state from UConverter */
	3709	c=cnv->fromUChar32;
	3710
	3711	/* sourceIndex=-1 if the current character began in the previous buffer */
	3712	sourceIndex= c==0 ? 0 : -1;
	3713	lastSource=source;
	3714
	3715	/*
	3716	* since the conversion here is 1:1 UChar:uint8_t, we need only one counter
	3717	* for the minimum of the sourceLength and targetCapacity
	3718	*/
	3719	length=(int32_t)(sourceLimit-source);
	3720	if(length<targetCapacity) {
	3721	targetCapacity=length;
	3722	}
	3723
	3724	/* conversion loop */
	3725	if(c!=0 && targetCapacity>0) {
	3726	goto getTrail;
	3727	}
	3728
	3729	#if MBCS_UNROLL_SINGLE_FROM_BMP
	3730	/* unrolling makes it slower on Pentium III/Windows 2000?! */
	3731	/* unroll the loop with the most common case */
	3732	unrolled:
	3733	if(targetCapacity>=4) {
	3734	int32_t count, loops;
	3735	uint16_t andedValues;
	3736
	3737	loops=count=targetCapacity>>2;
	3738	do {
	3739	c=*source++;
	3740	andedValues=value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
	3741	*target++=(uint8_t)value;
	3742	c=*source++;
	3743	andedValues&=value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
	3744	*target++=(uint8_t)value;
	3745	c=*source++;
	3746	andedValues&=value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
	3747	*target++=(uint8_t)value;
	3748	c=*source++;
	3749	andedValues&=value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
	3750	*target++=(uint8_t)value;
	3751
	3752	/* were all 4 entries really valid? */
	3753	if(andedValues<minValue) {
	3754	/* no, return to the first of these 4 */
	3755	source-=4;
	3756	target-=4;
	3757	break;
	3758	}
	3759	} while(--count>0);
	3760	count=loops-count;
	3761	targetCapacity-=4*count;
	3762
	3763	if(offsets!=NULL) {
	3764	lastSource+=4*count;
	3765	while(count>0) {
	3766	*offsets++=sourceIndex++;
	3767	*offsets++=sourceIndex++;
	3768	*offsets++=sourceIndex++;
	3769	*offsets++=sourceIndex++;
	3770	--count;
	3771	}
	3772	}
	3773
	3774	c=0;
	3775	}
	3776	#endif
	3777
	3778	while(targetCapacity>0) {
	3779	/*
	3780	* Get a correct Unicode code point:
	3781	* a single UChar for a BMP code point or
	3782	* a matched surrogate pair for a "supplementary code point".
	3783	*/
	3784	c=*source++;
	3785	/*
	3786	* Do not immediately check for single surrogates:
	3787	* Assume that they are unassigned and check for them in that case.
	3788	* This speeds up the conversion of assigned characters.
	3789	*/
	3790	/* convert the Unicode code point in c into codepage bytes */
	3791	if(c<=0x7f && IS_ASCII_ROUNDTRIP(c, asciiRoundtrips)) {
	3792	*target++=(uint8_t)c;
	3793	--targetCapacity;
	3794	c=0;
	3795	continue;
	3796	}
	3797	value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
	3798	/* is this code point assigned, or do we use fallbacks? */
	3799	if(value>=minValue) {
	3800	/* assigned, write the output character bytes from value and length */
	3801	/* length==1 */
	3802	/* this is easy because we know that there is enough space */
	3803	*target++=(uint8_t)value;
	3804	--targetCapacity;
	3805
	3806	/* normal end of conversion: prepare for a new character */
	3807	c=0;
	3808	continue;
	3809	} else if(!U16_IS_SURROGATE(c)) {
	3810	/* normal, unassigned BMP character */
	3811	} else if(U16_IS_SURROGATE_LEAD(c)) {
	3812	getTrail:
	3813	if(source<sourceLimit) {
	3814	/* test the following code unit */
	3815	UChar trail=*source;
	3816	if(U16_IS_TRAIL(trail)) {
	3817	++source;
	3818	c=U16_GET_SUPPLEMENTARY(c, trail);
	3819	/* this codepage does not map supplementary code points */
	3820	/* callback(unassigned) */
	3821	} else {
	3822	/* this is an unmatched lead code unit (1st surrogate) */
	3823	/* callback(illegal) */
	3824	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
	3825	break;
	3826	}
	3827	} else {
	3828	/* no more input */
	3829	if (pArgs->flush) {
	3830	*pErrorCode=U_TRUNCATED_CHAR_FOUND;
	3831	}
	3832	break;
	3833	}
	3834	} else {
	3835	/* this is an unmatched trail code unit (2nd surrogate) */
	3836	/* callback(illegal) */
	3837	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
	3838	break;
	3839	}
	3840
	3841	/* c does not have a mapping */
	3842
	3843	/* get the number of code units for c to correctly advance sourceIndex */
	3844	length=U16_LENGTH(c);
	3845
	3846	/* set offsets since the start or the last extension */
	3847	if(offsets!=NULL) {
	3848	int32_t count=(int32_t)(source-lastSource);
	3849
	3850	/* do not set the offset for this character */
	3851	count-=length;
	3852
	3853	while(count>0) {
	3854	*offsets++=sourceIndex++;
	3855	--count;
	3856	}
	3857	/* offsets and sourceIndex are now set for the current character */
	3858	}
	3859
	3860	/* try an extension mapping */
	3861	lastSource=source;
	3862	c=_extFromU(cnv, cnv->sharedData,
	3863	c, &source, sourceLimit,
	3864	&target, (const uint8_t *)(pArgs->targetLimit),
	3865	&offsets, sourceIndex,
	3866	pArgs->flush,
	3867	pErrorCode);
	3868	sourceIndex+=length+(int32_t)(source-lastSource);
	3869	lastSource=source;
	3870
	3871	if(U_FAILURE(*pErrorCode)) {
	3872	/* not mappable or buffer overflow */
	3873	break;
	3874	} else {
	3875	/* a mapping was written to the target, continue */
	3876
	3877	/* recalculate the targetCapacity after an extension mapping */
	3878	targetCapacity=(int32_t)(pArgs->targetLimit-(char *)target);
	3879	length=(int32_t)(sourceLimit-source);
	3880	if(length<targetCapacity) {
	3881	targetCapacity=length;
	3882	}
	3883	}
	3884
	3885	#if MBCS_UNROLL_SINGLE_FROM_BMP
	3886	/* unrolling makes it slower on Pentium III/Windows 2000?! */
	3887	goto unrolled;
	3888	#endif
	3889	}
	3890
	3891	if(U_SUCCESS(pErrorCode) && source<sourceLimit && target>=(uint8_t )pArgs->targetLimit) {
	3892	/* target is full */
	3893	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
	3894	}
	3895
	3896	/* set offsets since the start or the last callback */
	3897	if(offsets!=NULL) {
	3898	size_t count=source-lastSource;
	3899	if (count > 0 && *pErrorCode == U_TRUNCATED_CHAR_FOUND) {
	3900	/*
	3901	Caller gave us a partial supplementary character,
	3902	which this function couldn't convert in any case.
	3903	The callback will handle the offset.
	3904	*/
	3905	count--;
	3906	}
	3907	while(count>0) {
	3908	*offsets++=sourceIndex++;
	3909	--count;
	3910	}
	3911	}
	3912
	3913	/* set the converter state back into UConverter */
	3914	cnv->fromUChar32=c;
	3915
	3916	/* write back the updated pointers */
	3917	pArgs->source=source;
	3918	pArgs->target=(char *)target;
	3919	pArgs->offsets=offsets;
	3920	}
	3921
	3922	U_CFUNC void
	3923	ucnv_MBCSFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
	3924	UErrorCode *pErrorCode) {
	3925	UConverter *cnv;
	3926	const UChar source, sourceLimit;
	3927	uint8_t *target;
	3928	int32_t targetCapacity;
	3929	int32_t *offsets;
	3930
	3931	const uint16_t *table;
	3932	const uint16_t *mbcsIndex;
	3933	const uint8_t p, bytes;
	3934	uint8_t outputType;
	3935
	3936	UChar32 c;
	3937
	3938	int32_t prevSourceIndex, sourceIndex, nextSourceIndex;
	3939
	3940	uint32_t stage2Entry;
	3941	uint32_t asciiRoundtrips;
	3942	uint32_t value;
	3943	/* Shift-In and Shift-Out byte sequences differ by encoding scheme. */
	3944	uint8_t siBytes[2] = {0, 0};
	3945	uint8_t soBytes[2] = {0, 0};
	3946	uint8_t siLength, soLength;
	3947	int32_t length = 0, prevLength;
	3948	uint8_t unicodeMask;
	3949
	3950	cnv=pArgs->converter;
	3951
	3952	if(cnv->preFromUFirstCP>=0) {
	3953	/*
	3954	* pass sourceIndex=-1 because we continue from an earlier buffer
	3955	* in the future, this may change with continuous offsets
	3956	*/
	3957	ucnv_extContinueMatchFromU(cnv, pArgs, -1, pErrorCode);
	3958
	3959	if(U_FAILURE(*pErrorCode) \|\| cnv->preFromULength<0) {
	3960	return;
	3961	}
	3962	}
	3963
	3964	/* use optimized function if possible */
	3965	outputType=cnv->sharedData->mbcs.outputType;
	3966	unicodeMask=cnv->sharedData->mbcs.unicodeMask;
	3967	if(outputType==MBCS_OUTPUT_1 && !(unicodeMask&UCNV_HAS_SURROGATES)) {
	3968	if(!(unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
	3969	ucnv_MBCSSingleFromBMPWithOffsets(pArgs, pErrorCode);
	3970	} else {
	3971	ucnv_MBCSSingleFromUnicodeWithOffsets(pArgs, pErrorCode);
	3972	}
	3973	return;
	3974	} else if(outputType==MBCS_OUTPUT_2 && cnv->sharedData->mbcs.utf8Friendly) {
	3975	ucnv_MBCSDoubleFromUnicodeWithOffsets(pArgs, pErrorCode);
	3976	return;
	3977	}
	3978
	3979	/* set up the local pointers */
	3980	source=pArgs->source;
	3981	sourceLimit=pArgs->sourceLimit;
	3982	target=(uint8_t *)pArgs->target;
	3983	targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
	3984	offsets=pArgs->offsets;
	3985
	3986	table=cnv->sharedData->mbcs.fromUnicodeTable;
	3987	if(cnv->sharedData->mbcs.utf8Friendly) {
	3988	mbcsIndex=cnv->sharedData->mbcs.mbcsIndex;
	3989	} else {
	3990	mbcsIndex=NULL;
	3991	}
	3992	if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
	3993	bytes=cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes;
	3994	} else {
	3995	bytes=cnv->sharedData->mbcs.fromUnicodeBytes;
	3996	}
	3997	asciiRoundtrips=cnv->sharedData->mbcs.asciiRoundtrips;
	3998
	3999	/* get the converter state from UConverter */
	4000	c=cnv->fromUChar32;
	4001
	4002	if(outputType==MBCS_OUTPUT_2_SISO) {
	4003	prevLength=cnv->fromUnicodeStatus;
	4004	if(prevLength==0) {
	4005	/* set the real value */
	4006	prevLength=1;
	4007	}
	4008	} else {
	4009	/* prevent fromUnicodeStatus from being set to something non-0 */
	4010	prevLength=0;
	4011	}
	4012
	4013	/* sourceIndex=-1 if the current character began in the previous buffer */
	4014	prevSourceIndex=-1;
	4015	sourceIndex= c==0 ? 0 : -1;
	4016	nextSourceIndex=0;
	4017
	4018	/* Get the SI/SO character for the converter */
	4019	siLength = getSISOBytes(SI, cnv->options, siBytes);
	4020	soLength = getSISOBytes(SO, cnv->options, soBytes);
	4021
	4022	/* conversion loop */
	4023	/*
	4024	* This is another piece of ugly code:
	4025	* A goto into the loop if the converter state contains a first surrogate
	4026	* from the previous function call.
	4027	* It saves me to check in each loop iteration a check of if(c==0)
	4028	* and duplicating the trail-surrogate-handling code in the else
	4029	* branch of that check.
	4030	* I could not find any other way to get around this other than
	4031	* using a function call for the conversion and callback, which would
	4032	* be even more inefficient.
	4033	*
	4034	* Markus Scherer 2000-jul-19
	4035	*/
	4036	if(c!=0 && targetCapacity>0) {
	4037	goto getTrail;
	4038	}
	4039
	4040	while(source<sourceLimit) {
	4041	/*
	4042	* This following test is to see if available input would overflow the output.
	4043	* It does not catch output of more than one byte that
	4044	* overflows as a result of a multi-byte character or callback output
	4045	* from the last source character.
	4046	* Therefore, those situations also test for overflows and will
	4047	* then break the loop, too.
	4048	*/
	4049	if(targetCapacity>0) {
	4050	/*
	4051	* Get a correct Unicode code point:
	4052	* a single UChar for a BMP code point or
	4053	* a matched surrogate pair for a "supplementary code point".
	4054	*/
	4055	c=*source++;
	4056	++nextSourceIndex;
	4057	if(c<=0x7f && IS_ASCII_ROUNDTRIP(c, asciiRoundtrips)) {
	4058	*target++=(uint8_t)c;
	4059	if(offsets!=NULL) {
	4060	*offsets++=sourceIndex;
	4061	prevSourceIndex=sourceIndex;
	4062	sourceIndex=nextSourceIndex;
	4063	}
	4064	--targetCapacity;
	4065	c=0;
	4066	continue;
	4067	}
	4068	/*
	4069	* utf8Friendly table: Test for <=0xd7ff rather than <=MBCS_FAST_MAX
	4070	* to avoid dealing with surrogates.
	4071	* MBCS_FAST_MAX must be >=0xd7ff.
	4072	*/
	4073	if(c<=0xd7ff && mbcsIndex!=NULL) {
	4074	value=mbcsIndex[c>>6];
	4075
	4076	/* get the bytes and the length for the output (copied from below and adapted for utf8Friendly data) */
	4077	/* There are only roundtrips (!=0) and no-mapping (==0) entries. */
	4078	switch(outputType) {
	4079	case MBCS_OUTPUT_2:
	4080	value=((const uint16_t *)bytes)[value +(c&0x3f)];
	4081	if(value<=0xff) {
	4082	if(value==0) {
	4083	goto unassigned;
	4084	} else {
	4085	length=1;
	4086	}
	4087	} else {
	4088	length=2;
	4089	}
	4090	break;
	4091	case MBCS_OUTPUT_2_SISO:
	4092	/* 1/2-byte stateful with Shift-In/Shift-Out */
	4093	/*
	4094	* Save the old state in the converter object
	4095	* right here, then change the local prevLength state variable if necessary.
	4096	* Then, if this character turns out to be unassigned or a fallback that
	4097	* is not taken, the callback code must not save the new state in the converter
	4098	* because the new state is for a character that is not output.
	4099	* However, the callback must still restore the state from the converter
	4100	* in case the callback function changed it for its output.
	4101	*/
	4102	cnv->fromUnicodeStatus=prevLength; /* save the old state */
	4103	value=((const uint16_t *)bytes)[value +(c&0x3f)];
	4104	if(value<=0xff) {
	4105	if(value==0) {
	4106	goto unassigned;
	4107	} else if(prevLength<=1) {
	4108	length=1;
	4109	} else {
	4110	/* change from double-byte mode to single-byte */
	4111	if (siLength == 1) {
	4112	value\|=(uint32_t)siBytes[0]<<8;
	4113	length = 2;
	4114	} else if (siLength == 2) {
	4115	value\|=(uint32_t)siBytes[1]<<8;
	4116	value\|=(uint32_t)siBytes[0]<<16;
	4117	length = 3;
	4118	}
	4119	prevLength=1;
	4120	}
	4121	} else {
	4122	if(prevLength==2) {
	4123	length=2;
	4124	} else {
	4125	/* change from single-byte mode to double-byte */
	4126	if (soLength == 1) {
	4127	value\|=(uint32_t)soBytes[0]<<16;
	4128	length = 3;
	4129	} else if (soLength == 2) {
	4130	value\|=(uint32_t)soBytes[1]<<16;
	4131	value\|=(uint32_t)soBytes[0]<<24;
	4132	length = 4;
	4133	}
	4134	prevLength=2;
	4135	}
	4136	}
	4137	break;
	4138	case MBCS_OUTPUT_DBCS_ONLY:
	4139	/* table with single-byte results, but only DBCS mappings used */
	4140	value=((const uint16_t *)bytes)[value +(c&0x3f)];
	4141	if(value<=0xff) {
	4142	/* no mapping or SBCS result, not taken for DBCS-only */
	4143	goto unassigned;
	4144	} else {
	4145	length=2;
	4146	}
	4147	break;
	4148	case MBCS_OUTPUT_3:
	4149	p=bytes+(value+(c&0x3f))*3;
	4150	value=((uint32_t)*p<<16)\|((uint32_t)p[1]<<8)\|p[2];
	4151	if(value<=0xff) {
	4152	if(value==0) {
	4153	goto unassigned;
	4154	} else {
	4155	length=1;
	4156	}
	4157	} else if(value<=0xffff) {
	4158	length=2;
	4159	} else {
	4160	length=3;
	4161	}
	4162	break;
	4163	case MBCS_OUTPUT_4:
	4164	value=((const uint32_t *)bytes)[value +(c&0x3f)];
	4165	if(value<=0xff) {
	4166	if(value==0) {
	4167	goto unassigned;
	4168	} else {
	4169	length=1;
	4170	}
	4171	} else if(value<=0xffff) {
	4172	length=2;
	4173	} else if(value<=0xffffff) {
	4174	length=3;
	4175	} else {
	4176	length=4;
	4177	}
	4178	break;
	4179	case MBCS_OUTPUT_3_EUC:
	4180	value=((const uint16_t *)bytes)[value +(c&0x3f)];
	4181	/* EUC 16-bit fixed-length representation */
	4182	if(value<=0xff) {
	4183	if(value==0) {
	4184	goto unassigned;
	4185	} else {
	4186	length=1;
	4187	}
	4188	} else if((value&0x8000)==0) {
	4189	value\|=0x8e8000;
	4190	length=3;
	4191	} else if((value&0x80)==0) {
	4192	value\|=0x8f0080;
	4193	length=3;
	4194	} else {
	4195	length=2;
	4196	}
	4197	break;
	4198	case MBCS_OUTPUT_4_EUC:
	4199	p=bytes+(value+(c&0x3f))*3;
	4200	value=((uint32_t)*p<<16)\|((uint32_t)p[1]<<8)\|p[2];
	4201	/* EUC 16-bit fixed-length representation applied to the first two bytes */
	4202	if(value<=0xff) {
	4203	if(value==0) {
	4204	goto unassigned;
	4205	} else {
	4206	length=1;
	4207	}
	4208	} else if(value<=0xffff) {
	4209	length=2;
	4210	} else if((value&0x800000)==0) {
	4211	value\|=0x8e800000;
	4212	length=4;
	4213	} else if((value&0x8000)==0) {
	4214	value\|=0x8f008000;
	4215	length=4;
	4216	} else {
	4217	length=3;
	4218	}
	4219	break;
	4220	default:
	4221	/* must not occur */
	4222	/*
	4223	* To avoid compiler warnings that value & length may be
	4224	* used without having been initialized, we set them here.
	4225	* In reality, this is unreachable code.
	4226	* Not having a default branch also causes warnings with
	4227	* some compilers.
	4228	*/
	4229	value=0;
	4230	length=0;
	4231	break;
	4232	}
	4233	/* output the value */
	4234	} else {
	4235	/*
	4236	* This also tests if the codepage maps single surrogates.
	4237	* If it does, then surrogates are not paired but mapped separately.
	4238	* Note that in this case unmatched surrogates are not detected.
	4239	*/
	4240	if(U16_IS_SURROGATE(c) && !(unicodeMask&UCNV_HAS_SURROGATES)) {
	4241	if(U16_IS_SURROGATE_LEAD(c)) {
	4242	getTrail:
	4243	if(source<sourceLimit) {
	4244	/* test the following code unit */
	4245	UChar trail=*source;
	4246	if(U16_IS_TRAIL(trail)) {
	4247	++source;
	4248	++nextSourceIndex;
	4249	c=U16_GET_SUPPLEMENTARY(c, trail);
	4250	if(!(unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
	4251	/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
	4252	cnv->fromUnicodeStatus=prevLength; /* save the old state */
	4253	/* callback(unassigned) */
	4254	goto unassigned;
	4255	}
	4256	/* convert this supplementary code point */
	4257	/* exit this condition tree */
	4258	} else {
	4259	/* this is an unmatched lead code unit (1st surrogate) */
	4260	/* callback(illegal) */
	4261	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
	4262	break;
	4263	}
	4264	} else {
	4265	/* no more input */
	4266	break;
	4267	}
	4268	} else {
	4269	/* this is an unmatched trail code unit (2nd surrogate) */
	4270	/* callback(illegal) */
	4271	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
	4272	break;
	4273	}
	4274	}
	4275
	4276	/* convert the Unicode code point in c into codepage bytes */
	4277
	4278	/*
	4279	* The basic lookup is a triple-stage compact array (trie) lookup.
	4280	* For details see the beginning of this file.
	4281	*
	4282	* Single-byte codepages are handled with a different data structure
	4283	* by _MBCSSingle... functions.
	4284	*
	4285	* The result consists of a 32-bit value from stage 2 and
	4286	* a pointer to as many bytes as are stored per character.
	4287	* The pointer points to the character's bytes in stage 3.
	4288	* Bits 15..0 of the stage 2 entry contain the stage 3 index
	4289	* for that pointer, while bits 31..16 are flags for which of
	4290	* the 16 characters in the block are roundtrip-assigned.
	4291	*
	4292	* For 2-byte and 4-byte codepages, the bytes are stored as uint16_t
	4293	* respectively as uint32_t, in the platform encoding.
	4294	* For 3-byte codepages, the bytes are always stored in big-endian order.
	4295	*
	4296	* For EUC encodings that use only either 0x8e or 0x8f as the first
	4297	* byte of their longest byte sequences, the first two bytes in
	4298	* this third stage indicate with their 7th bits whether these bytes
	4299	* are to be written directly or actually need to be preceeded by
	4300	* one of the two Single-Shift codes. With this, the third stage
	4301	* stores one byte fewer per character than the actual maximum length of
	4302	* EUC byte sequences.
	4303	*
	4304	* Other than that, leading zero bytes are removed and the other
	4305	* bytes output. A single zero byte may be output if the "assigned"
	4306	* bit in stage 2 was on.
	4307	* The data structure does not support zero byte output as a fallback,
	4308	* and also does not allow output of leading zeros.
	4309	*/
	4310	stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
	4311
	4312	/* get the bytes and the length for the output */
	4313	switch(outputType) {
	4314	case MBCS_OUTPUT_2:
	4315	value=MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
	4316	if(value<=0xff) {
	4317	length=1;
	4318	} else {
	4319	length=2;
	4320	}
	4321	break;
	4322	case MBCS_OUTPUT_2_SISO:
	4323	/* 1/2-byte stateful with Shift-In/Shift-Out */
	4324	/*
	4325	* Save the old state in the converter object
	4326	* right here, then change the local prevLength state variable if necessary.
	4327	* Then, if this character turns out to be unassigned or a fallback that
	4328	* is not taken, the callback code must not save the new state in the converter
	4329	* because the new state is for a character that is not output.
	4330	* However, the callback must still restore the state from the converter
	4331	* in case the callback function changed it for its output.
	4332	*/
	4333	cnv->fromUnicodeStatus=prevLength; /* save the old state */
	4334	value=MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
	4335	if(value<=0xff) {
	4336	if(value==0 && MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c)==0) {
	4337	/* no mapping, leave value==0 */
	4338	length=0;
	4339	} else if(prevLength<=1) {
	4340	length=1;
	4341	} else {
	4342	/* change from double-byte mode to single-byte */
	4343	if (siLength == 1) {
	4344	value\|=(uint32_t)siBytes[0]<<8;
	4345	length = 2;
	4346	} else if (siLength == 2) {
	4347	value\|=(uint32_t)siBytes[1]<<8;
	4348	value\|=(uint32_t)siBytes[0]<<16;
	4349	length = 3;
	4350	}
	4351	prevLength=1;
	4352	}
	4353	} else {
	4354	if(prevLength==2) {
	4355	length=2;
	4356	} else {
	4357	/* change from single-byte mode to double-byte */
	4358	if (soLength == 1) {
	4359	value\|=(uint32_t)soBytes[0]<<16;
	4360	length = 3;
	4361	} else if (soLength == 2) {
	4362	value\|=(uint32_t)soBytes[1]<<16;
	4363	value\|=(uint32_t)soBytes[0]<<24;
	4364	length = 4;
	4365	}
	4366	prevLength=2;
	4367	}
	4368	}
	4369	break;
	4370	case MBCS_OUTPUT_DBCS_ONLY:
	4371	/* table with single-byte results, but only DBCS mappings used */
	4372	value=MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
	4373	if(value<=0xff) {
	4374	/* no mapping or SBCS result, not taken for DBCS-only */
	4375	value=stage2Entry=0; /* stage2Entry=0 to reset roundtrip flags */
	4376	length=0;
	4377	} else {
	4378	length=2;
	4379	}
	4380	break;
	4381	case MBCS_OUTPUT_3:
	4382	p=MBCS_POINTER_3_FROM_STAGE_2(bytes, stage2Entry, c);
	4383	value=((uint32_t)*p<<16)\|((uint32_t)p[1]<<8)\|p[2];
	4384	if(value<=0xff) {
	4385	length=1;
	4386	} else if(value<=0xffff) {
	4387	length=2;
	4388	} else {
	4389	length=3;
	4390	}
	4391	break;
	4392	case MBCS_OUTPUT_4:
	4393	value=MBCS_VALUE_4_FROM_STAGE_2(bytes, stage2Entry, c);
	4394	if(value<=0xff) {
	4395	length=1;
	4396	} else if(value<=0xffff) {
	4397	length=2;
	4398	} else if(value<=0xffffff) {
	4399	length=3;
	4400	} else {
	4401	length=4;
	4402	}
	4403	break;
	4404	case MBCS_OUTPUT_3_EUC:
	4405	value=MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
	4406	/* EUC 16-bit fixed-length representation */
	4407	if(value<=0xff) {
	4408	length=1;
	4409	} else if((value&0x8000)==0) {
	4410	value\|=0x8e8000;
	4411	length=3;
	4412	} else if((value&0x80)==0) {
	4413	value\|=0x8f0080;
	4414	length=3;
	4415	} else {
	4416	length=2;
	4417	}
	4418	break;
	4419	case MBCS_OUTPUT_4_EUC:
	4420	p=MBCS_POINTER_3_FROM_STAGE_2(bytes, stage2Entry, c);
	4421	value=((uint32_t)*p<<16)\|((uint32_t)p[1]<<8)\|p[2];
	4422	/* EUC 16-bit fixed-length representation applied to the first two bytes */
	4423	if(value<=0xff) {
	4424	length=1;
	4425	} else if(value<=0xffff) {
	4426	length=2;
	4427	} else if((value&0x800000)==0) {
	4428	value\|=0x8e800000;
	4429	length=4;
	4430	} else if((value&0x8000)==0) {
	4431	value\|=0x8f008000;
	4432	length=4;
	4433	} else {
	4434	length=3;
	4435	}
	4436	break;
	4437	default:
	4438	/* must not occur */
	4439	/*
	4440	* To avoid compiler warnings that value & length may be
	4441	* used without having been initialized, we set them here.
	4442	* In reality, this is unreachable code.
	4443	* Not having a default branch also causes warnings with
	4444	* some compilers.
	4445	*/
	4446	value=stage2Entry=0; /* stage2Entry=0 to reset roundtrip flags */
	4447	length=0;
	4448	break;
	4449	}
	4450
	4451	/* is this code point assigned, or do we use fallbacks? */
	4452	if(!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c)!=0 \|\|
	4453	(UCNV_FROM_U_USE_FALLBACK(cnv, c) && value!=0))
	4454	) {
	4455	/*
	4456	* We allow a 0 byte output if the "assigned" bit is set for this entry.
	4457	* There is no way with this data structure for fallback output
	4458	* to be a zero byte.
	4459	*/
	4460
	4461	unassigned:
	4462	/* try an extension mapping */
	4463	pArgs->source=source;
	4464	c=_extFromU(cnv, cnv->sharedData,
	4465	c, &source, sourceLimit,
	4466	&target, target+targetCapacity,
	4467	&offsets, sourceIndex,
	4468	pArgs->flush,
	4469	pErrorCode);
	4470	nextSourceIndex+=(int32_t)(source-pArgs->source);
	4471	prevLength=cnv->fromUnicodeStatus; /* restore SISO state */
	4472
	4473	if(U_FAILURE(*pErrorCode)) {
	4474	/* not mappable or buffer overflow */
	4475	break;
	4476	} else {
	4477	/* a mapping was written to the target, continue */
	4478
	4479	/* recalculate the targetCapacity after an extension mapping */
	4480	targetCapacity=(int32_t)(pArgs->targetLimit-(char *)target);
	4481
	4482	/* normal end of conversion: prepare for a new character */
	4483	if(offsets!=NULL) {
	4484	prevSourceIndex=sourceIndex;
	4485	sourceIndex=nextSourceIndex;
	4486	}
	4487	continue;
	4488	}
	4489	}
	4490	}
	4491
	4492	/* write the output character bytes from value and length */
	4493	/* from the first if in the loop we know that targetCapacity>0 */
	4494	if(length<=targetCapacity) {
	4495	if(offsets==NULL) {
	4496	switch(length) {
	4497	/* each branch falls through to the next one */
	4498	case 4:
	4499	*target++=(uint8_t)(value>>24);
	4500	case 3: /fall through/
	4501	*target++=(uint8_t)(value>>16);
	4502	case 2: /fall through/
	4503	*target++=(uint8_t)(value>>8);
	4504	case 1: /fall through/
	4505	*target++=(uint8_t)value;
	4506	default:
	4507	/* will never occur */
	4508	break;
	4509	}
	4510	} else {
	4511	switch(length) {
	4512	/* each branch falls through to the next one */
	4513	case 4:
	4514	*target++=(uint8_t)(value>>24);
	4515	*offsets++=sourceIndex;
	4516	case 3: /fall through/
	4517	*target++=(uint8_t)(value>>16);
	4518	*offsets++=sourceIndex;
	4519	case 2: /fall through/
	4520	*target++=(uint8_t)(value>>8);
	4521	*offsets++=sourceIndex;
	4522	case 1: /fall through/
	4523	*target++=(uint8_t)value;
	4524	*offsets++=sourceIndex;
	4525	default:
	4526	/* will never occur */
	4527	break;
	4528	}
	4529	}
	4530	targetCapacity-=length;
	4531	} else {
	4532	uint8_t *charErrorBuffer;
	4533
	4534	/*
	4535	* We actually do this backwards here:
	4536	* In order to save an intermediate variable, we output
	4537	* first to the overflow buffer what does not fit into the
	4538	* regular target.
	4539	*/
	4540	/* we know that 1<=targetCapacity<length<=4 */
	4541	length-=targetCapacity;
	4542	charErrorBuffer=(uint8_t *)cnv->charErrorBuffer;
	4543	switch(length) {
	4544	/* each branch falls through to the next one */
	4545	case 3:
	4546	*charErrorBuffer++=(uint8_t)(value>>16);
	4547	case 2: /fall through/
	4548	*charErrorBuffer++=(uint8_t)(value>>8);
	4549	case 1: /fall through/
	4550	*charErrorBuffer=(uint8_t)value;
	4551	default:
	4552	/* will never occur */
	4553	break;
	4554	}
	4555	cnv->charErrorBufferLength=(int8_t)length;
	4556
	4557	/* now output what fits into the regular target */
	4558	value>>=8length; / length was reduced by targetCapacity */
	4559	switch(targetCapacity) {
	4560	/* each branch falls through to the next one */
	4561	case 3:
	4562	*target++=(uint8_t)(value>>16);
	4563	if(offsets!=NULL) {
	4564	*offsets++=sourceIndex;
	4565	}
	4566	case 2: /fall through/
	4567	*target++=(uint8_t)(value>>8);
	4568	if(offsets!=NULL) {
	4569	*offsets++=sourceIndex;
	4570	}
	4571	case 1: /fall through/
	4572	*target++=(uint8_t)value;
	4573	if(offsets!=NULL) {
	4574	*offsets++=sourceIndex;
	4575	}
	4576	default:
	4577	/* will never occur */
	4578	break;
	4579	}
	4580
	4581	/* target overflow */
	4582	targetCapacity=0;
	4583	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
	4584	c=0;
	4585	break;
	4586	}
	4587
	4588	/* normal end of conversion: prepare for a new character */
	4589	c=0;
	4590	if(offsets!=NULL) {
	4591	prevSourceIndex=sourceIndex;
	4592	sourceIndex=nextSourceIndex;
	4593	}
	4594	continue;
	4595	} else {
	4596	/* target is full */
	4597	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
	4598	break;
	4599	}
	4600	}
	4601
	4602	/*
	4603	* the end of the input stream and detection of truncated input
	4604	* are handled by the framework, but for EBCDIC_STATEFUL conversion
	4605	* we need to emit an SI at the very end
	4606	*
	4607	* conditions:
	4608	* successful
	4609	* EBCDIC_STATEFUL in DBCS mode
	4610	* end of input and no truncated input
	4611	*/
	4612	if( U_SUCCESS(*pErrorCode) &&
	4613	outputType==MBCS_OUTPUT_2_SISO && prevLength==2 &&
	4614	pArgs->flush && source>=sourceLimit && c==0
	4615	) {
	4616	/* EBCDIC_STATEFUL ending with DBCS: emit an SI to return the output stream to SBCS */
	4617	if(targetCapacity>0) {
	4618	*target++=(uint8_t)siBytes[0];
	4619	if (siLength == 2) {
	4620	if (targetCapacity<2) {
	4621	cnv->charErrorBuffer[0]=(uint8_t)siBytes[1];
	4622	cnv->charErrorBufferLength=1;
	4623	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
	4624	} else {
	4625	*target++=(uint8_t)siBytes[1];
	4626	}
	4627	}
	4628	if(offsets!=NULL) {
	4629	/* set the last source character's index (sourceIndex points at sourceLimit now) */
	4630	*offsets++=prevSourceIndex;
	4631	}
	4632	} else {
	4633	/* target is full */
	4634	cnv->charErrorBuffer[0]=(uint8_t)siBytes[0];
	4635	if (siLength == 2) {
	4636	cnv->charErrorBuffer[1]=(uint8_t)siBytes[1];
	4637	}
	4638	cnv->charErrorBufferLength=siLength;
	4639	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
	4640	}
	4641	prevLength=1; /* we switched into SBCS */
	4642	}
	4643
	4644	/* set the converter state back into UConverter */
	4645	cnv->fromUChar32=c;
	4646	cnv->fromUnicodeStatus=prevLength;
	4647
	4648	/* write back the updated pointers */
	4649	pArgs->source=source;
	4650	pArgs->target=(char *)target;
	4651	pArgs->offsets=offsets;
	4652	}
	4653
	4654	/*
	4655	* This is another simple conversion function for internal use by other
	4656	* conversion implementations.
	4657	* It does not use the converter state nor call callbacks.
	4658	* It does not handle the EBCDIC swaplfnl option (set in UConverter).
	4659	* It handles conversion extensions but not GB 18030.
	4660	*
	4661	* It converts one single Unicode code point into codepage bytes, encoded
	4662	* as one 32-bit value. The function returns the number of bytes in *pValue:
	4663	* 1..4 the number of bytes in *pValue
	4664	* 0 unassigned (*pValue undefined)
	4665	* -1 illegal, or the outputType is not handled here (*pValue undefined)
	4666	*
	4667	* *pValue will contain the resulting bytes with the last byte in bits 7..0,
	4668	* the second to last byte in bits 15..8, etc.
	4669	* Currently, the function assumes but does not check that 0<=c<=0x10ffff.
	4670	*/
	4671	U_CFUNC int32_t
	4672	ucnv_MBCSFromUChar32(UConverterSharedData *sharedData,
	4673	UChar32 c, uint32_t *pValue,
	4674	UBool useFallback) {
	4675	const int32_t *cx;
	4676	const uint16_t *table;
	4677	#if 0
	4678	/* #if 0 because this is not currently used in ICU - reduce code, increase code coverage */
	4679	const uint8_t *p;
	4680	#endif
	4681	uint32_t stage2Entry;
	4682	uint32_t value;
	4683	int32_t length;
	4684
	4685	/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
	4686	if(c<=0xffff || (sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
	4687	table=sharedData->mbcs.fromUnicodeTable;
	4688
	4689	/* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */
	4690	if(sharedData->mbcs.outputType==MBCS_OUTPUT_1) {
	4691	value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t *)sharedData->mbcs.fromUnicodeBytes, c);
	4692	/* is this code point assigned, or do we use fallbacks? */
	4693	if(useFallback ? value>=0x800 : value>=0xc00) {
	4694	*pValue=value&0xff;
	4695	return 1;
	4696	}
	4697	} else /* outputType!=MBCS_OUTPUT_1 */ {
	4698	stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
	4699
	4700	/* get the bytes and the length for the output */
	4701	switch(sharedData->mbcs.outputType) {
	4702	case MBCS_OUTPUT_2:
	4703	value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
	4704	if(value<=0xff) {
	4705	length=1;
	4706	} else {
	4707	length=2;
	4708	}
	4709	break;
	4710	#if 0
	4711	/* #if 0 because this is not currently used in ICU - reduce code, increase code coverage */
	4712	case MBCS_OUTPUT_DBCS_ONLY:
	4713	/* table with single-byte results, but only DBCS mappings used */
	4714	value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
	4715	if(value<=0xff) {
	4716	/* no mapping or SBCS result, not taken for DBCS-only */
	4717	value=stage2Entry=0; /* stage2Entry=0 to reset roundtrip flags */
	4718	length=0;
	4719	} else {
	4720	length=2;
	4721	}
	4722	break;
	4723	case MBCS_OUTPUT_3:
	4724	p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
	4725	value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2];
	4726	if(value<=0xff) {
	4727	length=1;
	4728	} else if(value<=0xffff) {
	4729	length=2;
	4730	} else {
	4731	length=3;
	4732	}
	4733	break;
	4734	case MBCS_OUTPUT_4:
	4735	value=MBCS_VALUE_4_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
	4736	if(value<=0xff) {
	4737	length=1;
	4738	} else if(value<=0xffff) {
	4739	length=2;
	4740	} else if(value<=0xffffff) {
	4741	length=3;
	4742	} else {
	4743	length=4;
	4744	}
	4745	break;
	4746	case MBCS_OUTPUT_3_EUC:
	4747	value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
	4748	/* EUC 16-bit fixed-length representation */
	4749	if(value<=0xff) {
	4750	length=1;
	4751	} else if((value&0x8000)==0) {
	4752	value|=0x8e8000;
	4753	length=3;
	4754	} else if((value&0x80)==0) {
	4755	value|=0x8f0080;
	4756	length=3;
	4757	} else {
	4758	length=2;
	4759	}
	4760	break;
	4761	case MBCS_OUTPUT_4_EUC:
	4762	p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
	4763	value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2];
	4764	/* EUC 16-bit fixed-length representation applied to the first two bytes */
	4765	if(value<=0xff) {
	4766	length=1;
	4767	} else if(value<=0xffff) {
	4768	length=2;
	4769	} else if((value&0x800000)==0) {
	4770	value|=0x8e800000;
	4771	length=4;
	4772	} else if((value&0x8000)==0) {
	4773	value|=0x8f008000;
	4774	length=4;
	4775	} else {
	4776	length=3;
	4777	}
	4778	break;
	4779	#endif
	4780	default:
	4781	/* must not occur */
	4782	return -1;
	4783	}
	4784
	4785	/* is this code point assigned, or do we use fallbacks? */
	4786	if( MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) ||
	4787	(FROM_U_USE_FALLBACK(useFallback, c) && value!=0)
	4788	) {
	4789	/*
	4790	* We allow a 0 byte output if the "assigned" bit is set for this entry.
	4791	* There is no way with this data structure for fallback output
	4792	* to be a zero byte.
	4793	*/
	4794	/* assigned */
	4795	*pValue=value;
	4796	return length;
	4797	}
	4798	}
	4799	}
	4800
	4801	/* no usable base-table mapping: try the extension table, if there is one */
	4802	cx=sharedData->mbcs.extIndexes;
	4803	if(cx!=NULL) {
	4804	length=ucnv_extSimpleMatchFromU(cx, c, pValue, useFallback);
	4805	return length>=0 ? length : -length; /* return abs(length); */
	4806	}
	4807
	4808	/* unassigned */
	4809	return 0;
	4810	}
	4810
	4811
	4812	#if 0
	4813	/*
	4814	* This function has been moved to ucnv2022.c for inlining.
	4815	* This implementation is here only for documentation purposes
	4816	*/
	4817
	4818	/**
	4819	* This version of ucnv_MBCSFromUChar32() is optimized for single-byte codepages.
	4820	* It does not handle the EBCDIC swaplfnl option (set in UConverter).
	4821	* It does not handle conversion extensions (_extFromU()).
	4822	*
	4823	* It returns the codepage byte for the code point, or -1 if it is unassigned.
	4824	*/
	4825	U_CFUNC int32_t
	4826	ucnv_MBCSSingleFromUChar32(UConverterSharedData *sharedData,
	4827	UChar32 c,
	4828	UBool useFallback) {
	4829	const uint16_t *table;
	4830	int32_t value;
	4831
	4832	/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
	4833	if(c>=0x10000 && !(sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
	4834	return -1;
	4835	}
	4836
	4837	/* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */
	4838	table=sharedData->mbcs.fromUnicodeTable;
	4839
	4840	/* get the byte for the output */
	4841	value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t *)sharedData->mbcs.fromUnicodeBytes, c);
	4842	/* is this code point assigned, or do we use fallbacks? */
	4843	if(useFallback ? value>=0x800 : value>=0xc00) {
	4844	return value&0xff;
	4845	} else {
	4846	return -1;
	4847	}
	4848	}
	4849	#endif
	4850
	4851	/* MBCS-from-UTF-8 conversion functions ------------------------------------- */
	4852
	4853	/* minimum code point values for n-byte UTF-8 sequences, n=0..4 */
	4854	static const UChar32
	4855	utf8_minLegal[5]={ 0, 0, 0x80, 0x800, 0x10000 };
	4856
	4857	/* offsets for n-byte UTF-8 sequences that were calculated with ((lead<<6)+trail)<<6+trail... */
	4858	static const UChar32
	4859	utf8_offsets[7]={ 0, 0, 0x3080, 0xE2080, 0x3C82080 };
	4860
	4861	static void
	4862	ucnv_SBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
	4863	UConverterToUnicodeArgs *pToUArgs,
	4864	UErrorCode *pErrorCode) {
	4865	UConverter utf8, cnv;
	4866	const uint8_t source, sourceLimit;
	4867	uint8_t *target;
	4868	int32_t targetCapacity;
	4869
	4870	const uint16_t table, sbcsIndex;
	4871	const uint16_t *results;
	4872
	4873	int8_t oldToULength, toULength, toULimit;
	4874
	4875	UChar32 c;
	4876	uint8_t b, t1, t2;
	4877
	4878	uint32_t asciiRoundtrips;
	4879	uint16_t value, minValue;
	4880	UBool hasSupplementary;
	4881
	4882	/* set up the local pointers */
	4883	utf8=pToUArgs->converter;
	4884	cnv=pFromUArgs->converter;
	4885	source=(uint8_t *)pToUArgs->source;
	4886	sourceLimit=(uint8_t *)pToUArgs->sourceLimit;
	4887	target=(uint8_t *)pFromUArgs->target;
	4888	targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
	4889
	4890	table=cnv->sharedData->mbcs.fromUnicodeTable;
	4891	sbcsIndex=cnv->sharedData->mbcs.sbcsIndex;
	4892	if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
	4893	results=(uint16_t *)cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes;
	4894	} else {
	4895	results=(uint16_t *)cnv->sharedData->mbcs.fromUnicodeBytes;
	4896	}
	4897	asciiRoundtrips=cnv->sharedData->mbcs.asciiRoundtrips;
	4898
	4899	if(cnv->useFallback) {
	4900	/* use all roundtrip and fallback results */
	4901	minValue=0x800;
	4902	} else {
	4903	/* use only roundtrips and fallbacks from private-use characters */
	4904	minValue=0xc00;
	4905	}
	4906	hasSupplementary=(UBool)(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY);
	4907
	4908	/* get the converter state from the UTF-8 UConverter */
	4909	c=(UChar32)utf8->toUnicodeStatus;
	4910	if(c!=0) {
	4911	toULength=oldToULength=utf8->toULength;
	4912	toULimit=(int8_t)utf8->mode;
	4913	} else {
	4914	toULength=oldToULength=toULimit=0;
	4915	}
	4916
	4917	/*
	4918	* Make sure that the last byte sequence before sourceLimit is complete
	4919	* or runs into a lead byte.
	4920	* Do not go back into the bytes that will be read for finishing a partial
	4921	* sequence from the previous buffer.
	4922	* In the conversion loop compare source with sourceLimit only once
	4923	* per multi-byte character.
	4924	*/
	4925	{
	4926	int32_t i, length;
	4927
	4928	length=(int32_t)(sourceLimit-source) - (toULimit-oldToULength);
	4929	for(i=0; i<3 && i<length;) {
	4930	b=*(sourceLimit-i-1);
	4931	if(U8_IS_TRAIL(b)) {
	4932	++i;
	4933	} else {
	4934	if(i<U8_COUNT_TRAIL_BYTES(b)) {
	4935	/* exit the conversion loop before the lead byte if there are not enough trail bytes for it */
	4936	sourceLimit-=i+1;
	4937	}
	4938	break;
	4939	}
	4940	}
	4941	}
	4942
	4943	if(c!=0 && targetCapacity>0) {
	4944	utf8->toUnicodeStatus=0;
	4945	utf8->toULength=0;
	4946	goto moreBytes;
	4947	/*
	4948	* Note: We could avoid the goto by duplicating some of the moreBytes
	4949	* code, but only up to the point of collecting a complete UTF-8
	4950	* sequence; then recurse for the toUBytes[toULength]
	4951	* and then continue with normal conversion.
	4952	*
	4953	* If so, move this code to just after initializing the minimum
	4954	* set of local variables for reading the UTF-8 input
	4955	* (utf8, source, target, limits but not cnv, table, minValue, etc.).
	4956	*
	4957	* Potential advantages:
	4958	* - avoid the goto
	4959	* - oldToULength could become a local variable in just those code blocks
	4960	* that deal with buffer boundaries
	4961	* - possibly faster if the goto prevents some compiler optimizations
	4962	* (this would need measuring to confirm)
	4963	* Disadvantage:
	4964	* - code duplication
	4965	*/
	4966	}
	4967
	4968	/* conversion loop */
	4969	while(source<sourceLimit) {
	4970	if(targetCapacity>0) {
	4971	b=*source++;
	4972	if((int8_t)b>=0) {
	4973	/* convert ASCII */
	4974	if(IS_ASCII_ROUNDTRIP(b, asciiRoundtrips)) {
	4975	*target++=(uint8_t)b;
	4976	--targetCapacity;
	4977	continue;
	4978	} else {
	4979	c=b;
	4980	value=SBCS_RESULT_FROM_UTF8(sbcsIndex, results, 0, c);
	4981	}
	4982	} else {
	4983	if(b<0xe0) {
	4984	if( /* handle U+0080..U+07FF inline */
	4985	b>=0xc2 &&
	4986	(t1=(uint8_t)(*source-0x80)) <= 0x3f
	4987	) {
	4988	c=b&0x1f;
	4989	++source;
	4990	value=SBCS_RESULT_FROM_UTF8(sbcsIndex, results, c, t1);
	4991	if(value>=minValue) {
	4992	*target++=(uint8_t)value;
	4993	--targetCapacity;
	4994	continue;
	4995	} else {
	4996	c=(c<<6)\|t1;
	4997	}
	4998	} else {
	4999	c=-1;
	5000	}
	5001	} else if(b==0xe0) {
	5002	if( /* handle U+0800..U+0FFF inline */
	5003	(t1=(uint8_t)(source[0]-0x80)) <= 0x3f && t1 >= 0x20 &&
	5004	(t2=(uint8_t)(source[1]-0x80)) <= 0x3f
	5005	) {
	5006	c=t1;
	5007	source+=2;
	5008	value=SBCS_RESULT_FROM_UTF8(sbcsIndex, results, c, t2);
	5009	if(value>=minValue) {
	5010	*target++=(uint8_t)value;
	5011	--targetCapacity;
	5012	continue;
	5013	} else {
	5014	c=(c<<6)\|t2;
	5015	}
	5016	} else {
	5017	c=-1;
	5018	}
	5019	} else {
	5020	c=-1;
	5021	}
	5022
	5023	if(c<0) {
	5024	/* handle "complicated" and error cases, and continuing partial characters */
	5025	oldToULength=0;
	5026	toULength=1;
	5027	toULimit=U8_COUNT_TRAIL_BYTES(b)+1;
	5028	c=b;
	5029	moreBytes:
	5030	while(toULength<toULimit) {
	5031	/*
	5032	* The sourceLimit may have been adjusted before the conversion loop
	5033	* to stop before a truncated sequence.
	5034	* Here we need to use the real limit in case we have two truncated
	5035	* sequences at the end.
	5036	* See ticket #7492.
	5037	*/
	5038	if(source<(uint8_t *)pToUArgs->sourceLimit) {
	5039	b=*source;
	5040	if(U8_IS_TRAIL(b)) {
	5041	++source;
	5042	++toULength;
	5043	c=(c<<6)+b;
	5044	} else {
	5045	break; /* sequence too short, stop with toULength<toULimit */
	5046	}
	5047	} else {
	5048	/* store the partial UTF-8 character, compatible with the regular UTF-8 converter */
	5049	source-=(toULength-oldToULength);
	5050	while(oldToULength<toULength) {
	5051	utf8->toUBytes[oldToULength++]=*source++;
	5052	}
	5053	utf8->toUnicodeStatus=c;
	5054	utf8->toULength=toULength;
	5055	utf8->mode=toULimit;
	5056	pToUArgs->source=(char *)source;
	5057	pFromUArgs->target=(char *)target;
	5058	return;
	5059	}
	5060	}
	5061
	5062	if( toULength==toULimit && /* consumed all trail bytes */
	5063	(toULength==3 \|\| toULength==2) && /* BMP */
	5064	(c-=utf8_offsets[toULength])>=utf8_minLegal[toULength] &&
	5065	(c<=0xd7ff \|\| 0xe000<=c) /* not a surrogate */
	5066	) {
	5067	value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
	5068	} else if(
	5069	toULength==toULimit && toULength==4 &&
	5070	(0x10000<=(c-=utf8_offsets[4]) && c<=0x10ffff)
	5071	) {
	5072	/* supplementary code point */
	5073	if(!hasSupplementary) {
	5074	/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
	5075	value=0;
	5076	} else {
	5077	value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
	5078	}
	5079	} else {
	5080	/* error handling: illegal UTF-8 byte sequence */
	5081	source-=(toULength-oldToULength);
	5082	while(oldToULength<toULength) {
	5083	utf8->toUBytes[oldToULength++]=*source++;
	5084	}
	5085	utf8->toULength=toULength;
	5086	pToUArgs->source=(char *)source;
	5087	pFromUArgs->target=(char *)target;
	5088	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
	5089	return;
	5090	}
	5091	}
	5092	}
	5093
	5094	if(value>=minValue) {
	5095	/* output the mapping for c */
	5096	*target++=(uint8_t)value;
	5097	--targetCapacity;
	5098	} else {
	5099	/* value<minValue means c is unassigned (unmappable) */
	5100	/*
	5101	* Try an extension mapping.
	5102	* Pass in no source because we don't have UTF-16 input.
	5103	* If we have a partial match on c, we will return and revert
	5104	* to UTF-8->UTF-16->charset conversion.
	5105	*/
	5106	static const UChar nul=0;
	5107	const UChar *noSource=&nul;
	5108	c=_extFromU(cnv, cnv->sharedData,
	5109	c, &noSource, noSource,
	5110	&target, target+targetCapacity,
	5111	NULL, -1,
	5112	pFromUArgs->flush,
	5113	pErrorCode);
	5114
	5115	if(U_FAILURE(*pErrorCode)) {
	5116	/* not mappable or buffer overflow */
	5117	cnv->fromUChar32=c;
	5118	break;
	5119	} else if(cnv->preFromUFirstCP>=0) {
	5120	/*
	5121	* Partial match, return and revert to pivoting.
	5122	* In normal from-UTF-16 conversion, we would just continue
	5123	* but then exit the loop because the extension match would
	5124	* have consumed the source.
	5125	*/
	5126	*pErrorCode=U_USING_DEFAULT_WARNING;
	5127	break;
	5128	} else {
	5129	/* a mapping was written to the target, continue */
	5130
	5131	/* recalculate the targetCapacity after an extension mapping */
	5132	targetCapacity=(int32_t)(pFromUArgs->targetLimit-(char *)target);
	5133	}
	5134	}
	5135	} else {
	5136	/* target is full */
	5137	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
	5138	break;
	5139	}
	5140	}
	5141
	5142	/*
	5143	* The sourceLimit may have been adjusted before the conversion loop
	5144	* to stop before a truncated sequence.
	5145	* If so, then collect the truncated sequence now.
	5146	*/
	5147	if(U_SUCCESS(*pErrorCode) &&
	5148	cnv->preFromUFirstCP<0 &&
	5149	source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) {
	5150	c=utf8->toUBytes[0]=b=*source++;
	5151	toULength=1;
	5152	toULimit=U8_COUNT_TRAIL_BYTES(b)+1;
	5153	while(source<sourceLimit) {
	5154	utf8->toUBytes[toULength++]=b=*source++;
	5155	c=(c<<6)+b;
	5156	}
	5157	utf8->toUnicodeStatus=c;
	5158	utf8->toULength=toULength;
	5159	utf8->mode=toULimit;
	5160	}
	5161
	5162	/* write back the updated pointers */
	5163	pToUArgs->source=(char *)source;
	5164	pFromUArgs->target=(char *)target;
	5165	}
	5166
	5167	static void
	5168	ucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
	5169	UConverterToUnicodeArgs *pToUArgs,
	5170	UErrorCode *pErrorCode) {
	5171	UConverter utf8, cnv;
	5172	const uint8_t source, sourceLimit;
	5173	uint8_t *target;
	5174	int32_t targetCapacity;
	5175
	5176	const uint16_t table, mbcsIndex;
	5177	const uint16_t *results;
	5178
	5179	int8_t oldToULength, toULength, toULimit;
	5180
	5181	UChar32 c;
	5182	uint8_t b, t1, t2;
	5183
	5184	uint32_t stage2Entry;
	5185	uint32_t asciiRoundtrips;
	5186	uint16_t value;
	5187	UBool hasSupplementary;
	5188
	5189	/* set up the local pointers */
	5190	utf8=pToUArgs->converter;
	5191	cnv=pFromUArgs->converter;
	5192	source=(uint8_t *)pToUArgs->source;
	5193	sourceLimit=(uint8_t *)pToUArgs->sourceLimit;
	5194	target=(uint8_t *)pFromUArgs->target;
	5195	targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
	5196
	5197	table=cnv->sharedData->mbcs.fromUnicodeTable;
	5198	mbcsIndex=cnv->sharedData->mbcs.mbcsIndex;
	5199	if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
	5200	results=(uint16_t *)cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes;
	5201	} else {
	5202	results=(uint16_t *)cnv->sharedData->mbcs.fromUnicodeBytes;
	5203	}
	5204	asciiRoundtrips=cnv->sharedData->mbcs.asciiRoundtrips;
	5205
	5206	hasSupplementary=(UBool)(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY);
	5207
	5208	/* get the converter state from the UTF-8 UConverter */
	5209	c=(UChar32)utf8->toUnicodeStatus;
	5210	if(c!=0) {
	5211	toULength=oldToULength=utf8->toULength;
	5212	toULimit=(int8_t)utf8->mode;
	5213	} else {
	5214	toULength=oldToULength=toULimit=0;
	5215	}
	5216
	5217	/*
	5218	* Make sure that the last byte sequence before sourceLimit is complete
	5219	* or runs into a lead byte.
	5220	* Do not go back into the bytes that will be read for finishing a partial
	5221	* sequence from the previous buffer.
	5222	* In the conversion loop compare source with sourceLimit only once
	5223	* per multi-byte character.
	5224	*/
	5225	{
	5226	int32_t i, length;
	5227
	5228	length=(int32_t)(sourceLimit-source) - (toULimit-oldToULength);
	5229	for(i=0; i<3 && i<length;) {
	5230	b=*(sourceLimit-i-1);
	5231	if(U8_IS_TRAIL(b)) {
	5232	++i;
	5233	} else {
	5234	if(i<U8_COUNT_TRAIL_BYTES(b)) {
	5235	/* exit the conversion loop before the lead byte if there are not enough trail bytes for it */
	5236	sourceLimit-=i+1;
	5237	}
	5238	break;
	5239	}
	5240	}
	5241	}
	5242
	5243	if(c!=0 && targetCapacity>0) {
	5244	utf8->toUnicodeStatus=0;
	5245	utf8->toULength=0;
	5246	goto moreBytes;
	5247	/* See note in ucnv_SBCSFromUTF8() about this goto. */
	5248	}
	5249
	5250	/* conversion loop */
	5251	while(source<sourceLimit) {
	5252	if(targetCapacity>0) {
	5253	b=*source++;
	5254	if((int8_t)b>=0) {
	5255	/* convert ASCII */
	5256	if(IS_ASCII_ROUNDTRIP(b, asciiRoundtrips)) {
	5257	*target++=b;
	5258	--targetCapacity;
	5259	continue;
	5260	} else {
	5261	value=DBCS_RESULT_FROM_UTF8(mbcsIndex, results, 0, b);
	5262	if(value==0) {
	5263	c=b;
	5264	goto unassigned;
	5265	}
	5266	}
	5267	} else {
	5268	if(b>0xe0) {
	5269	if( /* handle U+1000..U+D7FF inline */
	5270	(((t1=(uint8_t)(source[0]-0x80), b<0xed) && (t1 <= 0x3f)) \|\|
	5271	(b==0xed && (t1 <= 0x1f))) &&
	5272	(t2=(uint8_t)(source[1]-0x80)) <= 0x3f
	5273	) {
	5274	c=((b&0xf)<<6)\|t1;
	5275	source+=2;
	5276	value=DBCS_RESULT_FROM_UTF8(mbcsIndex, results, c, t2);
	5277	if(value==0) {
	5278	c=(c<<6)\|t2;
	5279	goto unassigned;
	5280	}
	5281	} else {
	5282	c=-1;
	5283	}
	5284	} else if(b<0xe0) {
	5285	if( /* handle U+0080..U+07FF inline */
	5286	b>=0xc2 &&
	5287	(t1=(uint8_t)(*source-0x80)) <= 0x3f
	5288	) {
	5289	c=b&0x1f;
	5290	++source;
	5291	value=DBCS_RESULT_FROM_UTF8(mbcsIndex, results, c, t1);
	5292	if(value==0) {
	5293	c=(c<<6)\|t1;
	5294	goto unassigned;
	5295	}
	5296	} else {
	5297	c=-1;
	5298	}
	5299	} else {
	5300	c=-1;
	5301	}
	5302
	5303	if(c<0) {
	5304	/* handle "complicated" and error cases, and continuing partial characters */
	5305	oldToULength=0;
	5306	toULength=1;
	5307	toULimit=U8_COUNT_TRAIL_BYTES(b)+1;
	5308	c=b;
	5309	moreBytes:
	5310	while(toULength<toULimit) {
	5311	/*
	5312	* The sourceLimit may have been adjusted before the conversion loop
	5313	* to stop before a truncated sequence.
	5314	* Here we need to use the real limit in case we have two truncated
	5315	* sequences at the end.
	5316	* See ticket #7492.
	5317	*/
	5318	if(source<(uint8_t *)pToUArgs->sourceLimit) {
	5319	b=*source;
	5320	if(U8_IS_TRAIL(b)) {
	5321	++source;
	5322	++toULength;
	5323	c=(c<<6)+b;
	5324	} else {
	5325	break; /* sequence too short, stop with toULength<toULimit */
	5326	}
	5327	} else {
	5328	/* store the partial UTF-8 character, compatible with the regular UTF-8 converter */
	5329	source-=(toULength-oldToULength);
	5330	while(oldToULength<toULength) {
	5331	utf8->toUBytes[oldToULength++]=*source++;
	5332	}
	5333	utf8->toUnicodeStatus=c;
	5334	utf8->toULength=toULength;
	5335	utf8->mode=toULimit;
	5336	pToUArgs->source=(char *)source;
	5337	pFromUArgs->target=(char *)target;
	5338	return;
	5339	}
	5340	}
	5341
	5342	if( toULength==toULimit && /* consumed all trail bytes */
	5343	(toULength==3 \|\| toULength==2) && /* BMP */
	5344	(c-=utf8_offsets[toULength])>=utf8_minLegal[toULength] &&
	5345	(c<=0xd7ff \|\| 0xe000<=c) /* not a surrogate */
	5346	) {
	5347	stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
	5348	} else if(
	5349	toULength==toULimit && toULength==4 &&
	5350	(0x10000<=(c-=utf8_offsets[4]) && c<=0x10ffff)
	5351	) {
	5352	/* supplementary code point */
	5353	if(!hasSupplementary) {
	5354	/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
	5355	stage2Entry=0;
	5356	} else {
	5357	stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
	5358	}
	5359	} else {
	5360	/* error handling: illegal UTF-8 byte sequence */
	5361	source-=(toULength-oldToULength);
	5362	while(oldToULength<toULength) {
	5363	utf8->toUBytes[oldToULength++]=*source++;
	5364	}
	5365	utf8->toULength=toULength;
	5366	pToUArgs->source=(char *)source;
	5367	pFromUArgs->target=(char *)target;
	5368	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
	5369	return;
	5370	}
	5371
	5372	/* get the bytes and the length for the output */
	5373	/* MBCS_OUTPUT_2 */
	5374	value=MBCS_VALUE_2_FROM_STAGE_2(results, stage2Entry, c);
	5375
	5376	/* is this code point assigned, or do we use fallbacks? */
	5377	if(!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) \|\|
	5378	(UCNV_FROM_U_USE_FALLBACK(cnv, c) && value!=0))
	5379	) {
	5380	goto unassigned;
	5381	}
	5382	}
	5383	}
	5384
	5385	/* write the output character bytes from value and length */
	5386	/* from the first if in the loop we know that targetCapacity>0 */
	5387	if(value<=0xff) {
	5388	/* this is easy because we know that there is enough space */
	5389	*target++=(uint8_t)value;
	5390	--targetCapacity;
	5391	} else /* length==2 */ {
	5392	*target++=(uint8_t)(value>>8);
	5393	if(2<=targetCapacity) {
	5394	*target++=(uint8_t)value;
	5395	targetCapacity-=2;
	5396	} else {
	5397	cnv->charErrorBuffer[0]=(char)value;
	5398	cnv->charErrorBufferLength=1;
	5399
	5400	/* target overflow */
	5401	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
	5402	break;
	5403	}
	5404	}
	5405	continue;
	5406
	5407	unassigned:
	5408	{
	5409	/*
	5410	* Try an extension mapping.
	5411	* Pass in no source because we don't have UTF-16 input.
	5412	* If we have a partial match on c, we will return and revert
	5413	* to UTF-8->UTF-16->charset conversion.
	5414	*/
	5415	static const UChar nul=0;
	5416	const UChar *noSource=&nul;
	5417	c=_extFromU(cnv, cnv->sharedData,
	5418	c, &noSource, noSource,
	5419	&target, target+targetCapacity,
	5420	NULL, -1,
	5421	pFromUArgs->flush,
	5422	pErrorCode);
	5423
	5424	if(U_FAILURE(*pErrorCode)) {
	5425	/* not mappable or buffer overflow */
	5426	cnv->fromUChar32=c;
	5427	break;
	5428	} else if(cnv->preFromUFirstCP>=0) {
	5429	/*
	5430	* Partial match, return and revert to pivoting.
	5431	* In normal from-UTF-16 conversion, we would just continue
	5432	* but then exit the loop because the extension match would
	5433	* have consumed the source.
	5434	*/
	5435	*pErrorCode=U_USING_DEFAULT_WARNING;
	5436	break;
	5437	} else {
	5438	/* a mapping was written to the target, continue */
	5439
	5440	/* recalculate the targetCapacity after an extension mapping */
	5441	targetCapacity=(int32_t)(pFromUArgs->targetLimit-(char *)target);
	5442	continue;
	5443	}
	5444	}
	5445	} else {
	5446	/* target is full */
	5447	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
	5448	break;
	5449	}
	5450	}
	5451
	5452	/*
	5453	* The sourceLimit may have been adjusted before the conversion loop
	5454	* to stop before a truncated sequence.
	5455	* If so, then collect the truncated sequence now.
	5456	*/
	5457	if(U_SUCCESS(*pErrorCode) &&
	5458	cnv->preFromUFirstCP<0 &&
	5459	source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) {
	5460	c=utf8->toUBytes[0]=b=*source++;
	5461	toULength=1;
	5462	toULimit=U8_COUNT_TRAIL_BYTES(b)+1;
	5463	while(source<sourceLimit) {
	5464	utf8->toUBytes[toULength++]=b=*source++;
	5465	c=(c<<6)+b;
	5466	}
	5467	utf8->toUnicodeStatus=c;
	5468	utf8->toULength=toULength;
	5469	utf8->mode=toULimit;
	5470	}
	5471
	5472	/* write back the updated pointers */
	5473	pToUArgs->source=(char *)source;
	5474	pFromUArgs->target=(char *)target;
	5475	}
	5476
	5477	/* miscellaneous ------------------------------------------------------------ */
	5478
	5479	static void
	5480	ucnv_MBCSGetStarters(const UConverter* cnv,
	5481	UBool starters[256],
	5482	UErrorCode *pErrorCode) {
	5483	const int32_t *state0;
	5484	int i;
	5485
	5486	state0=cnv->sharedData->mbcs.stateTable[cnv->sharedData->mbcs.dbcsOnlyState];
	5487	for(i=0; i<256; ++i) {
	5488	/* all bytes that cause a state transition from state 0 are lead bytes */
	5489	starters[i]= (UBool)MBCS_ENTRY_IS_TRANSITION(state0[i]);
	5490	}
	5491	}
	5492
	5493	/*
	5494	* This is an internal function that allows other converter implementations
	5495	* to check whether a byte is a lead byte.
	5496	*/
	5497	U_CFUNC UBool
	5498	ucnv_MBCSIsLeadByte(UConverterSharedData *sharedData, char byte) {
	5499	return (UBool)MBCS_ENTRY_IS_TRANSITION(sharedData->mbcs.stateTable[0][(uint8_t)byte]);
	5500	}
	5501
	5502	static void
	5503	ucnv_MBCSWriteSub(UConverterFromUnicodeArgs *pArgs,
	5504	int32_t offsetIndex,
	5505	UErrorCode *pErrorCode) {
	5506	UConverter *cnv=pArgs->converter;
	5507	char p, subchar;
	5508	char buffer[4];
	5509	int32_t length;
	5510
	5511	/* first, select between subChar and subChar1 */
	5512	if( cnv->subChar1!=0 &&
	5513	(cnv->sharedData->mbcs.extIndexes!=NULL ?
	5514	cnv->useSubChar1 :
	5515	(cnv->invalidUCharBuffer[0]<=0xff))
	5516	) {
	5517	/* select subChar1 if it is set (not 0) and the unmappable Unicode code point is up to U+00ff (IBM MBCS behavior) */
	5518	subchar=(char *)&cnv->subChar1;
	5519	length=1;
	5520	} else {
	5521	/* select subChar in all other cases */
	5522	subchar=(char *)cnv->subChars;
	5523	length=cnv->subCharLen;
	5524	}
	5525
	5526	/* reset the selector for the next code point */
	5527	cnv->useSubChar1=FALSE;
	5528
	5529	if (cnv->sharedData->mbcs.outputType == MBCS_OUTPUT_2_SISO) {
	5530	p=buffer;
	5531
	5532	/* fromUnicodeStatus contains prevLength */
	5533	switch(length) {
	5534	case 1:
	5535	if(cnv->fromUnicodeStatus==2) {
	5536	/* DBCS mode and SBCS sub char: change to SBCS */
	5537	cnv->fromUnicodeStatus=1;
	5538	*p++=UCNV_SI;
	5539	}
	5540	*p++=subchar[0];
	5541	break;
	5542	case 2:
	5543	if(cnv->fromUnicodeStatus<=1) {
	5544	/* SBCS mode and DBCS sub char: change to DBCS */
	5545	cnv->fromUnicodeStatus=2;
	5546	*p++=UCNV_SO;
	5547	}
	5548	*p++=subchar[0];
	5549	*p++=subchar[1];
	5550	break;
	5551	default:
	5552	*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
	5553	return;
	5554	}
	5555	subchar=buffer;
	5556	length=(int32_t)(p-buffer);
	5557	}
	5558
	5559	ucnv_cbFromUWriteBytes(pArgs, subchar, length, offsetIndex, pErrorCode);
	5560	}
	5561
	5562	U_CFUNC UConverterType
	5563	ucnv_MBCSGetType(const UConverter* converter) {
	5564	/* SBCS, DBCS, and EBCDIC_STATEFUL are replaced by MBCS, but here we cheat a little */
	5565	if(converter->sharedData->mbcs.countStates==1) {
	5566	return (UConverterType)UCNV_SBCS;
	5567	} else if((converter->sharedData->mbcs.outputType&0xff)==MBCS_OUTPUT_2_SISO) {
	5568	return (UConverterType)UCNV_EBCDIC_STATEFUL;
	5569	} else if(converter->sharedData->staticData->minBytesPerChar==2 && converter->sharedData->staticData->maxBytesPerChar==2) {
	5570	return (UConverterType)UCNV_DBCS;
	5571	}
	5572	return (UConverterType)UCNV_MBCS;
	5573	}
	5574
	5575	static const UConverterImpl _SBCSUTF8Impl={
	5576	UCNV_MBCS,
	5577
	5578	ucnv_MBCSLoad,
	5579	ucnv_MBCSUnload,
	5580
	5581	ucnv_MBCSOpen,
	5582	NULL,
	5583	NULL,
	5584
	5585	ucnv_MBCSToUnicodeWithOffsets,
	5586	ucnv_MBCSToUnicodeWithOffsets,
	5587	ucnv_MBCSFromUnicodeWithOffsets,
	5588	ucnv_MBCSFromUnicodeWithOffsets,
	5589	ucnv_MBCSGetNextUChar,
	5590
	5591	ucnv_MBCSGetStarters,
	5592	ucnv_MBCSGetName,
	5593	ucnv_MBCSWriteSub,
	5594	NULL,
	5595	ucnv_MBCSGetUnicodeSet,
	5596
	5597	NULL,
	5598	ucnv_SBCSFromUTF8
	5599	};
	5600
	5601	static const UConverterImpl _DBCSUTF8Impl={
	5602	UCNV_MBCS,
	5603
	5604	ucnv_MBCSLoad,
	5605	ucnv_MBCSUnload,
	5606
	5607	ucnv_MBCSOpen,
	5608	NULL,
	5609	NULL,
	5610
	5611	ucnv_MBCSToUnicodeWithOffsets,
	5612	ucnv_MBCSToUnicodeWithOffsets,
	5613	ucnv_MBCSFromUnicodeWithOffsets,
	5614	ucnv_MBCSFromUnicodeWithOffsets,
	5615	ucnv_MBCSGetNextUChar,
	5616
	5617	ucnv_MBCSGetStarters,
	5618	ucnv_MBCSGetName,
	5619	ucnv_MBCSWriteSub,
	5620	NULL,
	5621	ucnv_MBCSGetUnicodeSet,
	5622
	5623	NULL,
	5624	ucnv_DBCSFromUTF8
	5625	};
	5626
	5627	static const UConverterImpl _MBCSImpl={
	5628	UCNV_MBCS,
	5629
	5630	ucnv_MBCSLoad,
	5631	ucnv_MBCSUnload,
	5632
	5633	ucnv_MBCSOpen,
	5634	NULL,
	5635	NULL,
	5636
	5637	ucnv_MBCSToUnicodeWithOffsets,
	5638	ucnv_MBCSToUnicodeWithOffsets,
	5639	ucnv_MBCSFromUnicodeWithOffsets,
	5640	ucnv_MBCSFromUnicodeWithOffsets,
	5641	ucnv_MBCSGetNextUChar,
	5642
	5643	ucnv_MBCSGetStarters,
	5644	ucnv_MBCSGetName,
	5645	ucnv_MBCSWriteSub,
	5646	NULL,
	5647	ucnv_MBCSGetUnicodeSet
	5648	};
	5649
	5650
	5651	/* Static data is in tools/makeconv/ucnvstat.c for data-based
	5652	* converters. Be sure to update it as well.
	5653	*/
	5654
	5655	const UConverterSharedData _MBCSData={
	5656	sizeof(UConverterSharedData), 1,
	5657	NULL, NULL, NULL, FALSE, &_MBCSImpl,
	5658	0
	5659	};
	5660
	5661	#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */