git.saurik.com Git - apple/icu.git/blame_incremental

... / ...

Commit	Line	Data
	1	// © 2016 and later: Unicode, Inc. and others.
	2	// License & terms of use: http://www.unicode.org/copyright.html
	3	/*
	4	******************************************************************************
	5	*
	6	* Copyright (C) 2000-2016, International Business Machines
	7	* Corporation and others. All Rights Reserved.
	8	*
	9	******************************************************************************
	10	* file name: ucnvmbcs.cpp
	11	* encoding: UTF-8
	12	* tab size: 8 (not used)
	13	* indentation:4
	14	*
	15	* created on: 2000jul03
	16	* created by: Markus W. Scherer
	17	*
	18	* The current code in this file replaces the previous implementation
	19	* of conversion code from multi-byte codepages to Unicode and back.
	20	* This implementation supports the following:
	21	* - legacy variable-length codepages with up to 4 bytes per character
	22	* - all Unicode code points (up to 0x10ffff)
	23	* - efficient distinction of unassigned vs. illegal byte sequences
	24	* - it is possible in fromUnicode() to directly deal with simple
	25	* stateful encodings (used for EBCDIC_STATEFUL)
	26	* - it is possible to convert Unicode code points
	27	* to a single zero byte (but not as a fallback except for SBCS)
	28	*
	29	* Remaining limitations in fromUnicode:
	30	* - byte sequences must not have leading zero bytes
	31	* - except for SBCS codepages: no fallback mapping from Unicode to a zero byte
	32	* - limitation to up to 4 bytes per character
	33	*
	34	* ICU 2.8 (late 2003) adds a secondary data structure which lifts some of these
	35	* limitations and adds m:n character mappings and other features.
	36	* See ucnv_ext.h for details.
	37	*
	38	* Change history:
	39	*
	40	* 5/6/2001 Ram Moved MBCS_SINGLE_RESULT_FROM_U,MBCS_STAGE_2_FROM_U,
	41	* MBCS_VALUE_2_FROM_STAGE_2, MBCS_VALUE_4_FROM_STAGE_2
	42	* macros to ucnvmbcs.h file
	43	*/
	44
	45	#include "unicode/utypes.h"
	46
	47	#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
	48
	49	#include "unicode/ucnv.h"
	50	#include "unicode/ucnv_cb.h"
	51	#include "unicode/udata.h"
	52	#include "unicode/uset.h"
	53	#include "unicode/utf8.h"
	54	#include "unicode/utf16.h"
	55	#include "ucnv_bld.h"
	56	#include "ucnvmbcs.h"
	57	#include "ucnv_ext.h"
	58	#include "ucnv_cnv.h"
	59	#include "cmemory.h"
	60	#include "cstring.h"
	61	#include "umutex.h"
	62	#include "ustr_imp.h"
	63
	64	/* control optimizations according to the platform */
	65	#define MBCS_UNROLL_SINGLE_TO_BMP 1
	66	#define MBCS_UNROLL_SINGLE_FROM_BMP 0
	67
	68	/*
	69	* _MBCSHeader versions 5.3 & 4.3
	70	* (Note that the _MBCSHeader version is in addition to the converter formatVersion.)
	71	*
	72	* This version is optional. Version 5 is used for incompatible data format changes.
	73	* makeconv will continue to generate version 4 files if possible.
	74	*
	75	* Changes from version 4:
	76	*
	77	* The main difference is an additional _MBCSHeader field with
	78	* - the length (number of uint32_t) of the _MBCSHeader
	79	* - flags for further incompatible data format changes
	80	* - flags for further, backward compatible data format changes
	81	*
	82	* The MBCS_OPT_FROM_U flag indicates that most of the fromUnicode data is omitted from
	83	* the file and needs to be reconstituted at load time.
	84	* This requires a utf8Friendly format with an additional mbcsIndex table for fast
	85	* (and UTF-8-friendly) fromUnicode conversion for Unicode code points up to maxFastUChar.
	86	* (For details about these structures see below, and see ucnvmbcs.h.)
	87	*
	88	* utf8Friendly also implies that the fromUnicode mappings are stored in ascending order
	89	* of the Unicode code points. (This requires that the .ucm file has the |0 etc.
	90	* precision markers for all mappings.)
	91	*
	92	* All fallbacks have been moved to the extension table, leaving only roundtrips in the
	93	* omitted data that can be reconstituted from the toUnicode data.
	94	*
	95	* Of the stage 2 table, the part corresponding to maxFastUChar and below is omitted.
	96	* With only roundtrip mappings in the base fromUnicode data, this part is fully
	97	* redundant with the mbcsIndex and will be reconstituted from that (also using the
	98	* stage 1 table which contains the information about how stage 2 was compacted).
	99	*
	100	* The rest of the stage 2 table, the part for code points above maxFastUChar,
	101	* is stored in the file and will be appended to the reconstituted part.
	102	*
	103	* The entire fromUBytes array is omitted from the file and will be reconstituted.
	104	* This is done by enumerating all toUnicode roundtrip mappings, performing
	105	* each mapping (using the stage 1 and reconstituted stage 2 tables) and
	106	* writing instead of reading the byte values.
	107	*
	108	* _MBCSHeader version 4.3
	109	*
	110	* Change from version 4.2:
	111	* - Optional utf8Friendly data structures, with 64-entry stage 3 block
	112	* allocation for parts of the BMP, and an additional mbcsIndex in non-SBCS
	113	* files which can be used instead of stages 1 & 2.
	114	* Faster lookups for roundtrips from most commonly used characters,
	115	* and lookups from UTF-8 byte sequences with a natural bit distribution.
	116	* See ucnvmbcs.h for more details.
	117	*
	118	* Change from version 4.1:
	119	* - Added an optional extension table structure at the end of the .cnv file.
	120	* It is present if the upper bits of the header flags field contains a non-zero
	121	* byte offset to it.
	122	* Files that contain only a conversion table and no base table
	123	* use the special outputType MBCS_OUTPUT_EXT_ONLY.
	124	* These contain the base table name between the MBCS header and the extension
	125	* data.
	126	*
	127	* Change from version 4.0:
	128	* - Replace header.reserved with header.fromUBytesLength so that all
	129	* fields in the data have length.
	130	*
	131	* Changes from version 3 (for performance improvements):
	132	* - new bit distribution for state table entries
	133	* - reordered action codes
	134	* - new data structure for single-byte fromUnicode
	135	* + stage 2 only contains indexes
	136	* + stage 3 stores 16 bits per character with classification bits 15..8
	137	* - no multiplier for stage 1 entries
	138	* - stage 2 for non-single-byte codepages contains the index and the flags in
	139	* one 32-bit value
	140	* - 2-byte and 4-byte fromUnicode results are stored directly as 16/32-bit integers
	141	*
	142	* For more details about old versions of the MBCS data structure, see
	143	* the corresponding versions of this file.
	144	*
	145	* Converting stateless codepage data ---------------------------------------***
	146	* (or codepage data with simple states) to Unicode.
	147	*
	148	* Data structure and algorithm for converting from complex legacy codepages
	149	* to Unicode. (Designed before 2000-may-22.)
	150	*
	151	* The basic idea is that the structure of legacy codepages can be described
	152	* with state tables.
	153	* When reading a byte stream, each input byte causes a state transition.
	154	* Some transitions result in the output of a code point, some result in
	155	* "unassigned" or "illegal" output.
	156	* This is used here for character conversion.
	157	*
	158	* The data structure begins with a state table consisting of a row
	159	* per state, with 256 entries (columns) per row for each possible input
	160	* byte value.
	161	* Each entry is 32 bits wide, with two formats distinguished by
	162	* the sign bit (bit 31):
	163	*
	164	* One format for transitional entries (bit 31 not set) for non-final bytes, and
	165	* one format for final entries (bit 31 set).
	166	* Both formats contain the number of the next state in the same bit
	167	* positions.
	168	* State 0 is the initial state.
	169	*
	170	* Most of the time, the offset values of subsequent states are added
	171	* up to a scalar value. This value will eventually be the index of
	172	* the Unicode code point in a table that follows the state table.
	173	* The effect is that the code points for final state table rows
	174	* are contiguous. The code points of final state rows follow each other
	175	* in the order of the references to those final states by previous
	176	* states, etc.
	177	*
	178	* For some terminal states, the offset is itself the output Unicode
	179	* code point (16 bits for a BMP code point or 20 bits for a supplementary
	180	* code point, stored as code point minus 0x10000 so that 20 bits are enough).
	181	* For others, the code point in the Unicode table is stored with either
	182	* one or two code units: one for BMP code points, two for a pair of
	183	* surrogates.
	184	* All code points for a final state entry take up the same number of code
	185	* units, regardless of whether they all actually _use_ the same number
	186	* of code units. This is necessary for simple array access.
	187	*
	188	* An additional feature comes in with what in ICU is called "fallback"
	189	* mappings:
	190	*
	191	* In addition to round-trippable, precise, 1:1 mappings, there are often
	192	* mappings defined between similar, though not the same, characters.
	193	* Typically, such mappings occur only in fromUnicode mapping tables because
	194	* Unicode has a superset repertoire of most other codepages. However, it
	195	* is possible to provide such mappings in the toUnicode tables, too.
	196	* In this case, the fallback mappings are partly integrated into the
	197	* general state tables because the structure of the encoding includes their
	198	* byte sequences.
	199	* For final entries in an initial state, fallback mappings are stored in
	200	* the entry itself like with roundtrip mappings.
	201	* For other final entries, they are stored in the code units table if
	202	* the entry is for a pair of code units.
	203	* For single-unit results in the code units table, there is no space to
	204	* alternatively hold a fallback mapping; in this case, the code unit
	205	* is stored as U+fffe (unassigned), and the fallback mapping needs to
	206	* be looked up by the scalar offset value in a separate table.
	207	*
	208	* "Unassigned" state entries really mean "structurally unassigned",
	209	* i.e., such a byte sequence will never have a mapping result.
	210	*
	211	* The interpretation of the bits in each entry is as follows:
	212	*
	213	* Bit 31 not set, not a terminal entry ("transitional"):
	214	* 30..24 next state
	215	* 23..0 offset delta, to be added up
	216	*
	217	* Bit 31 set, terminal ("final") entry:
	218	* 30..24 next state (regardless of action code)
	219	* 23..20 action code:
	220	* action codes 0 and 1 result in precise-mapping Unicode code points
	221	* 0 valid byte sequence
	222	* 19..16 not used, 0
	223	* 15..0 16-bit Unicode BMP code point
	224	* never U+fffe or U+ffff
	225	* 1 valid byte sequence
	226	* 19..0 20-bit Unicode supplementary code point
	227	* never U+fffe or U+ffff
	228	*
	229	* action codes 2 and 3 result in fallback (unidirectional-mapping) Unicode code points
	230	* 2 valid byte sequence (fallback)
	231	* 19..16 not used, 0
	232	* 15..0 16-bit Unicode BMP code point as fallback result
	233	* 3 valid byte sequence (fallback)
	234	* 19..0 20-bit Unicode supplementary code point as fallback result
	235	*
	236	* action codes 4 and 5 may result in roundtrip/fallback/unassigned/illegal results
	237	* depending on the code units they result in
	238	* 4 valid byte sequence
	239	* 19..9 not used, 0
	240	* 8..0 final offset delta
	241	* pointing to one 16-bit code unit which may be
	242	* fffe unassigned -- look for a fallback for this offset
	243	* ffff illegal
	244	* 5 valid byte sequence
	245	* 19..9 not used, 0
	246	* 8..0 final offset delta
	247	* pointing to two 16-bit code units
	248	* (typically UTF-16 surrogates)
	249	* the result depends on the first code unit as follows:
	250	* 0000..d7ff roundtrip BMP code point (1st alone)
	251	* d800..dbff roundtrip surrogate pair (1st, 2nd)
	252	* dc00..dfff fallback surrogate pair (1st-400, 2nd)
	253	* e000 roundtrip BMP code point (2nd alone)
	254	* e001 fallback BMP code point (2nd alone)
	255	* fffe unassigned
	256	* ffff illegal
	257	* (the final offset deltas are at most 255 * 2,
	258	* times 2 because of storing code unit pairs)
	259	*
	260	* 6 unassigned byte sequence
	261	* 19..16 not used, 0
	262	* 15..0 16-bit Unicode BMP code point U+fffe (new with version 2)
	263	* this does not contain a final offset delta because the main
	264	* purpose of this action code is to save scalar offset values;
	265	* therefore, fallback values cannot be assigned to byte
	266	* sequences that result in this action code
	267	* 7 illegal byte sequence
	268	* 19..16 not used, 0
	269	* 15..0 16-bit Unicode BMP code point U+ffff (new with version 2)
	270	* 8 state change only
	271	* 19..0 not used, 0
	272	* useful for state changes in simple stateful encodings,
	273	* at Shift-In/Shift-Out codes
	274	*
	275	*
	276	* 9..15 reserved for future use
	277	* current implementations will only perform a state change
	278	* and ignore bits 19..0
	279	*
	280	* An encoding with contiguous ranges of unassigned byte sequences, like
	281	* Shift-JIS and especially EUC-TW, can be stored efficiently by having
	282	* at least two states for the trail bytes:
	283	* One trail byte state that results in code points, and one that only
	284	* has "unassigned" and "illegal" terminal states.
	285	*
	286	* Note: partly by accident, this data structure supports simple stateful
	287	* encodings without any additional logic.
	288	* Currently, only simple Shift-In/Shift-Out schemes are handled with
	289	* appropriate state tables (especially EBCDIC_STATEFUL!).
	290	*
	291	* MBCS version 2 added:
	292	* unassigned and illegal action codes have U+fffe and U+ffff
	293	* instead of unused bits; this is useful for _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP()
	294	*
	295	* Converting from Unicode to codepage bytes --------------------------------***
	296	*
	297	* The conversion data structure for fromUnicode is designed for the known
	298	* structure of Unicode. It maps from 21-bit code points (0..0x10ffff) to
	299	* a sequence of 1..4 bytes, in addition to a flag that indicates if there is
	300	* a roundtrip mapping.
	301	*
	302	* The lookup is done with a 3-stage trie, using 11/6/4 bits for stage 1/2/3
	303	* like in the character properties table.
	304	* The beginning of the trie is at offsetFromUTable, the beginning of stage 3
	305	* with the resulting bytes is at offsetFromUBytes.
	306	*
	307	* Beginning with version 4, single-byte codepages have a significantly different
	308	* trie compared to other codepages.
	309	* In all cases, the entry in stage 1 is directly the index of the block of
	310	* 64 entries in stage 2.
	311	*
	312	* Single-byte lookup:
	313	*
	314	* Stage 2 only contains 16-bit indexes directly to the 16-blocks in stage 3.
	315	* Stage 3 contains one 16-bit word per result:
	316	* Bits 15..8 indicate the kind of result:
	317	* f roundtrip result
	318	* c fallback result from private-use code point
	319	* 8 fallback result from other code points
	320	* 0 unassigned
	321	* Bits 7..0 contain the codepage byte. A zero byte is always possible.
	322	*
	323	* In version 4.3, the runtime code can build an sbcsIndex for a utf8Friendly
	324	* file. For 2-byte UTF-8 byte sequences and some 3-byte sequences the lookup
	325	* becomes a 2-stage (single-index) trie lookup with 6 bits for stage 3.
	326	* ASCII code points can be looked up with a linear array access into stage 3.
	327	* See maxFastUChar and other details in ucnvmbcs.h.
	328	*
	329	* Multi-byte lookup:
	330	*
	331	* Stage 2 contains a 32-bit word for each 16-block in stage 3:
	332	* Bits 31..16 contain flags for which stage 3 entries contain roundtrip results
	333	* test: MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c)
	334	* If this test is false, then a non-zero result will be interpreted as
	335	* a fallback mapping.
	336	* Bits 15..0 contain the index to stage 3, which must be multiplied by 16*(bytes per char)
	337	*
	338	* Stage 3 contains 2, 3, or 4 bytes per result.
	339	* 2 or 4 bytes are stored as uint16_t/uint32_t in platform endianness,
	340	* while 3 bytes are stored as bytes in big-endian order.
	341	* Leading zero bytes are ignored, and the number of bytes is counted.
	342	* A zero byte mapping result is possible as a roundtrip result.
	343	* For some output types, the actual result is processed from this;
	344	* see ucnv_MBCSFromUnicodeWithOffsets().
	345	*
	346	* Note that stage 1 always contains 0x440=1088 entries (0x440==0x110000>>10),
	347	* or (version 3 and up) for BMP-only codepages, it contains 64 entries.
	348	*
	349	* In version 4.3, a utf8Friendly file contains an mbcsIndex table.
	350	* For 2-byte UTF-8 byte sequences and most 3-byte sequences the lookup
	351	* becomes a 2-stage (single-index) trie lookup with 6 bits for stage 3.
	352	* ASCII code points can be looked up with a linear array access into stage 3.
	353	* See maxFastUChar, mbcsIndex and other details in ucnvmbcs.h.
	354	*
	355	* In version 3, stage 2 blocks may overlap by multiples of the multiplier
	356	* for compaction.
	357	* In version 4, stage 2 blocks (and for single-byte codepages, stage 3 blocks)
	358	* may overlap by any number of entries.
	359	*
	360	* MBCS version 2 added:
	361	* the converter checks for known output types, which allows
	362	* adding new ones without crashing an unaware converter
	363	*/
	364
	365	/**
	366	* Callback from ucnv_MBCSEnumToUnicode(), takes 32 mappings from
	367	* consecutive sequences of bytes, starting from the one encoded in value,
	368	* to Unicode code points. (Multiple mappings to reduce per-function call overhead.)
	369	* Does not currently support m:n mappings or reverse fallbacks.
	370	* This function will not be called for sequences of bytes with leading zeros.
	371	*
	372	* @param context an opaque pointer, as passed into ucnv_MBCSEnumToUnicode()
	373	* @param value contains 1..4 bytes of the first byte sequence, right-aligned
	374	* @param codePoints resulting Unicode code points, or negative if a byte sequence does
	375	* not map to anything
	376	* @return TRUE to continue enumeration, FALSE to stop
	377	*/
	378	typedef UBool U_CALLCONV
	379	UConverterEnumToUCallback(const void *context, uint32_t value, UChar32 codePoints[32]);
	380
	381	static void U_CALLCONV
	382	ucnv_MBCSLoad(UConverterSharedData *sharedData,
	383	UConverterLoadArgs *pArgs,
	384	const uint8_t *raw,
	385	UErrorCode *pErrorCode);
	386
	387	static void U_CALLCONV
	388	ucnv_MBCSUnload(UConverterSharedData *sharedData);
	389
	390	static void U_CALLCONV
	391	ucnv_MBCSOpen(UConverter *cnv,
	392	UConverterLoadArgs *pArgs,
	393	UErrorCode *pErrorCode);
	394
	395	static UChar32 U_CALLCONV
	396	ucnv_MBCSGetNextUChar(UConverterToUnicodeArgs *pArgs,
	397	UErrorCode *pErrorCode);
	398
	399	static void U_CALLCONV
	400	ucnv_MBCSGetStarters(const UConverter* cnv,
	401	UBool starters[256],
	402	UErrorCode *pErrorCode);
	403
	404	U_CDECL_BEGIN
	405	static const char* U_CALLCONV
	406	ucnv_MBCSGetName(const UConverter *cnv);
	407	U_CDECL_END
	408
	409	static void U_CALLCONV
	410	ucnv_MBCSWriteSub(UConverterFromUnicodeArgs *pArgs,
	411	int32_t offsetIndex,
	412	UErrorCode *pErrorCode);
	413
	414	static UChar32 U_CALLCONV
	415	ucnv_MBCSGetNextUChar(UConverterToUnicodeArgs *pArgs,
	416	UErrorCode *pErrorCode);
	417
	418	static void U_CALLCONV
	419	ucnv_SBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
	420	UConverterToUnicodeArgs *pToUArgs,
	421	UErrorCode *pErrorCode);
	422
	423	static void U_CALLCONV
	424	ucnv_MBCSGetUnicodeSet(const UConverter *cnv,
	425	const USetAdder *sa,
	426	UConverterUnicodeSet which,
	427	UErrorCode *pErrorCode);
	428
	429	static void U_CALLCONV
	430	ucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
	431	UConverterToUnicodeArgs *pToUArgs,
	432	UErrorCode *pErrorCode);
	433
	434	static const UConverterImpl _SBCSUTF8Impl={
	435	UCNV_MBCS,
	436
	437	ucnv_MBCSLoad,
	438	ucnv_MBCSUnload,
	439
	440	ucnv_MBCSOpen,
	441	NULL,
	442	NULL,
	443
	444	ucnv_MBCSToUnicodeWithOffsets,
	445	ucnv_MBCSToUnicodeWithOffsets,
	446	ucnv_MBCSFromUnicodeWithOffsets,
	447	ucnv_MBCSFromUnicodeWithOffsets,
	448	ucnv_MBCSGetNextUChar,
	449
	450	ucnv_MBCSGetStarters,
	451	ucnv_MBCSGetName,
	452	ucnv_MBCSWriteSub,
	453	NULL,
	454	ucnv_MBCSGetUnicodeSet,
	455
	456	NULL,
	457	ucnv_SBCSFromUTF8
	458	};
	459
	460	static const UConverterImpl _DBCSUTF8Impl={
	461	UCNV_MBCS,
	462
	463	ucnv_MBCSLoad,
	464	ucnv_MBCSUnload,
	465
	466	ucnv_MBCSOpen,
	467	NULL,
	468	NULL,
	469
	470	ucnv_MBCSToUnicodeWithOffsets,
	471	ucnv_MBCSToUnicodeWithOffsets,
	472	ucnv_MBCSFromUnicodeWithOffsets,
	473	ucnv_MBCSFromUnicodeWithOffsets,
	474	ucnv_MBCSGetNextUChar,
	475
	476	ucnv_MBCSGetStarters,
	477	ucnv_MBCSGetName,
	478	ucnv_MBCSWriteSub,
	479	NULL,
	480	ucnv_MBCSGetUnicodeSet,
	481
	482	NULL,
	483	ucnv_DBCSFromUTF8
	484	};
	485
	486	static const UConverterImpl _MBCSImpl={
	487	UCNV_MBCS,
	488
	489	ucnv_MBCSLoad,
	490	ucnv_MBCSUnload,
	491
	492	ucnv_MBCSOpen,
	493	NULL,
	494	NULL,
	495
	496	ucnv_MBCSToUnicodeWithOffsets,
	497	ucnv_MBCSToUnicodeWithOffsets,
	498	ucnv_MBCSFromUnicodeWithOffsets,
	499	ucnv_MBCSFromUnicodeWithOffsets,
	500	ucnv_MBCSGetNextUChar,
	501
	502	ucnv_MBCSGetStarters,
	503	ucnv_MBCSGetName,
	504	ucnv_MBCSWriteSub,
	505	NULL,
	506	ucnv_MBCSGetUnicodeSet,
	507	NULL,
	508	NULL
	509	};
	510
	511	/* Static data is in tools/makeconv/ucnvstat.c for data-based
	512	* converters. Be sure to update it as well.
	513	*/
	514
	515	const UConverterSharedData _MBCSData={
	516	sizeof(UConverterSharedData), 1,
	517	NULL, NULL, FALSE, TRUE, &_MBCSImpl,
	518	0, UCNV_MBCS_TABLE_INITIALIZER
	519	};
	520
	521
	522	/* GB 18030 data ------------------------------------------------------------ */
	523
	524	/* helper macros for linear values for GB 18030 four-byte sequences */
	525	#define LINEAR_18030(a, b, c, d) ((((a)10+(b))126L+(c))*10L+(d))
	526
	527	#define LINEAR_18030_BASE LINEAR_18030(0x81, 0x30, 0x81, 0x30)
	528
	529	#define LINEAR(x) LINEAR_18030(x>>24, (x>>16)&0xff, (x>>8)&0xff, x&0xff)
	530
	531	/*
	532	* Some ranges of GB 18030 where both the Unicode code points and the
	533	* GB four-byte sequences are contiguous and are handled algorithmically by
	534	* the special callback functions below.
	535	* The values are start & end of Unicode & GB codes.
	536	*
	537	* Note that single surrogates are not mapped by GB 18030
	538	* as of the re-released mapping tables from 2000-nov-30.
	539	*/
	540	static const uint32_t
	541	gb18030Ranges[14][4]={
	542	{0x10000, 0x10FFFF, LINEAR(0x90308130), LINEAR(0xE3329A35)},
	543	{0x9FA6, 0xD7FF, LINEAR(0x82358F33), LINEAR(0x8336C738)},
	544	{0x0452, 0x1E3E, LINEAR(0x8130D330), LINEAR(0x8135F436)},
	545	{0x1E40, 0x200F, LINEAR(0x8135F438), LINEAR(0x8136A531)},
	546	{0xE865, 0xF92B, LINEAR(0x8336D030), LINEAR(0x84308534)},
	547	{0x2643, 0x2E80, LINEAR(0x8137A839), LINEAR(0x8138FD38)},
	548	{0xFA2A, 0xFE2F, LINEAR(0x84309C38), LINEAR(0x84318537)},
	549	{0x3CE1, 0x4055, LINEAR(0x8231D438), LINEAR(0x8232AF32)},
	550	{0x361B, 0x3917, LINEAR(0x8230A633), LINEAR(0x8230F237)},
	551	{0x49B8, 0x4C76, LINEAR(0x8234A131), LINEAR(0x8234E733)},
	552	{0x4160, 0x4336, LINEAR(0x8232C937), LINEAR(0x8232F837)},
	553	{0x478E, 0x4946, LINEAR(0x8233E838), LINEAR(0x82349638)},
	554	{0x44D7, 0x464B, LINEAR(0x8233A339), LINEAR(0x8233C931)},
	555	{0xFFE6, 0xFFFF, LINEAR(0x8431A234), LINEAR(0x8431A439)}
	556	};
	557
	558	/* bit flag for UConverter.options indicating GB 18030 special handling */
	559	#define _MBCS_OPTION_GB18030 0x8000
	560
	561	/* bit flag for UConverter.options indicating KEIS,JEF,JIF special handling */
	562	#define _MBCS_OPTION_KEIS 0x01000
	563	#define _MBCS_OPTION_JEF 0x02000
	564	#define _MBCS_OPTION_JIPS 0x04000
	565
	566	#define KEIS_SO_CHAR_1 0x0A
	567	#define KEIS_SO_CHAR_2 0x42
	568	#define KEIS_SI_CHAR_1 0x0A
	569	#define KEIS_SI_CHAR_2 0x41
	570
	571	#define JEF_SO_CHAR 0x28
	572	#define JEF_SI_CHAR 0x29
	573
	574	#define JIPS_SO_CHAR_1 0x1A
	575	#define JIPS_SO_CHAR_2 0x70
	576	#define JIPS_SI_CHAR_1 0x1A
	577	#define JIPS_SI_CHAR_2 0x71
	578
	579	enum SISO_Option {
	580	SI,
	581	SO
	582	};
	583	typedef enum SISO_Option SISO_Option;
	584
	585	static int32_t getSISOBytes(SISO_Option option, uint32_t cnvOption, uint8_t *value) {
	586	int32_t SISOLength = 0;
	587
	588	switch (option) {
	589	case SI:
	590	if ((cnvOption&_MBCS_OPTION_KEIS)!=0) {
	591	value[0] = KEIS_SI_CHAR_1;
	592	value[1] = KEIS_SI_CHAR_2;
	593	SISOLength = 2;
	594	} else if ((cnvOption&_MBCS_OPTION_JEF)!=0) {
	595	value[0] = JEF_SI_CHAR;
	596	SISOLength = 1;
	597	} else if ((cnvOption&_MBCS_OPTION_JIPS)!=0) {
	598	value[0] = JIPS_SI_CHAR_1;
	599	value[1] = JIPS_SI_CHAR_2;
	600	SISOLength = 2;
	601	} else {
	602	value[0] = UCNV_SI;
	603	SISOLength = 1;
	604	}
	605	break;
	606	case SO:
	607	if ((cnvOption&_MBCS_OPTION_KEIS)!=0) {
	608	value[0] = KEIS_SO_CHAR_1;
	609	value[1] = KEIS_SO_CHAR_2;
	610	SISOLength = 2;
	611	} else if ((cnvOption&_MBCS_OPTION_JEF)!=0) {
	612	value[0] = JEF_SO_CHAR;
	613	SISOLength = 1;
	614	} else if ((cnvOption&_MBCS_OPTION_JIPS)!=0) {
	615	value[0] = JIPS_SO_CHAR_1;
	616	value[1] = JIPS_SO_CHAR_2;
	617	SISOLength = 2;
	618	} else {
	619	value[0] = UCNV_SO;
	620	SISOLength = 1;
	621	}
	622	break;
	623	default:
	624	/* Should never happen. */
	625	break;
	626	}
	627
	628	return SISOLength;
	629	}
	630
	631	/* Miscellaneous ------------------------------------------------------------ */
	632
	633	/* similar to ucnv_MBCSGetNextUChar() but recursive */
	634	static UBool
	635	enumToU(UConverterMBCSTable *mbcsTable, int8_t stateProps[],
	636	int32_t state, uint32_t offset,
	637	uint32_t value,
	638	UConverterEnumToUCallback callback, const void context,
	639	UErrorCode *pErrorCode) {
	640	UChar32 codePoints[32];
	641	const int32_t *row;
	642	const uint16_t *unicodeCodeUnits;
	643	UChar32 anyCodePoints;
	644	int32_t b, limit;
	645
	646	row=mbcsTable->stateTable[state];
	647	unicodeCodeUnits=mbcsTable->unicodeCodeUnits;
	648
	649	value<<=8;
	650	anyCodePoints=-1; /* becomes non-negative if there is a mapping */
	651
	652	b=(stateProps[state]&0x38)<<2;
	653	if(b==0 && stateProps[state]>=0x40) {
	654	/* skip byte sequences with leading zeros because they are not stored in the fromUnicode table */
	655	codePoints[0]=U_SENTINEL;
	656	b=1;
	657	}
	658	limit=((stateProps[state]&7)+1)<<5;
	659	while(b<limit) {
	660	int32_t entry=row[b];
	661	if(MBCS_ENTRY_IS_TRANSITION(entry)) {
	662	int32_t nextState=MBCS_ENTRY_TRANSITION_STATE(entry);
	663	if(stateProps[nextState]>=0) {
	664	/* recurse to a state with non-ignorable actions */
	665	if(!enumToU(
	666	mbcsTable, stateProps, nextState,
	667	offset+MBCS_ENTRY_TRANSITION_OFFSET(entry),
	668	value\|(uint32_t)b,
	669	callback, context,
	670	pErrorCode)) {
	671	return FALSE;
	672	}
	673	}
	674	codePoints[b&0x1f]=U_SENTINEL;
	675	} else {
	676	UChar32 c;
	677	int32_t action;
	678
	679	/*
	680	* An if-else-if chain provides more reliable performance for
	681	* the most common cases compared to a switch.
	682	*/
	683	action=MBCS_ENTRY_FINAL_ACTION(entry);
	684	if(action==MBCS_STATE_VALID_DIRECT_16) {
	685	/* output BMP code point */
	686	c=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	687	} else if(action==MBCS_STATE_VALID_16) {
	688	int32_t finalOffset=offset+MBCS_ENTRY_FINAL_VALUE_16(entry);
	689	c=unicodeCodeUnits[finalOffset];
	690	if(c<0xfffe) {
	691	/* output BMP code point */
	692	} else {
	693	c=U_SENTINEL;
	694	}
	695	} else if(action==MBCS_STATE_VALID_16_PAIR) {
	696	int32_t finalOffset=offset+MBCS_ENTRY_FINAL_VALUE_16(entry);
	697	c=unicodeCodeUnits[finalOffset++];
	698	if(c<0xd800) {
	699	/* output BMP code point below 0xd800 */
	700	} else if(c<=0xdbff) {
	701	/* output roundtrip or fallback supplementary code point */
	702	c=((c&0x3ff)<<10)+unicodeCodeUnits[finalOffset]+(0x10000-0xdc00);
	703	} else if(c==0xe000) {
	704	/* output roundtrip BMP code point above 0xd800 or fallback BMP code point */
	705	c=unicodeCodeUnits[finalOffset];
	706	} else {
	707	c=U_SENTINEL;
	708	}
	709	} else if(action==MBCS_STATE_VALID_DIRECT_20) {
	710	/* output supplementary code point */
	711	c=(UChar32)(MBCS_ENTRY_FINAL_VALUE(entry)+0x10000);
	712	} else {
	713	c=U_SENTINEL;
	714	}
	715
	716	codePoints[b&0x1f]=c;
	717	anyCodePoints&=c;
	718	}
	719	if(((++b)&0x1f)==0) {
	720	if(anyCodePoints>=0) {
	721	if(!callback(context, value\|(uint32_t)(b-0x20), codePoints)) {
	722	return FALSE;
	723	}
	724	anyCodePoints=-1;
	725	}
	726	}
	727	}
	728	return TRUE;
	729	}
	730
	731	/*
	732	* Only called if stateProps[state]==-1.
	733	* A recursive call may do stateProps[state]\|=0x40 if this state is the target of an
	734	* MBCS_STATE_CHANGE_ONLY.
	735	*/
	736	static int8_t
	737	getStateProp(const int32_t (*stateTable)[256], int8_t stateProps[], int state) {
	738	const int32_t *row;
	739	int32_t min, max, entry, nextState;
	740
	741	row=stateTable[state];
	742	stateProps[state]=0;
	743
	744	/* find first non-ignorable state */
	745	for(min=0;; ++min) {
	746	entry=row[min];
	747	nextState=MBCS_ENTRY_STATE(entry);
	748	if(stateProps[nextState]==-1) {
	749	getStateProp(stateTable, stateProps, nextState);
	750	}
	751	if(MBCS_ENTRY_IS_TRANSITION(entry)) {
	752	if(stateProps[nextState]>=0) {
	753	break;
	754	}
	755	} else if(MBCS_ENTRY_FINAL_ACTION(entry)<MBCS_STATE_UNASSIGNED) {
	756	break;
	757	}
	758	if(min==0xff) {
	759	stateProps[state]=-0x40; /* (int8_t)0xc0 */
	760	return stateProps[state];
	761	}
	762	}
	763	stateProps[state]\|=(int8_t)((min>>5)<<3);
	764
	765	/* find last non-ignorable state */
	766	for(max=0xff; min<max; --max) {
	767	entry=row[max];
	768	nextState=MBCS_ENTRY_STATE(entry);
	769	if(stateProps[nextState]==-1) {
	770	getStateProp(stateTable, stateProps, nextState);
	771	}
	772	if(MBCS_ENTRY_IS_TRANSITION(entry)) {
	773	if(stateProps[nextState]>=0) {
	774	break;
	775	}
	776	} else if(MBCS_ENTRY_FINAL_ACTION(entry)<MBCS_STATE_UNASSIGNED) {
	777	break;
	778	}
	779	}
	780	stateProps[state]\|=(int8_t)(max>>5);
	781
	782	/* recurse further and collect direct-state information */
	783	while(min<=max) {
	784	entry=row[min];
	785	nextState=MBCS_ENTRY_STATE(entry);
	786	if(stateProps[nextState]==-1) {
	787	getStateProp(stateTable, stateProps, nextState);
	788	}
	789	if(MBCS_ENTRY_IS_FINAL(entry)) {
	790	stateProps[nextState]\|=0x40;
	791	if(MBCS_ENTRY_FINAL_ACTION(entry)<=MBCS_STATE_FALLBACK_DIRECT_20) {
	792	stateProps[state]\|=0x40;
	793	}
	794	}
	795	++min;
	796	}
	797	return stateProps[state];
	798	}
	799
	800	/*
	801	* Internal function enumerating the toUnicode data of an MBCS converter.
	802	* Currently only used for reconstituting data for a MBCS_OPT_NO_FROM_U
	803	* table, but could also be used for a future ucnv_getUnicodeSet() option
	804	* that includes reverse fallbacks (after updating this function's implementation).
	805	* Currently only handles roundtrip mappings.
	806	* Does not currently handle extensions.
	807	*/
	808	static void
	809	ucnv_MBCSEnumToUnicode(UConverterMBCSTable *mbcsTable,
	810	UConverterEnumToUCallback callback, const void context,
	811	UErrorCode *pErrorCode) {
	812	/*
	813	* Properties for each state, to speed up the enumeration.
	814	* Ignorable actions are unassigned/illegal/state-change-only:
	815	* They do not lead to mappings.
	816	*
	817	* Bits 7..6:
	818	* 1 direct/initial state (stateful converters have multiple)
	819	* 0 non-initial state with transitions or with non-ignorable result actions
	820	* -1 final state with only ignorable actions
	821	*
	822	* Bits 5..3:
	823	* The lowest byte value with non-ignorable actions is
	824	* value<<5 (rounded down).
	825	*
	826	* Bits 2..0:
	827	* The highest byte value with non-ignorable actions is
	828	* (value<<5)&0x1f (rounded up).
	829	*/
	830	int8_t stateProps[MBCS_MAX_STATE_COUNT];
	831	int32_t state;
	832
	833	uprv_memset(stateProps, -1, sizeof(stateProps));
	834
	835	/* recurse from state 0 and set all stateProps */
	836	getStateProp(mbcsTable->stateTable, stateProps, 0);
	837
	838	for(state=0; state<mbcsTable->countStates; ++state) {
	839	/*if(stateProps[state]==-1) {
	840	printf("unused/unreachable <icu:state> %d\n", state);
	841	}*/
	842	if(stateProps[state]>=0x40) {
	843	/* start from each direct state */
	844	enumToU(
	845	mbcsTable, stateProps, state, 0, 0,
	846	callback, context,
	847	pErrorCode);
	848	}
	849	}
	850	}
	851
	852	U_CFUNC void
	853	ucnv_MBCSGetFilteredUnicodeSetForUnicode(const UConverterSharedData *sharedData,
	854	const USetAdder *sa,
	855	UConverterUnicodeSet which,
	856	UConverterSetFilter filter,
	857	UErrorCode *pErrorCode) {
	858	const UConverterMBCSTable *mbcsTable;
	859	const uint16_t *table;
	860
	861	uint32_t st3;
	862	uint16_t st1, maxStage1, st2;
	863
	864	UChar32 c;
	865
	866	/* enumerate the from-Unicode trie table */
	867	mbcsTable=&sharedData->mbcs;
	868	table=mbcsTable->fromUnicodeTable;
	869	if(mbcsTable->unicodeMask&UCNV_HAS_SUPPLEMENTARY) {
	870	maxStage1=0x440;
	871	} else {
	872	maxStage1=0x40;
	873	}
	874
	875	c=0; /* keep track of the current code point while enumerating */
	876
	877	if(mbcsTable->outputType==MBCS_OUTPUT_1) {
	878	const uint16_t stage2, stage3, *results;
	879	uint16_t minValue;
	880
	881	results=(const uint16_t *)mbcsTable->fromUnicodeBytes;
	882
	883	/*
	884	* Set a threshold variable for selecting which mappings to use.
	885	* See ucnv_MBCSSingleFromBMPWithOffsets() and
	886	* MBCS_SINGLE_RESULT_FROM_U() for details.
	887	*/
	888	if(which==UCNV_ROUNDTRIP_SET) {
	889	/* use only roundtrips */
	890	minValue=0xf00;
	891	} else /* UCNV_ROUNDTRIP_AND_FALLBACK_SET */ {
	892	/* use all roundtrip and fallback results */
	893	minValue=0x800;
	894	}
	895
	896	for(st1=0; st1<maxStage1; ++st1) {
	897	st2=table[st1];
	898	if(st2>maxStage1) {
	899	stage2=table+st2;
	900	for(st2=0; st2<64; ++st2) {
	901	if((st3=stage2[st2])!=0) {
	902	/* read the stage 3 block */
	903	stage3=results+st3;
	904
	905	do {
	906	if(*stage3++>=minValue) {
	907	sa->add(sa->set, c);
	908	}
	909	} while((++c&0xf)!=0);
	910	} else {
	911	c+=16; /* empty stage 3 block */
	912	}
	913	}
	914	} else {
	915	c+=1024; /* empty stage 2 block */
	916	}
	917	}
	918	} else {
	919	const uint32_t *stage2;
	920	const uint8_t stage3, bytes;
	921	uint32_t st3Multiplier;
	922	uint32_t value;
	923	UBool useFallback;
	924
	925	bytes=mbcsTable->fromUnicodeBytes;
	926
	927	useFallback=(UBool)(which==UCNV_ROUNDTRIP_AND_FALLBACK_SET);
	928
	929	switch(mbcsTable->outputType) {
	930	case MBCS_OUTPUT_3:
	931	case MBCS_OUTPUT_4_EUC:
	932	st3Multiplier=3;
	933	break;
	934	case MBCS_OUTPUT_4:
	935	st3Multiplier=4;
	936	break;
	937	default:
	938	st3Multiplier=2;
	939	break;
	940	}
	941
	942	for(st1=0; st1<maxStage1; ++st1) {
	943	st2=table[st1];
	944	if(st2>(maxStage1>>1)) {
	945	stage2=(const uint32_t *)table+st2;
	946	for(st2=0; st2<64; ++st2) {
	947	if((st3=stage2[st2])!=0) {
	948	/* read the stage 3 block */
	949	stage3=bytes+st3Multiplier16(uint32_t)(uint16_t)st3;
	950
	951	/* get the roundtrip flags for the stage 3 block */
	952	st3>>=16;
	953
	954	/*
	955	* Add code points for which the roundtrip flag is set,
	956	* or which map to non-zero bytes if we use fallbacks.
	957	* See ucnv_MBCSFromUnicodeWithOffsets() for details.
	958	*/
	959	switch(filter) {
	960	case UCNV_SET_FILTER_NONE:
	961	do {
	962	if(st3&1) {
	963	sa->add(sa->set, c);
	964	stage3+=st3Multiplier;
	965	} else if(useFallback) {
	966	uint8_t b=0;
	967	switch(st3Multiplier) {
	968	case 4:
	969	b\|=*stage3++;
	970	U_FALLTHROUGH;
	971	case 3:
	972	b\|=*stage3++;
	973	U_FALLTHROUGH;
	974	case 2:
	975	b\|=stage3[0]\|stage3[1];
	976	stage3+=2;
	977	U_FALLTHROUGH;
	978	default:
	979	break;
	980	}
	981	if(b!=0) {
	982	sa->add(sa->set, c);
	983	}
	984	}
	985	st3>>=1;
	986	} while((++c&0xf)!=0);
	987	break;
	988	case UCNV_SET_FILTER_DBCS_ONLY:
	989	/* Ignore single-byte results (<0x100). */
	990	do {
	991	if(((st3&1)!=0 \|\| useFallback) && ((const uint16_t )stage3)>=0x100) {
	992	sa->add(sa->set, c);
	993	}
	994	st3>>=1;
	995	stage3+=2; /* +=st3Multiplier */
	996	} while((++c&0xf)!=0);
	997	break;
	998	case UCNV_SET_FILTER_2022_CN:
	999	/* Only add code points that map to CNS 11643 planes 1 & 2 for non-EXT ISO-2022-CN. */
	1000	do {
	1001	if(((st3&1)!=0 \|\| useFallback) && ((value=*stage3)==0x81 \|\| value==0x82)) {
	1002	sa->add(sa->set, c);
	1003	}
	1004	st3>>=1;
	1005	stage3+=3; /* +=st3Multiplier */
	1006	} while((++c&0xf)!=0);
	1007	break;
	1008	case UCNV_SET_FILTER_SJIS:
	1009	/* Only add code points that map to Shift-JIS codes corresponding to JIS X 0208. */
	1010	do {
	1011	if(((st3&1)!=0 \|\| useFallback) && (value=((const uint16_t )stage3))>=0x8140 && value<=0xeffc) {
	1012	sa->add(sa->set, c);
	1013	}
	1014	st3>>=1;
	1015	stage3+=2; /* +=st3Multiplier */
	1016	} while((++c&0xf)!=0);
	1017	break;
	1018	case UCNV_SET_FILTER_GR94DBCS:
	1019	/* Only add code points that map to ISO 2022 GR 94 DBCS codes (each byte A1..FE). */
	1020	do {
	1021	if( ((st3&1)!=0 \|\| useFallback) &&
	1022	(uint16_t)((value=((const uint16_t )stage3)) - 0xa1a1)<=(0xfefe - 0xa1a1) &&
	1023	(uint8_t)(value-0xa1)<=(0xfe - 0xa1)
	1024	) {
	1025	sa->add(sa->set, c);
	1026	}
	1027	st3>>=1;
	1028	stage3+=2; /* +=st3Multiplier */
	1029	} while((++c&0xf)!=0);
	1030	break;
	1031	case UCNV_SET_FILTER_HZ:
	1032	/* Only add code points that are suitable for HZ DBCS (lead byte A1..FD). */
	1033	do {
	1034	if( ((st3&1)!=0 \|\| useFallback) &&
	1035	(uint16_t)((value=((const uint16_t )stage3))-0xa1a1)<=(0xfdfe - 0xa1a1) &&
	1036	(uint8_t)(value-0xa1)<=(0xfe - 0xa1)
	1037	) {
	1038	sa->add(sa->set, c);
	1039	}
	1040	st3>>=1;
	1041	stage3+=2; /* +=st3Multiplier */
	1042	} while((++c&0xf)!=0);
	1043	break;
	1044	default:
	1045	*pErrorCode=U_INTERNAL_PROGRAM_ERROR;
	1046	return;
	1047	}
	1048	} else {
	1049	c+=16; /* empty stage 3 block */
	1050	}
	1051	}
	1052	} else {
	1053	c+=1024; /* empty stage 2 block */
	1054	}
	1055	}
	1056	}
	1057
	1058	ucnv_extGetUnicodeSet(sharedData, sa, which, filter, pErrorCode);
	1059	}
	1060
	1061	U_CFUNC void
	1062	ucnv_MBCSGetUnicodeSetForUnicode(const UConverterSharedData *sharedData,
	1063	const USetAdder *sa,
	1064	UConverterUnicodeSet which,
	1065	UErrorCode *pErrorCode) {
	1066	ucnv_MBCSGetFilteredUnicodeSetForUnicode(
	1067	sharedData, sa, which,
	1068	sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY ?
	1069	UCNV_SET_FILTER_DBCS_ONLY :
	1070	UCNV_SET_FILTER_NONE,
	1071	pErrorCode);
	1072	}
	1073
	1074	static void U_CALLCONV
	1075	ucnv_MBCSGetUnicodeSet(const UConverter *cnv,
	1076	const USetAdder *sa,
	1077	UConverterUnicodeSet which,
	1078	UErrorCode *pErrorCode) {
	1079	if(cnv->options&_MBCS_OPTION_GB18030) {
	1080	sa->addRange(sa->set, 0, 0xd7ff);
	1081	sa->addRange(sa->set, 0xe000, 0x10ffff);
	1082	} else {
	1083	ucnv_MBCSGetUnicodeSetForUnicode(cnv->sharedData, sa, which, pErrorCode);
	1084	}
	1085	}
	1086
	1087	/* conversion extensions for input not in the main table -------------------- */
	1088
	1089	/*
	1090	* Hardcoded extension handling for GB 18030.
	1091	* Definition of LINEAR macros and gb18030Ranges see near the beginning of the file.
	1092	*
	1093	* In the future, conversion extensions may handle m:n mappings and delta tables,
	1094	* see http://source.icu-project.org/repos/icu/icuhtml/trunk/design/conversion/conversion_extensions.html
	1095	*
	1096	* If an input character cannot be mapped, then these functions set an error
	1097	* code. The framework will then call the callback function.
	1098	*/
	1099
	1100	/*
	1101	* @return if(U_FAILURE) return the code point for cnv->fromUChar32
	1102	* else return 0 after output has been written to the target
	1103	*/
	1104	static UChar32
	1105	_extFromU(UConverter cnv, const UConverterSharedData sharedData,
	1106	UChar32 cp,
	1107	const UChar *source, const UChar sourceLimit,
	1108	uint8_t *target, const uint8_t targetLimit,
	1109	int32_t **offsets, int32_t sourceIndex,
	1110	UBool flush,
	1111	UErrorCode *pErrorCode) {
	1112	const int32_t *cx;
	1113
	1114	cnv->useSubChar1=FALSE;
	1115
	1116	if( (cx=sharedData->mbcs.extIndexes)!=NULL &&
	1117	ucnv_extInitialMatchFromU(
	1118	cnv, cx,
	1119	cp, source, sourceLimit,
	1120	(char *)target, (char )targetLimit,
	1121	offsets, sourceIndex,
	1122	flush,
	1123	pErrorCode)
	1124	) {
	1125	return 0; /* an extension mapping handled the input */
	1126	}
	1127
	1128	/* GB 18030 */
	1129	if((cnv->options&_MBCS_OPTION_GB18030)!=0) {
	1130	const uint32_t *range;
	1131	int32_t i;
	1132
	1133	range=gb18030Ranges[0];
	1134	for(i=0; i<UPRV_LENGTHOF(gb18030Ranges); range+=4, ++i) {
	1135	if(range[0]<=(uint32_t)cp && (uint32_t)cp<=range[1]) {
	1136	/* found the Unicode code point, output the four-byte sequence for it */
	1137	uint32_t linear;
	1138	char bytes[4];
	1139
	1140	/* get the linear value of the first GB 18030 code in this range */
	1141	linear=range[2]-LINEAR_18030_BASE;
	1142
	1143	/* add the offset from the beginning of the range */
	1144	linear+=((uint32_t)cp-range[0]);
	1145
	1146	/* turn this into a four-byte sequence */
	1147	bytes[3]=(char)(0x30+linear%10); linear/=10;
	1148	bytes[2]=(char)(0x81+linear%126); linear/=126;
	1149	bytes[1]=(char)(0x30+linear%10); linear/=10;
	1150	bytes[0]=(char)(0x81+linear);
	1151
	1152	/* output this sequence */
	1153	ucnv_fromUWriteBytes(cnv,
	1154	bytes, 4, (char *)target, (char )targetLimit,
	1155	offsets, sourceIndex, pErrorCode);
	1156	return 0;
	1157	}
	1158	}
	1159	}
	1160
	1161	/* no mapping */
	1162	*pErrorCode=U_INVALID_CHAR_FOUND;
	1163	return cp;
	1164	}
	1165
	1166	/*
	1167	* Input sequence: cnv->toUBytes[0..length[
	1168	* @return if(U_FAILURE) return the length (toULength, byteIndex) for the input
	1169	* else return 0 after output has been written to the target
	1170	*/
	1171	static int8_t
	1172	_extToU(UConverter cnv, const UConverterSharedData sharedData,
	1173	int8_t length,
	1174	const uint8_t *source, const uint8_t sourceLimit,
	1175	UChar *target, const UChar targetLimit,
	1176	int32_t **offsets, int32_t sourceIndex,
	1177	UBool flush,
	1178	UErrorCode *pErrorCode) {
	1179	const int32_t *cx;
	1180
	1181	if( (cx=sharedData->mbcs.extIndexes)!=NULL &&
	1182	ucnv_extInitialMatchToU(
	1183	cnv, cx,
	1184	length, (const char *)source, (const char )sourceLimit,
	1185	target, targetLimit,
	1186	offsets, sourceIndex,
	1187	flush,
	1188	pErrorCode)
	1189	) {
	1190	return 0; /* an extension mapping handled the input */
	1191	}
	1192
	1193	/* GB 18030 */
	1194	if(length==4 && (cnv->options&_MBCS_OPTION_GB18030)!=0) {
	1195	const uint32_t *range;
	1196	uint32_t linear;
	1197	int32_t i;
	1198
	1199	linear=LINEAR_18030(cnv->toUBytes[0], cnv->toUBytes[1], cnv->toUBytes[2], cnv->toUBytes[3]);
	1200	range=gb18030Ranges[0];
	1201	for(i=0; i<UPRV_LENGTHOF(gb18030Ranges); range+=4, ++i) {
	1202	if(range[2]<=linear && linear<=range[3]) {
	1203	/* found the sequence, output the Unicode code point for it */
	1204	*pErrorCode=U_ZERO_ERROR;
	1205
	1206	/* add the linear difference between the input and start sequences to the start code point */
	1207	linear=range[0]+(linear-range[2]);
	1208
	1209	/* output this code point */
	1210	ucnv_toUWriteCodePoint(cnv, linear, target, targetLimit, offsets, sourceIndex, pErrorCode);
	1211
	1212	return 0;
	1213	}
	1214	}
	1215	}
	1216
	1217	/* no mapping */
	1218	*pErrorCode=U_INVALID_CHAR_FOUND;
	1219	return length;
	1220	}
	1221
	1222	/* EBCDIC swap LF<->NL ------------------------------------------------------ */
	1223
	1224	/*
	1225	* This code modifies a standard EBCDIC<->Unicode mapping table for
	1226	* OS/390 (z/OS) Unix System Services (Open Edition).
	1227	* The difference is in the mapping of Line Feed and New Line control codes:
	1228	* Standard EBCDIC maps
	1229	*
	1230	* <U000A> \x25 \|0
	1231	* <U0085> \x15 \|0
	1232	*
	1233	* but OS/390 USS EBCDIC swaps the control codes for LF and NL,
	1234	* mapping
	1235	*
	1236	* <U000A> \x15 \|0
	1237	* <U0085> \x25 \|0
	1238	*
	1239	* This code modifies a loaded standard EBCDIC<->Unicode mapping table
	1240	* by copying it into allocated memory and swapping the LF and NL values.
	1241	* This makes it possible to support the same EBCDIC charset in both versions without
	1242	* duplicating the entire installed table.
	1243	*/
	1244
	/* byte values of Line Feed and New Line in standard EBCDIC */
	#define EBCDIC_LF 0x25
	#define EBCDIC_NL 0x15

	/* the same byte values with the roundtrip flag, as stored in Unicode-to-single-byte tables */
	#define EBCDIC_RT_LF 0xf25
	#define EBCDIC_RT_NL 0xf15

	/* the Unicode code points for Line Feed and New Line */
	#define U_LF 0x0a
	#define U_NL 0x85
	1256
	1257	static UBool
	1258	_EBCDICSwapLFNL(UConverterSharedData sharedData, UErrorCode pErrorCode) {
	1259	UConverterMBCSTable *mbcsTable;
	1260
	1261	const uint16_t table, results;
	1262	const uint8_t *bytes;
	1263
	1264	int32_t (*newStateTable)[256];
	1265	uint16_t *newResults;
	1266	uint8_t *p;
	1267	char *name;
	1268
	1269	uint32_t stage2Entry;
	1270	uint32_t size, sizeofFromUBytes;
	1271
	1272	mbcsTable=&sharedData->mbcs;
	1273
	1274	table=mbcsTable->fromUnicodeTable;
	1275	bytes=mbcsTable->fromUnicodeBytes;
	1276	results=(const uint16_t *)bytes;
	1277
	1278	/*
	1279	* Check that this is an EBCDIC table with SBCS portion -
	1280	* SBCS or EBCDIC_STATEFUL with standard EBCDIC LF and NL mappings.
	1281	*
	1282	* If not, ignore the option. Options are always ignored if they do not apply.
	1283	*/
	1284	if(!(
	1285	(mbcsTable->outputType==MBCS_OUTPUT_1 \|\| mbcsTable->outputType==MBCS_OUTPUT_2_SISO) &&
	1286	mbcsTable->stateTable[0][EBCDIC_LF]==MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_LF) &&
	1287	mbcsTable->stateTable[0][EBCDIC_NL]==MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_NL)
	1288	)) {
	1289	return FALSE;
	1290	}
	1291
	1292	if(mbcsTable->outputType==MBCS_OUTPUT_1) {
	1293	if(!(
	1294	EBCDIC_RT_LF==MBCS_SINGLE_RESULT_FROM_U(table, results, U_LF) &&
	1295	EBCDIC_RT_NL==MBCS_SINGLE_RESULT_FROM_U(table, results, U_NL)
	1296	)) {
	1297	return FALSE;
	1298	}
	1299	} else /* MBCS_OUTPUT_2_SISO */ {
	1300	stage2Entry=MBCS_STAGE_2_FROM_U(table, U_LF);
	1301	if(!(
	1302	MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, U_LF)!=0 &&
	1303	EBCDIC_LF==MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, U_LF)
	1304	)) {
	1305	return FALSE;
	1306	}
	1307
	1308	stage2Entry=MBCS_STAGE_2_FROM_U(table, U_NL);
	1309	if(!(
	1310	MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, U_NL)!=0 &&
	1311	EBCDIC_NL==MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, U_NL)
	1312	)) {
	1313	return FALSE;
	1314	}
	1315	}
	1316
	1317	if(mbcsTable->fromUBytesLength>0) {
	1318	/*
	1319	* We _know_ the number of bytes in the fromUnicodeBytes array
	1320	* starting with header.version 4.1.
	1321	*/
	1322	sizeofFromUBytes=mbcsTable->fromUBytesLength;
	1323	} else {
	1324	/*
	1325	* Otherwise:
	1326	* There used to be code to enumerate the fromUnicode
	1327	* trie and find the highest entry, but it was removed in ICU 3.2
	1328	* because it was not tested and caused a low code coverage number.
	1329	* See Jitterbug 3674.
	1330	* This affects only some .cnv file formats with a header.version
	1331	* below 4.1, and only when swaplfnl is requested.
	1332	*
	1333	* ucnvmbcs.c revision 1.99 is the last one with the
	1334	* ucnv_MBCSSizeofFromUBytes() function.
	1335	*/
	1336	*pErrorCode=U_INVALID_FORMAT_ERROR;
	1337	return FALSE;
	1338	}
	1339
	1340	/*
	1341	* The table has an appropriate format.
	1342	* Allocate and build
	1343	* - a modified to-Unicode state table
	1344	* - a modified from-Unicode output array
	1345	* - a converter name string with the swap option appended
	1346	*/
	1347	size=
	1348	mbcsTable->countStates*1024+
	1349	sizeofFromUBytes+
	1350	UCNV_MAX_CONVERTER_NAME_LENGTH+20;
	1351	p=(uint8_t *)uprv_malloc(size);
	1352	if(p==NULL) {
	1353	*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
	1354	return FALSE;
	1355	}
	1356
	1357	/* copy and modify the to-Unicode state table */
	1358	newStateTable=(int32_t (*)[256])p;
	1359	uprv_memcpy(newStateTable, mbcsTable->stateTable, mbcsTable->countStates*1024);
	1360
	1361	newStateTable[0][EBCDIC_LF]=MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_NL);
	1362	newStateTable[0][EBCDIC_NL]=MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_LF);
	1363
	1364	/* copy and modify the from-Unicode result table */
	1365	newResults=(uint16_t *)newStateTable[mbcsTable->countStates];
	1366	uprv_memcpy(newResults, bytes, sizeofFromUBytes);
	1367
	1368	/* conveniently, the table access macros work on the left side of expressions */
	1369	if(mbcsTable->outputType==MBCS_OUTPUT_1) {
	1370	MBCS_SINGLE_RESULT_FROM_U(table, newResults, U_LF)=EBCDIC_RT_NL;
	1371	MBCS_SINGLE_RESULT_FROM_U(table, newResults, U_NL)=EBCDIC_RT_LF;
	1372	} else /* MBCS_OUTPUT_2_SISO */ {
	1373	stage2Entry=MBCS_STAGE_2_FROM_U(table, U_LF);
	1374	MBCS_VALUE_2_FROM_STAGE_2(newResults, stage2Entry, U_LF)=EBCDIC_NL;
	1375
	1376	stage2Entry=MBCS_STAGE_2_FROM_U(table, U_NL);
	1377	MBCS_VALUE_2_FROM_STAGE_2(newResults, stage2Entry, U_NL)=EBCDIC_LF;
	1378	}
	1379
	1380	/* set the canonical converter name */
	1381	name=(char *)newResults+sizeofFromUBytes;
	1382	uprv_strcpy(name, sharedData->staticData->name);
	1383	uprv_strcat(name, UCNV_SWAP_LFNL_OPTION_STRING);
	1384
	1385	/* set the pointers */
	1386	icu::umtx_lock(NULL);
	1387	if(mbcsTable->swapLFNLStateTable==NULL) {
	1388	mbcsTable->swapLFNLStateTable=newStateTable;
	1389	mbcsTable->swapLFNLFromUnicodeBytes=(uint8_t *)newResults;
	1390	mbcsTable->swapLFNLName=name;
	1391
	1392	newStateTable=NULL;
	1393	}
	1394	icu::umtx_unlock(NULL);
	1395
	1396	/* release the allocated memory if another thread beat us to it */
	1397	if(newStateTable!=NULL) {
	1398	uprv_free(newStateTable);
	1399	}
	1400	return TRUE;
	1401	}
	1402
	1403	/* reconstitute omitted fromUnicode data ------------------------------------ */
	1404
	1405	/* for details, compare with genmbcs.c MBCSAddFromUnicode() and transformEUC() */
	1406	static UBool U_CALLCONV
	1407	writeStage3Roundtrip(const void *context, uint32_t value, UChar32 codePoints[32]) {
	1408	UConverterMBCSTable mbcsTable=(UConverterMBCSTable )context;
	1409	const uint16_t *table;
	1410	uint32_t *stage2;
	1411	uint8_t bytes, p;
	1412	UChar32 c;
	1413	int32_t i, st3;
	1414
	1415	table=mbcsTable->fromUnicodeTable;
	1416	bytes=(uint8_t *)mbcsTable->fromUnicodeBytes;
	1417
	1418	/* for EUC outputTypes, modify the value like genmbcs.c's transformEUC() */
	1419	switch(mbcsTable->outputType) {
	1420	case MBCS_OUTPUT_3_EUC:
	1421	if(value<=0xffff) {
	1422	/* short sequences are stored directly */
	1423	/* code set 0 or 1 */
	1424	} else if(value<=0x8effff) {
	1425	/* code set 2 */
	1426	value&=0x7fff;
	1427	} else /* first byte is 0x8f */ {
	1428	/* code set 3 */
	1429	value&=0xff7f;
	1430	}
	1431	break;
	1432	case MBCS_OUTPUT_4_EUC:
	1433	if(value<=0xffffff) {
	1434	/* short sequences are stored directly */
	1435	/* code set 0 or 1 */
	1436	} else if(value<=0x8effffff) {
	1437	/* code set 2 */
	1438	value&=0x7fffff;
	1439	} else /* first byte is 0x8f */ {
	1440	/* code set 3 */
	1441	value&=0xff7fff;
	1442	}
	1443	break;
	1444	default:
	1445	break;
	1446	}
	1447
	1448	for(i=0; i<=0x1f; ++value, ++i) {
	1449	c=codePoints[i];
	1450	if(c<0) {
	1451	continue;
	1452	}
	1453
	1454	/* locate the stage 2 & 3 data */
	1455	stage2=((uint32_t *)table)+table[c>>10]+((c>>4)&0x3f);
	1456	p=bytes;
	1457	st3=(int32_t)(uint16_t)stage216+(c&0xf);
	1458
	1459	/* write the codepage bytes into stage 3 */
	1460	switch(mbcsTable->outputType) {
	1461	case MBCS_OUTPUT_3:
	1462	case MBCS_OUTPUT_4_EUC:
	1463	p+=st3*3;
	1464	p[0]=(uint8_t)(value>>16);
	1465	p[1]=(uint8_t)(value>>8);
	1466	p[2]=(uint8_t)value;
	1467	break;
	1468	case MBCS_OUTPUT_4:
	1469	((uint32_t *)p)[st3]=value;
	1470	break;
	1471	default:
	1472	/* 2 bytes per character */
	1473	((uint16_t *)p)[st3]=(uint16_t)value;
	1474	break;
	1475	}
	1476
	1477	/* set the roundtrip flag */
	1478	*stage2\|=(1UL<<(16+(c&0xf)));
	1479	}
	1480	return TRUE;
	1481	}
	1482
	1483	static void
	1484	reconstituteData(UConverterMBCSTable *mbcsTable,
	1485	uint32_t stage1Length, uint32_t stage2Length,
	1486	uint32_t fullStage2Length, /* lengths are numbers of units, not bytes */
	1487	UErrorCode *pErrorCode) {
	1488	uint16_t *stage1;
	1489	uint32_t *stage2;
	1490	uint32_t dataLength=stage1Length2+fullStage2Length4+mbcsTable->fromUBytesLength;
	1491	mbcsTable->reconstitutedData=(uint8_t *)uprv_malloc(dataLength);
	1492	if(mbcsTable->reconstitutedData==NULL) {
	1493	*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
	1494	return;
	1495	}
	1496	uprv_memset(mbcsTable->reconstitutedData, 0, dataLength);
	1497
	1498	/* copy existing data and reroute the pointers */
	1499	stage1=(uint16_t *)mbcsTable->reconstitutedData;
	1500	uprv_memcpy(stage1, mbcsTable->fromUnicodeTable, stage1Length*2);
	1501
	1502	stage2=(uint32_t *)(stage1+stage1Length);
	1503	uprv_memcpy(stage2+(fullStage2Length-stage2Length),
	1504	mbcsTable->fromUnicodeTable+stage1Length,
	1505	stage2Length*4);
	1506
	1507	mbcsTable->fromUnicodeTable=stage1;
	1508	mbcsTable->fromUnicodeBytes=(uint8_t *)(stage2+fullStage2Length);
	1509
	1510	/* indexes into stage 2 count from the bottom of the fromUnicodeTable */
	1511	stage2=(uint32_t *)stage1;
	1512
	1513	/* reconstitute the initial part of stage 2 from the mbcsIndex */
	1514	{
	1515	int32_t stageUTF8Length=((int32_t)mbcsTable->maxFastUChar+1)>>6;
	1516	int32_t stageUTF8Index=0;
	1517	int32_t st1, st2, st3, i;
	1518
	1519	for(st1=0; stageUTF8Index<stageUTF8Length; ++st1) {
	1520	st2=stage1[st1];
	1521	if(st2!=(int32_t)stage1Length/2) {
	1522	/* each stage 2 block has 64 entries corresponding to 16 entries in the mbcsIndex */
	1523	for(i=0; i<16; ++i) {
	1524	st3=mbcsTable->mbcsIndex[stageUTF8Index++];
	1525	if(st3!=0) {
	1526	/* an stage 2 entry's index is per stage 3 16-block, not per stage 3 entry */
	1527	st3>>=4;
	1528	/*
	1529	* 4 stage 2 entries point to 4 consecutive stage 3 16-blocks which are
	1530	* allocated together as a single 64-block for access from the mbcsIndex
	1531	*/
	1532	stage2[st2++]=st3++;
	1533	stage2[st2++]=st3++;
	1534	stage2[st2++]=st3++;
	1535	stage2[st2++]=st3;
	1536	} else {
	1537	/* no stage 3 block, skip */
	1538	st2+=4;
	1539	}
	1540	}
	1541	} else {
	1542	/* no stage 2 block, skip */
	1543	stageUTF8Index+=16;
	1544	}
	1545	}
	1546	}
	1547
	1548	/* reconstitute fromUnicodeBytes with roundtrips from toUnicode data */
	1549	ucnv_MBCSEnumToUnicode(mbcsTable, writeStage3Roundtrip, mbcsTable, pErrorCode);
	1550	}
	1551
	1552	/* MBCS setup functions ----------------------------------------------------- */
	1553
	1554	static void U_CALLCONV
	1555	ucnv_MBCSLoad(UConverterSharedData *sharedData,
	1556	UConverterLoadArgs *pArgs,
	1557	const uint8_t *raw,
	1558	UErrorCode *pErrorCode) {
	1559	UDataInfo info;
	1560	UConverterMBCSTable *mbcsTable=&sharedData->mbcs;
	1561	_MBCSHeader header=(_MBCSHeader )raw;
	1562	uint32_t offset;
	1563	uint32_t headerLength;
	1564	UBool noFromU=FALSE;
	1565
	1566	if(header->version[0]==4) {
	1567	headerLength=MBCS_HEADER_V4_LENGTH;
	1568	} else if(header->version[0]==5 && header->version[1]>=3 &&
	1569	(header->options&MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK)==0) {
	1570	headerLength=header->options&MBCS_OPT_LENGTH_MASK;
	1571	noFromU=(UBool)((header->options&MBCS_OPT_NO_FROM_U)!=0);
	1572	} else {
	1573	*pErrorCode=U_INVALID_TABLE_FORMAT;
	1574	return;
	1575	}
	1576
	1577	mbcsTable->outputType=(uint8_t)header->flags;
	1578	if(noFromU && mbcsTable->outputType==MBCS_OUTPUT_1) {
	1579	*pErrorCode=U_INVALID_TABLE_FORMAT;
	1580	return;
	1581	}
	1582
	1583	/* extension data, header version 4.2 and higher */
	1584	offset=header->flags>>8;
	1585	if(offset!=0) {
	1586	mbcsTable->extIndexes=(const int32_t *)(raw+offset);
	1587	}
	1588
	1589	if(mbcsTable->outputType==MBCS_OUTPUT_EXT_ONLY) {
	1590	UConverterLoadArgs args=UCNV_LOAD_ARGS_INITIALIZER;
	1591	UConverterSharedData *baseSharedData;
	1592	const int32_t *extIndexes;
	1593	const char *baseName;
	1594
	1595	/* extension-only file, load the base table and set values appropriately */
	1596	if((extIndexes=mbcsTable->extIndexes)==NULL) {
	1597	/* extension-only file without extension */
	1598	*pErrorCode=U_INVALID_TABLE_FORMAT;
	1599	return;
	1600	}
	1601
	1602	if(pArgs->nestedLoads!=1) {
	1603	/* an extension table must not be loaded as a base table */
	1604	*pErrorCode=U_INVALID_TABLE_FILE;
	1605	return;
	1606	}
	1607
	1608	/* load the base table */
	1609	baseName=(const char )header+headerLength4;
	1610	if(0==uprv_strcmp(baseName, sharedData->staticData->name)) {
	1611	/* forbid loading this same extension-only file */
	1612	*pErrorCode=U_INVALID_TABLE_FORMAT;
	1613	return;
	1614	}
	1615
	1616	/* TODO parse package name out of the prefix of the base name in the extension .cnv file? */
	1617	args.size=sizeof(UConverterLoadArgs);
	1618	args.nestedLoads=2;
	1619	args.onlyTestIsLoadable=pArgs->onlyTestIsLoadable;
	1620	args.reserved=pArgs->reserved;
	1621	args.options=pArgs->options;
	1622	args.pkg=pArgs->pkg;
	1623	args.name=baseName;
	1624	baseSharedData=ucnv_load(&args, pErrorCode);
	1625	if(U_FAILURE(*pErrorCode)) {
	1626	return;
	1627	}
	1628	if( baseSharedData->staticData->conversionType!=UCNV_MBCS \|\|
	1629	baseSharedData->mbcs.baseSharedData!=NULL
	1630	) {
	1631	ucnv_unload(baseSharedData);
	1632	*pErrorCode=U_INVALID_TABLE_FORMAT;
	1633	return;
	1634	}
	1635	if(pArgs->onlyTestIsLoadable) {
	1636	/*
	1637	* Exit as soon as we know that we can load the converter
	1638	* and the format is valid and supported.
	1639	* The worst that can happen in the following code is a memory
	1640	* allocation error.
	1641	*/
	1642	ucnv_unload(baseSharedData);
	1643	return;
	1644	}
	1645
	1646	/* copy the base table data */
	1647	uprv_memcpy(mbcsTable, &baseSharedData->mbcs, sizeof(UConverterMBCSTable));
	1648
	1649	/* overwrite values with relevant ones for the extension converter */
	1650	mbcsTable->baseSharedData=baseSharedData;
	1651	mbcsTable->extIndexes=extIndexes;
	1652
	1653	/*
	1654	* It would be possible to share the swapLFNL data with a base converter,
	1655	* but the generated name would have to be different, and the memory
	1656	* would have to be free'd only once.
	1657	* It is easier to just create the data for the extension converter
	1658	* separately when it is requested.
	1659	*/
	1660	mbcsTable->swapLFNLStateTable=NULL;
	1661	mbcsTable->swapLFNLFromUnicodeBytes=NULL;
	1662	mbcsTable->swapLFNLName=NULL;
	1663
	1664	/*
	1665	* The reconstitutedData must be deleted only when the base converter
	1666	* is unloaded.
	1667	*/
	1668	mbcsTable->reconstitutedData=NULL;
	1669
	1670	/*
	1671	* Set a special, runtime-only outputType if the extension converter
	1672	* is a DBCS version of a base converter that also maps single bytes.
	1673	*/
	1674	if( sharedData->staticData->conversionType==UCNV_DBCS \|\|
	1675	(sharedData->staticData->conversionType==UCNV_MBCS &&
	1676	sharedData->staticData->minBytesPerChar>=2)
	1677	) {
	1678	if(baseSharedData->mbcs.outputType==MBCS_OUTPUT_2_SISO) {
	1679	/* the base converter is SI/SO-stateful */
	1680	int32_t entry;
	1681
	1682	/* get the dbcs state from the state table entry for SO=0x0e */
	1683	entry=mbcsTable->stateTable[0][0xe];
	1684	if( MBCS_ENTRY_IS_FINAL(entry) &&
	1685	MBCS_ENTRY_FINAL_ACTION(entry)==MBCS_STATE_CHANGE_ONLY &&
	1686	MBCS_ENTRY_FINAL_STATE(entry)!=0
	1687	) {
	1688	mbcsTable->dbcsOnlyState=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry);
	1689
	1690	mbcsTable->outputType=MBCS_OUTPUT_DBCS_ONLY;
	1691	}
	1692	} else if(
	1693	baseSharedData->staticData->conversionType==UCNV_MBCS &&
	1694	baseSharedData->staticData->minBytesPerChar==1 &&
	1695	baseSharedData->staticData->maxBytesPerChar==2 &&
	1696	mbcsTable->countStates<=127
	1697	) {
	1698	/* non-stateful base converter, need to modify the state table */
	1699	int32_t (*newStateTable)[256];
	1700	int32_t *state;
	1701	int32_t i, count;
	1702
	1703	/* allocate a new state table and copy the base state table contents */
	1704	count=mbcsTable->countStates;
	1705	newStateTable=(int32_t ()[256])uprv_malloc((count+1)1024);
	1706	if(newStateTable==NULL) {
	1707	ucnv_unload(baseSharedData);
	1708	*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
	1709	return;
	1710	}
	1711
	1712	uprv_memcpy(newStateTable, mbcsTable->stateTable, count*1024);
	1713
	1714	/* change all final single-byte entries to go to a new all-illegal state */
	1715	state=newStateTable[0];
	1716	for(i=0; i<256; ++i) {
	1717	if(MBCS_ENTRY_IS_FINAL(state[i])) {
	1718	state[i]=MBCS_ENTRY_TRANSITION(count, 0);
	1719	}
	1720	}
	1721
	1722	/* build the new all-illegal state */
	1723	state=newStateTable[count];
	1724	for(i=0; i<256; ++i) {
	1725	state[i]=MBCS_ENTRY_FINAL(0, MBCS_STATE_ILLEGAL, 0);
	1726	}
	1727	mbcsTable->stateTable=(const int32_t (*)[256])newStateTable;
	1728	mbcsTable->countStates=(uint8_t)(count+1);
	1729	mbcsTable->stateTableOwned=TRUE;
	1730
	1731	mbcsTable->outputType=MBCS_OUTPUT_DBCS_ONLY;
	1732	}
	1733	}
	1734
	1735	/*
	1736	* unlike below for files with base tables, do not get the unicodeMask
	1737	* from the sharedData; instead, use the base table's unicodeMask,
	1738	* which we copied in the memcpy above;
	1739	* this is necessary because the static data unicodeMask, especially
	1740	* the UCNV_HAS_SUPPLEMENTARY flag, is part of the base table data
	1741	*/
	1742	} else {
	1743	/* conversion file with a base table; an additional extension table is optional */
	1744	/* make sure that the output type is known */
	1745	switch(mbcsTable->outputType) {
	1746	case MBCS_OUTPUT_1:
	1747	case MBCS_OUTPUT_2:
	1748	case MBCS_OUTPUT_3:
	1749	case MBCS_OUTPUT_4:
	1750	case MBCS_OUTPUT_3_EUC:
	1751	case MBCS_OUTPUT_4_EUC:
	1752	case MBCS_OUTPUT_2_SISO:
	1753	/* OK */
	1754	break;
	1755	default:
	1756	*pErrorCode=U_INVALID_TABLE_FORMAT;
	1757	return;
	1758	}
	1759	if(pArgs->onlyTestIsLoadable) {
	1760	/*
	1761	* Exit as soon as we know that we can load the converter
	1762	* and the format is valid and supported.
	1763	* The worst that can happen in the following code is a memory
	1764	* allocation error.
	1765	*/
	1766	return;
	1767	}
	1768
	1769	mbcsTable->countStates=(uint8_t)header->countStates;
	1770	mbcsTable->countToUFallbacks=header->countToUFallbacks;
	1771	mbcsTable->stateTable=(const int32_t ()[256])(raw+headerLength4);
	1772	mbcsTable->toUFallbacks=(const _MBCSToUFallback *)(mbcsTable->stateTable+header->countStates);
	1773	mbcsTable->unicodeCodeUnits=(const uint16_t *)(raw+header->offsetToUCodeUnits);
	1774
	1775	mbcsTable->fromUnicodeTable=(const uint16_t *)(raw+header->offsetFromUTable);
	1776	mbcsTable->fromUnicodeBytes=(const uint8_t *)(raw+header->offsetFromUBytes);
	1777	mbcsTable->fromUBytesLength=header->fromUBytesLength;
	1778
	1779	/*
	1780	* converter versions 6.1 and up contain a unicodeMask that is
	1781	* used here to select the most efficient function implementations
	1782	*/
	1783	info.size=sizeof(UDataInfo);
	1784	udata_getInfo((UDataMemory *)sharedData->dataMemory, &info);
	1785	if(info.formatVersion[0]>6 \|\| (info.formatVersion[0]==6 && info.formatVersion[1]>=1)) {
	1786	/* mask off possible future extensions to be safe */
	1787	mbcsTable->unicodeMask=(uint8_t)(sharedData->staticData->unicodeMask&3);
	1788	} else {
	1789	/* for older versions, assume worst case: contains anything possible (prevent over-optimizations) */
	1790	mbcsTable->unicodeMask=UCNV_HAS_SUPPLEMENTARY\|UCNV_HAS_SURROGATES;
	1791	}
	1792
	1793	/*
	1794	* _MBCSHeader.version 4.3 adds utf8Friendly data structures.
	1795	* Check for the header version, SBCS vs. MBCS, and for whether the
	1796	* data structures are optimized for code points as high as what the
	1797	* runtime code is designed for.
	1798	* The implementation does not handle mapping tables with entries for
	1799	* unpaired surrogates.
	1800	*/
	1801	if( header->version[1]>=3 &&
	1802	(mbcsTable->unicodeMask&UCNV_HAS_SURROGATES)==0 &&
	1803	(mbcsTable->countStates==1 ?
	1804	(header->version[2]>=(SBCS_FAST_MAX>>8)) :
	1805	(header->version[2]>=(MBCS_FAST_MAX>>8))
	1806	)
	1807	) {
	1808	mbcsTable->utf8Friendly=TRUE;
	1809
	1810	if(mbcsTable->countStates==1) {
	1811	/*
	1812	* SBCS: Stage 3 is allocated in 64-entry blocks for U+0000..SBCS_FAST_MAX or higher.
	1813	* Build a table with indexes to each block, to be used instead of
	1814	* the regular stage 1/2 table.
	1815	*/
	1816	int32_t i;
	1817	for(i=0; i<(SBCS_FAST_LIMIT>>6); ++i) {
	1818	mbcsTable->sbcsIndex[i]=mbcsTable->fromUnicodeTable[mbcsTable->fromUnicodeTable[i>>4]+((i<<2)&0x3c)];
	1819	}
	1820	/* set SBCS_FAST_MAX to reflect the reach of sbcsIndex[] even if header->version[2]>(SBCS_FAST_MAX>>8) */
	1821	mbcsTable->maxFastUChar=SBCS_FAST_MAX;
	1822	} else {
	1823	/*
	1824	* MBCS: Stage 3 is allocated in 64-entry blocks for U+0000..MBCS_FAST_MAX or higher.
	1825	* The .cnv file is prebuilt with an additional stage table with indexes
	1826	* to each block.
	1827	*/
	1828	mbcsTable->mbcsIndex=(const uint16_t *)
	1829	(mbcsTable->fromUnicodeBytes+
	1830	(noFromU ? 0 : mbcsTable->fromUBytesLength));
	1831	mbcsTable->maxFastUChar=(((UChar)header->version[2])<<8)\|0xff;
	1832	}
	1833	}
	1834
	1835	/* calculate a bit set of 4 ASCII characters per bit that round-trip to ASCII bytes */
	1836	{
	1837	uint32_t asciiRoundtrips=0xffffffff;
	1838	int32_t i;
	1839
	1840	for(i=0; i<0x80; ++i) {
	1841	if(mbcsTable->stateTable[0][i]!=MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, i)) {
	1842	asciiRoundtrips&=~((uint32_t)1<<(i>>2));
	1843	}
	1844	}
	1845	mbcsTable->asciiRoundtrips=asciiRoundtrips;
	1846	}
	1847
	1848	if(noFromU) {
	1849	uint32_t stage1Length=
	1850	mbcsTable->unicodeMask&UCNV_HAS_SUPPLEMENTARY ?
	1851	0x440 : 0x40;
	1852	uint32_t stage2Length=
	1853	(header->offsetFromUBytes-header->offsetFromUTable)/4-
	1854	stage1Length/2;
	1855	reconstituteData(mbcsTable, stage1Length, stage2Length, header->fullStage2Length, pErrorCode);
	1856	}
	1857	}
	1858
	1859	/* Set the impl pointer here so that it is set for both extension-only and base tables. */
	1860	if(mbcsTable->utf8Friendly) {
	1861	if(mbcsTable->countStates==1) {
	1862	sharedData->impl=&_SBCSUTF8Impl;
	1863	} else {
	1864	if(mbcsTable->outputType==MBCS_OUTPUT_2) {
	1865	sharedData->impl=&_DBCSUTF8Impl;
	1866	}
	1867	}
	1868	}
	1869
	1870	if(mbcsTable->outputType==MBCS_OUTPUT_DBCS_ONLY \|\| mbcsTable->outputType==MBCS_OUTPUT_2_SISO) {
	1871	/*
	1872	* MBCS_OUTPUT_DBCS_ONLY: No SBCS mappings, therefore ASCII does not roundtrip.
	1873	* MBCS_OUTPUT_2_SISO: Bypass the ASCII fastpath to handle prevLength correctly.
	1874	*/
	1875	mbcsTable->asciiRoundtrips=0;
	1876	}
	1877	}
	1878
	1879	static void U_CALLCONV
	1880	ucnv_MBCSUnload(UConverterSharedData *sharedData) {
	1881	UConverterMBCSTable *mbcsTable=&sharedData->mbcs;
	1882
	1883	if(mbcsTable->swapLFNLStateTable!=NULL) {
	1884	uprv_free(mbcsTable->swapLFNLStateTable);
	1885	}
	1886	if(mbcsTable->stateTableOwned) {
	1887	uprv_free((void *)mbcsTable->stateTable);
	1888	}
	1889	if(mbcsTable->baseSharedData!=NULL) {
	1890	ucnv_unload(mbcsTable->baseSharedData);
	1891	}
	1892	if(mbcsTable->reconstitutedData!=NULL) {
	1893	uprv_free(mbcsTable->reconstitutedData);
	1894	}
	1895	}
	1896
	1897	static void U_CALLCONV
	1898	ucnv_MBCSOpen(UConverter *cnv,
	1899	UConverterLoadArgs *pArgs,
	1900	UErrorCode *pErrorCode) {
	1901	UConverterMBCSTable *mbcsTable;
	1902	const int32_t *extIndexes;
	1903	uint8_t outputType;
	1904	int8_t maxBytesPerUChar;
	1905
	1906	if(pArgs->onlyTestIsLoadable) {
	1907	return;
	1908	}
	1909
	1910	mbcsTable=&cnv->sharedData->mbcs;
	1911	outputType=mbcsTable->outputType;
	1912
	1913	if(outputType==MBCS_OUTPUT_DBCS_ONLY) {
	1914	/* the swaplfnl option does not apply, remove it */
	1915	cnv->options=pArgs->options&=~UCNV_OPTION_SWAP_LFNL;
	1916	}
	1917
	1918	if((pArgs->options&UCNV_OPTION_SWAP_LFNL)!=0) {
	1919	/* do this because double-checked locking is broken */
	1920	UBool isCached;
	1921
	1922	icu::umtx_lock(NULL);
	1923	isCached=mbcsTable->swapLFNLStateTable!=NULL;
	1924	icu::umtx_unlock(NULL);
	1925
	1926	if(!isCached) {
	1927	if(!_EBCDICSwapLFNL(cnv->sharedData, pErrorCode)) {
	1928	if(U_FAILURE(*pErrorCode)) {
	1929	return; /* something went wrong */
	1930	}
	1931
	1932	/* the option does not apply, remove it */
	1933	cnv->options=pArgs->options&=~UCNV_OPTION_SWAP_LFNL;
	1934	}
	1935	}
	1936	}
	1937
	1938	if(uprv_strstr(pArgs->name, "18030")!=NULL) {
	1939	if(uprv_strstr(pArgs->name, "gb18030")!=NULL \|\| uprv_strstr(pArgs->name, "GB18030")!=NULL) {
	1940	/* set a flag for GB 18030 mode, which changes the callback behavior */
	1941	cnv->options\|=_MBCS_OPTION_GB18030;
	1942	}
	1943	} else if((uprv_strstr(pArgs->name, "KEIS")!=NULL) \|\| (uprv_strstr(pArgs->name, "keis")!=NULL)) {
	1944	/* set a flag for KEIS converter, which changes the SI/SO character sequence */
	1945	cnv->options\|=_MBCS_OPTION_KEIS;
	1946	} else if((uprv_strstr(pArgs->name, "JEF")!=NULL) \|\| (uprv_strstr(pArgs->name, "jef")!=NULL)) {
	1947	/* set a flag for JEF converter, which changes the SI/SO character sequence */
	1948	cnv->options\|=_MBCS_OPTION_JEF;
	1949	} else if((uprv_strstr(pArgs->name, "JIPS")!=NULL) \|\| (uprv_strstr(pArgs->name, "jips")!=NULL)) {
	1950	/* set a flag for JIPS converter, which changes the SI/SO character sequence */
	1951	cnv->options\|=_MBCS_OPTION_JIPS;
	1952	}
	1953
	1954	/* fix maxBytesPerUChar depending on outputType and options etc. */
	1955	if(outputType==MBCS_OUTPUT_2_SISO) {
	1956	cnv->maxBytesPerUChar=3; /* SO+DBCS */
	1957	}
	1958
	1959	extIndexes=mbcsTable->extIndexes;
	1960	if(extIndexes!=NULL) {
	1961	maxBytesPerUChar=(int8_t)UCNV_GET_MAX_BYTES_PER_UCHAR(extIndexes);
	1962	if(outputType==MBCS_OUTPUT_2_SISO) {
	1963	++maxBytesPerUChar; /* SO + multiple DBCS */
	1964	}
	1965
	1966	if(maxBytesPerUChar>cnv->maxBytesPerUChar) {
	1967	cnv->maxBytesPerUChar=maxBytesPerUChar;
	1968	}
	1969	}
	1970
	1971	#if 0
	1972	/*
	1973	* documentation of UConverter fields used for status
	1974	* all of these fields are (re)set to 0 by ucnv_bld.c and ucnv_reset()
	1975	*/
	1976
	1977	/* toUnicode */
	1978	cnv->toUnicodeStatus=0; /* offset */
	1979	cnv->mode=0; /* state */
	1980	cnv->toULength=0; /* byteIndex */
	1981
	1982	/* fromUnicode */
	1983	cnv->fromUChar32=0;
	1984	cnv->fromUnicodeStatus=1; /* prevLength */
	1985	#endif
	1986	}
	1987
	1988	U_CDECL_BEGIN
	1989
	1990	static const char* U_CALLCONV
	1991	ucnv_MBCSGetName(const UConverter *cnv) {
	1992	if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0 && cnv->sharedData->mbcs.swapLFNLName!=NULL) {
	1993	return cnv->sharedData->mbcs.swapLFNLName;
	1994	} else {
	1995	return cnv->sharedData->staticData->name;
	1996	}
	1997	}
	1998	U_CDECL_END
	1999
	2000
	2001	/* MBCS-to-Unicode conversion functions ------------------------------------- */
	2002
	2003	static UChar32 U_CALLCONV
	2004	ucnv_MBCSGetFallback(UConverterMBCSTable *mbcsTable, uint32_t offset) {
	2005	const _MBCSToUFallback *toUFallbacks;
	2006	uint32_t i, start, limit;
	2007
	2008	limit=mbcsTable->countToUFallbacks;
	2009	if(limit>0) {
	2010	/* do a binary search for the fallback mapping */
	2011	toUFallbacks=mbcsTable->toUFallbacks;
	2012	start=0;
	2013	while(start<limit-1) {
	2014	i=(start+limit)/2;
	2015	if(offset<toUFallbacks[i].offset) {
	2016	limit=i;
	2017	} else {
	2018	start=i;
	2019	}
	2020	}
	2021
	2022	/* did we really find it? */
	2023	if(offset==toUFallbacks[start].offset) {
	2024	return toUFallbacks[start].codePoint;
	2025	}
	2026	}
	2027
	2028	return 0xfffe;
	2029	}
	2030
	2031	/* This version of ucnv_MBCSToUnicodeWithOffsets() is optimized for single-byte, single-state codepages. */
	2032	static void
	2033	ucnv_MBCSSingleToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
	2034	UErrorCode *pErrorCode) {
	2035	UConverter *cnv;
	2036	const uint8_t source, sourceLimit;
	2037	UChar *target;
	2038	const UChar *targetLimit;
	2039	int32_t *offsets;
	2040
	2041	const int32_t (*stateTable)[256];
	2042
	2043	int32_t sourceIndex;
	2044
	2045	int32_t entry;
	2046	UChar c;
	2047	uint8_t action;
	2048
	2049	/* set up the local pointers */
	2050	cnv=pArgs->converter;
	2051	source=(const uint8_t *)pArgs->source;
	2052	sourceLimit=(const uint8_t *)pArgs->sourceLimit;
	2053	target=pArgs->target;
	2054	targetLimit=pArgs->targetLimit;
	2055	offsets=pArgs->offsets;
	2056
	2057	if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
	2058	stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable;
	2059	} else {
	2060	stateTable=cnv->sharedData->mbcs.stateTable;
	2061	}
	2062
	2063	/* sourceIndex=-1 if the current character began in the previous buffer */
	2064	sourceIndex=0;
	2065
	2066	/* conversion loop */
	2067	while(source<sourceLimit) {
	2068	/*
	2069	* This following test is to see if available input would overflow the output.
	2070	* It does not catch output of more than one code unit that
	2071	* overflows as a result of a surrogate pair or callback output
	2072	* from the last source byte.
	2073	* Therefore, those situations also test for overflows and will
	2074	* then break the loop, too.
	2075	*/
	2076	if(target>=targetLimit) {
	2077	/* target is full */
	2078	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
	2079	break;
	2080	}
	2081
	2082	entry=stateTable[0][*source++];
	2083	/* MBCS_ENTRY_IS_FINAL(entry) */
	2084
	2085	/* test the most common case first */
	2086	if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
	2087	/* output BMP code point */
	2088	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	2089	if(offsets!=NULL) {
	2090	*offsets++=sourceIndex;
	2091	}
	2092
	2093	/* normal end of action codes: prepare for a new character */
	2094	++sourceIndex;
	2095	continue;
	2096	}
	2097
	2098	/*
	2099	* An if-else-if chain provides more reliable performance for
	2100	* the most common cases compared to a switch.
	2101	*/
	2102	action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry));
	2103	if(action==MBCS_STATE_VALID_DIRECT_20 \|\|
	2104	(action==MBCS_STATE_FALLBACK_DIRECT_20 && UCNV_TO_U_USE_FALLBACK(cnv))
	2105	) {
	2106	entry=MBCS_ENTRY_FINAL_VALUE(entry);
	2107	/* output surrogate pair */
	2108	*target++=(UChar)(0xd800\|(UChar)(entry>>10));
	2109	if(offsets!=NULL) {
	2110	*offsets++=sourceIndex;
	2111	}
	2112	c=(UChar)(0xdc00\|(UChar)(entry&0x3ff));
	2113	if(target<targetLimit) {
	2114	*target++=c;
	2115	if(offsets!=NULL) {
	2116	*offsets++=sourceIndex;
	2117	}
	2118	} else {
	2119	/* target overflow */
	2120	cnv->UCharErrorBuffer[0]=c;
	2121	cnv->UCharErrorBufferLength=1;
	2122	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
	2123	break;
	2124	}
	2125
	2126	++sourceIndex;
	2127	continue;
	2128	} else if(action==MBCS_STATE_FALLBACK_DIRECT_16) {
	2129	if(UCNV_TO_U_USE_FALLBACK(cnv)) {
	2130	/* output BMP code point */
	2131	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	2132	if(offsets!=NULL) {
	2133	*offsets++=sourceIndex;
	2134	}
	2135
	2136	++sourceIndex;
	2137	continue;
	2138	}
	2139	} else if(action==MBCS_STATE_UNASSIGNED) {
	2140	/* just fall through */
	2141	} else if(action==MBCS_STATE_ILLEGAL) {
	2142	/* callback(illegal) */
	2143	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
	2144	} else {
	2145	/* reserved, must never occur */
	2146	++sourceIndex;
	2147	continue;
	2148	}
	2149
	2150	if(U_FAILURE(*pErrorCode)) {
	2151	/* callback(illegal) */
	2152	break;
	2153	} else /* unassigned sequences indicated with byteIndex>0 */ {
	2154	/* try an extension mapping */
	2155	pArgs->source=(const char *)source;
	2156	cnv->toUBytes[0]=*(source-1);
	2157	cnv->toULength=_extToU(cnv, cnv->sharedData,
	2158	1, &source, sourceLimit,
	2159	&target, targetLimit,
	2160	&offsets, sourceIndex,
	2161	pArgs->flush,
	2162	pErrorCode);
	2163	sourceIndex+=1+(int32_t)(source-(const uint8_t *)pArgs->source);
	2164
	2165	if(U_FAILURE(*pErrorCode)) {
	2166	/* not mappable or buffer overflow */
	2167	break;
	2168	}
	2169	}
	2170	}
	2171
	2172	/* write back the updated pointers */
	2173	pArgs->source=(const char *)source;
	2174	pArgs->target=target;
	2175	pArgs->offsets=offsets;
	2176	}
	2177
	2178	/*
	2179	* This version of ucnv_MBCSSingleToUnicodeWithOffsets() is optimized for single-byte, single-state codepages
	2180	* that only map to and from the BMP.
	2181	* In addition to single-byte optimizations, the offset calculations
	2182	* become much easier.
	2183	*/
	2184	static void
	2185	ucnv_MBCSSingleToBMPWithOffsets(UConverterToUnicodeArgs *pArgs,
	2186	UErrorCode *pErrorCode) {
	2187	UConverter *cnv;
	2188	const uint8_t source, sourceLimit, *lastSource;
	2189	UChar *target;
	2190	int32_t targetCapacity, length;
	2191	int32_t *offsets;
	2192
	2193	const int32_t (*stateTable)[256];
	2194
	2195	int32_t sourceIndex;
	2196
	2197	int32_t entry;
	2198	uint8_t action;
	2199
	2200	/* set up the local pointers */
	2201	cnv=pArgs->converter;
	2202	source=(const uint8_t *)pArgs->source;
	2203	sourceLimit=(const uint8_t *)pArgs->sourceLimit;
	2204	target=pArgs->target;
	2205	targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
	2206	offsets=pArgs->offsets;
	2207
	2208	if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
	2209	stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable;
	2210	} else {
	2211	stateTable=cnv->sharedData->mbcs.stateTable;
	2212	}
	2213
	2214	/* sourceIndex=-1 if the current character began in the previous buffer */
	2215	sourceIndex=0;
	2216	lastSource=source;
	2217
	2218	/*
	2219	* since the conversion here is 1:1 UChar:uint8_t, we need only one counter
	2220	* for the minimum of the sourceLength and targetCapacity
	2221	*/
	2222	length=(int32_t)(sourceLimit-source);
	2223	if(length<targetCapacity) {
	2224	targetCapacity=length;
	2225	}
	2226
	2227	#if MBCS_UNROLL_SINGLE_TO_BMP
	2228	/* unrolling makes it faster on Pentium III/Windows 2000 */
	2229	/* unroll the loop with the most common case */
	2230	unrolled:
	2231	if(targetCapacity>=16) {
	2232	int32_t count, loops, oredEntries;
	2233
	2234	loops=count=targetCapacity>>4;
	2235	do {
	2236	oredEntries=entry=stateTable[0][*source++];
	2237	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	2238	oredEntries\|=entry=stateTable[0][*source++];
	2239	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	2240	oredEntries\|=entry=stateTable[0][*source++];
	2241	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	2242	oredEntries\|=entry=stateTable[0][*source++];
	2243	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	2244	oredEntries\|=entry=stateTable[0][*source++];
	2245	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	2246	oredEntries\|=entry=stateTable[0][*source++];
	2247	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	2248	oredEntries\|=entry=stateTable[0][*source++];
	2249	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	2250	oredEntries\|=entry=stateTable[0][*source++];
	2251	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	2252	oredEntries\|=entry=stateTable[0][*source++];
	2253	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	2254	oredEntries\|=entry=stateTable[0][*source++];
	2255	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	2256	oredEntries\|=entry=stateTable[0][*source++];
	2257	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	2258	oredEntries\|=entry=stateTable[0][*source++];
	2259	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	2260	oredEntries\|=entry=stateTable[0][*source++];
	2261	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	2262	oredEntries\|=entry=stateTable[0][*source++];
	2263	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	2264	oredEntries\|=entry=stateTable[0][*source++];
	2265	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	2266	oredEntries\|=entry=stateTable[0][*source++];
	2267	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	2268
	2269	/* were all 16 entries really valid? */
	2270	if(!MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(oredEntries)) {
	2271	/* no, return to the first of these 16 */
	2272	source-=16;
	2273	target-=16;
	2274	break;
	2275	}
	2276	} while(--count>0);
	2277	count=loops-count;
	2278	targetCapacity-=16*count;
	2279
	2280	if(offsets!=NULL) {
	2281	lastSource+=16*count;
	2282	while(count>0) {
	2283	*offsets++=sourceIndex++;
	2284	*offsets++=sourceIndex++;
	2285	*offsets++=sourceIndex++;
	2286	*offsets++=sourceIndex++;
	2287	*offsets++=sourceIndex++;
	2288	*offsets++=sourceIndex++;
	2289	*offsets++=sourceIndex++;
	2290	*offsets++=sourceIndex++;
	2291	*offsets++=sourceIndex++;
	2292	*offsets++=sourceIndex++;
	2293	*offsets++=sourceIndex++;
	2294	*offsets++=sourceIndex++;
	2295	*offsets++=sourceIndex++;
	2296	*offsets++=sourceIndex++;
	2297	*offsets++=sourceIndex++;
	2298	*offsets++=sourceIndex++;
	2299	--count;
	2300	}
	2301	}
	2302	}
	2303	#endif
	2304
	2305	/* conversion loop */
	2306	while(targetCapacity > 0 && source < sourceLimit) {
	2307	entry=stateTable[0][*source++];
	2308	/* MBCS_ENTRY_IS_FINAL(entry) */
	2309
	2310	/* test the most common case first */
	2311	if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
	2312	/* output BMP code point */
	2313	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	2314	--targetCapacity;
	2315	continue;
	2316	}
	2317
	2318	/*
	2319	* An if-else-if chain provides more reliable performance for
	2320	* the most common cases compared to a switch.
	2321	*/
	2322	action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry));
	2323	if(action==MBCS_STATE_FALLBACK_DIRECT_16) {
	2324	if(UCNV_TO_U_USE_FALLBACK(cnv)) {
	2325	/* output BMP code point */
	2326	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	2327	--targetCapacity;
	2328	continue;
	2329	}
	2330	} else if(action==MBCS_STATE_UNASSIGNED) {
	2331	/* just fall through */
	2332	} else if(action==MBCS_STATE_ILLEGAL) {
	2333	/* callback(illegal) */
	2334	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
	2335	} else {
	2336	/* reserved, must never occur */
	2337	continue;
	2338	}
	2339
	2340	/* set offsets since the start or the last extension */
	2341	if(offsets!=NULL) {
	2342	int32_t count=(int32_t)(source-lastSource);
	2343
	2344	/* predecrement: do not set the offset for the callback-causing character */
	2345	while(--count>0) {
	2346	*offsets++=sourceIndex++;
	2347	}
	2348	/* offset and sourceIndex are now set for the current character */
	2349	}
	2350
	2351	if(U_FAILURE(*pErrorCode)) {
	2352	/* callback(illegal) */
	2353	break;
	2354	} else /* unassigned sequences indicated with byteIndex>0 */ {
	2355	/* try an extension mapping */
	2356	lastSource=source;
	2357	cnv->toUBytes[0]=*(source-1);
	2358	cnv->toULength=_extToU(cnv, cnv->sharedData,
	2359	1, &source, sourceLimit,
	2360	&target, pArgs->targetLimit,
	2361	&offsets, sourceIndex,
	2362	pArgs->flush,
	2363	pErrorCode);
	2364	sourceIndex+=1+(int32_t)(source-lastSource);
	2365
	2366	if(U_FAILURE(*pErrorCode)) {
	2367	/* not mappable or buffer overflow */
	2368	break;
	2369	}
	2370
	2371	/* recalculate the targetCapacity after an extension mapping */
	2372	targetCapacity=(int32_t)(pArgs->targetLimit-target);
	2373	length=(int32_t)(sourceLimit-source);
	2374	if(length<targetCapacity) {
	2375	targetCapacity=length;
	2376	}
	2377	}
	2378
	2379	#if MBCS_UNROLL_SINGLE_TO_BMP
	2380	/* unrolling makes it faster on Pentium III/Windows 2000 */
	2381	goto unrolled;
	2382	#endif
	2383	}
	2384
	2385	if(U_SUCCESS(*pErrorCode) && source<sourceLimit && target>=pArgs->targetLimit) {
	2386	/* target is full */
	2387	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
	2388	}
	2389
	2390	/* set offsets since the start or the last callback */
	2391	if(offsets!=NULL) {
	2392	size_t count=source-lastSource;
	2393	while(count>0) {
	2394	*offsets++=sourceIndex++;
	2395	--count;
	2396	}
	2397	}
	2398
	2399	/* write back the updated pointers */
	2400	pArgs->source=(const char *)source;
	2401	pArgs->target=target;
	2402	pArgs->offsets=offsets;
	2403	}
	2404
	2405	static UBool
	2406	hasValidTrailBytes(const int32_t (*stateTable)[256], uint8_t state) {
	2407	const int32_t *row=stateTable[state];
	2408	int32_t b, entry;
	2409	/* First test for final entries in this state for some commonly valid byte values. */
	2410	entry=row[0xa1];
	2411	if( !MBCS_ENTRY_IS_TRANSITION(entry) &&
	2412	MBCS_ENTRY_FINAL_ACTION(entry)!=MBCS_STATE_ILLEGAL
	2413	) {
	2414	return TRUE;
	2415	}
	2416	entry=row[0x41];
	2417	if( !MBCS_ENTRY_IS_TRANSITION(entry) &&
	2418	MBCS_ENTRY_FINAL_ACTION(entry)!=MBCS_STATE_ILLEGAL
	2419	) {
	2420	return TRUE;
	2421	}
	2422	/* Then test for final entries in this state. */
	2423	for(b=0; b<=0xff; ++b) {
	2424	entry=row[b];
	2425	if( !MBCS_ENTRY_IS_TRANSITION(entry) &&
	2426	MBCS_ENTRY_FINAL_ACTION(entry)!=MBCS_STATE_ILLEGAL
	2427	) {
	2428	return TRUE;
	2429	}
	2430	}
	2431	/* Then recurse for transition entries. */
	2432	for(b=0; b<=0xff; ++b) {
	2433	entry=row[b];
	2434	if( MBCS_ENTRY_IS_TRANSITION(entry) &&
	2435	hasValidTrailBytes(stateTable, (uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry))
	2436	) {
	2437	return TRUE;
	2438	}
	2439	}
	2440	return FALSE;
	2441	}
	2442
	2443	/*
	2444	* Is byte b a single/lead byte in this state?
	2445	* Recurse for transition states, because here we don't want to say that
	2446	* b is a lead byte if all byte sequences that start with b are illegal.
	2447	*/
	2448	static UBool
	2449	isSingleOrLead(const int32_t (*stateTable)[256], uint8_t state, UBool isDBCSOnly, uint8_t b) {
	2450	const int32_t *row=stateTable[state];
	2451	int32_t entry=row[b];
	2452	if(MBCS_ENTRY_IS_TRANSITION(entry)) { /* lead byte */
	2453	return hasValidTrailBytes(stateTable, (uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry));
	2454	} else {
	2455	uint8_t action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry));
	2456	if(action==MBCS_STATE_CHANGE_ONLY && isDBCSOnly) {
	2457	return FALSE; /* SI/SO are illegal for DBCS-only conversion */
	2458	} else {
	2459	return action!=MBCS_STATE_ILLEGAL;
	2460	}
	2461	}
	2462	}
	2463
	2464	U_CFUNC void
	2465	ucnv_MBCSToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
	2466	UErrorCode *pErrorCode) {
	2467	UConverter *cnv;
	2468	const uint8_t source, sourceLimit;
	2469	UChar *target;
	2470	const UChar *targetLimit;
	2471	int32_t *offsets;
	2472
	2473	const int32_t (*stateTable)[256];
	2474	const uint16_t *unicodeCodeUnits;
	2475
	2476	uint32_t offset;
	2477	uint8_t state;
	2478	int8_t byteIndex;
	2479	uint8_t *bytes;
	2480
	2481	int32_t sourceIndex, nextSourceIndex;
	2482
	2483	int32_t entry;
	2484	UChar c;
	2485	uint8_t action;
	2486
	2487	/* use optimized function if possible */
	2488	cnv=pArgs->converter;
	2489
	2490	if(cnv->preToULength>0) {
	2491	/*
	2492	* pass sourceIndex=-1 because we continue from an earlier buffer
	2493	* in the future, this may change with continuous offsets
	2494	*/
	2495	ucnv_extContinueMatchToU(cnv, pArgs, -1, pErrorCode);
	2496
	2497	if(U_FAILURE(*pErrorCode) \|\| cnv->preToULength<0) {
	2498	return;
	2499	}
	2500	}
	2501
	2502	if(cnv->sharedData->mbcs.countStates==1) {
	2503	if(!(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
	2504	ucnv_MBCSSingleToBMPWithOffsets(pArgs, pErrorCode);
	2505	} else {
	2506	ucnv_MBCSSingleToUnicodeWithOffsets(pArgs, pErrorCode);
	2507	}
	2508	return;
	2509	}
	2510
	2511	/* set up the local pointers */
	2512	source=(const uint8_t *)pArgs->source;
	2513	sourceLimit=(const uint8_t *)pArgs->sourceLimit;
	2514	target=pArgs->target;
	2515	targetLimit=pArgs->targetLimit;
	2516	offsets=pArgs->offsets;
	2517
	2518	if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
	2519	stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable;
	2520	} else {
	2521	stateTable=cnv->sharedData->mbcs.stateTable;
	2522	}
	2523	unicodeCodeUnits=cnv->sharedData->mbcs.unicodeCodeUnits;
	2524
	2525	/* get the converter state from UConverter */
	2526	offset=cnv->toUnicodeStatus;
	2527	byteIndex=cnv->toULength;
	2528	bytes=cnv->toUBytes;
	2529
	2530	/*
	2531	* if we are in the SBCS state for a DBCS-only converter,
	2532	* then load the DBCS state from the MBCS data
	2533	* (dbcsOnlyState==0 if it is not a DBCS-only converter)
	2534	*/
	2535	if((state=(uint8_t)(cnv->mode))==0) {
	2536	state=cnv->sharedData->mbcs.dbcsOnlyState;
	2537	}
	2538
	2539	/* sourceIndex=-1 if the current character began in the previous buffer */
	2540	sourceIndex=byteIndex==0 ? 0 : -1;
	2541	nextSourceIndex=0;
	2542
	2543	/* conversion loop */
	2544	while(source<sourceLimit) {
	2545	/*
	2546	* This following test is to see if available input would overflow the output.
	2547	* It does not catch output of more than one code unit that
	2548	* overflows as a result of a surrogate pair or callback output
	2549	* from the last source byte.
	2550	* Therefore, those situations also test for overflows and will
	2551	* then break the loop, too.
	2552	*/
	2553	if(target>=targetLimit) {
	2554	/* target is full */
	2555	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
	2556	break;
	2557	}
	2558
	2559	if(byteIndex==0) {
	2560	/* optimized loop for 1/2-byte input and BMP output */
	2561	if(offsets==NULL) {
	2562	do {
	2563	entry=stateTable[state][*source];
	2564	if(MBCS_ENTRY_IS_TRANSITION(entry)) {
	2565	state=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry);
	2566	offset=MBCS_ENTRY_TRANSITION_OFFSET(entry);
	2567
	2568	++source;
	2569	if( source<sourceLimit &&
	2570	MBCS_ENTRY_IS_FINAL(entry=stateTable[state][*source]) &&
	2571	MBCS_ENTRY_FINAL_ACTION(entry)==MBCS_STATE_VALID_16 &&
	2572	(c=unicodeCodeUnits[offset+MBCS_ENTRY_FINAL_VALUE_16(entry)])<0xfffe
	2573	) {
	2574	++source;
	2575	*target++=c;
	2576	state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
	2577	offset=0;
	2578	} else {
	2579	/* set the state and leave the optimized loop */
	2580	bytes[0]=*(source-1);
	2581	byteIndex=1;
	2582	break;
	2583	}
	2584	} else {
	2585	if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
	2586	/* output BMP code point */
	2587	++source;
	2588	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	2589	state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
	2590	} else {
	2591	/* leave the optimized loop */
	2592	break;
	2593	}
	2594	}
	2595	} while(source<sourceLimit && target<targetLimit);
	2596	} else /* offsets!=NULL */ {
	2597	do {
	2598	entry=stateTable[state][*source];
	2599	if(MBCS_ENTRY_IS_TRANSITION(entry)) {
	2600	state=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry);
	2601	offset=MBCS_ENTRY_TRANSITION_OFFSET(entry);
	2602
	2603	++source;
	2604	if( source<sourceLimit &&
	2605	MBCS_ENTRY_IS_FINAL(entry=stateTable[state][*source]) &&
	2606	MBCS_ENTRY_FINAL_ACTION(entry)==MBCS_STATE_VALID_16 &&
	2607	(c=unicodeCodeUnits[offset+MBCS_ENTRY_FINAL_VALUE_16(entry)])<0xfffe
	2608	) {
	2609	++source;
	2610	*target++=c;
	2611	if(offsets!=NULL) {
	2612	*offsets++=sourceIndex;
	2613	sourceIndex=(nextSourceIndex+=2);
	2614	}
	2615	state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
	2616	offset=0;
	2617	} else {
	2618	/* set the state and leave the optimized loop */
	2619	++nextSourceIndex;
	2620	bytes[0]=*(source-1);
	2621	byteIndex=1;
	2622	break;
	2623	}
	2624	} else {
	2625	if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
	2626	/* output BMP code point */
	2627	++source;
	2628	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	2629	if(offsets!=NULL) {
	2630	*offsets++=sourceIndex;
	2631	sourceIndex=++nextSourceIndex;
	2632	}
	2633	state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
	2634	} else {
	2635	/* leave the optimized loop */
	2636	break;
	2637	}
	2638	}
	2639	} while(source<sourceLimit && target<targetLimit);
	2640	}
	2641
	2642	/*
	2643	* these tests and break statements could be put inside the loop
	2644	* if C had "break outerLoop" like Java
	2645	*/
	2646	if(source>=sourceLimit) {
	2647	break;
	2648	}
	2649	if(target>=targetLimit) {
	2650	/* target is full */
	2651	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
	2652	break;
	2653	}
	2654
	2655	++nextSourceIndex;
	2656	bytes[byteIndex++]=*source++;
	2657	} else /* byteIndex>0 */ {
	2658	++nextSourceIndex;
	2659	entry=stateTable[state][bytes[byteIndex++]=*source++];
	2660	}
	2661
	2662	if(MBCS_ENTRY_IS_TRANSITION(entry)) {
	2663	state=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry);
	2664	offset+=MBCS_ENTRY_TRANSITION_OFFSET(entry);
	2665	continue;
	2666	}
	2667
	2668	/* save the previous state for proper extension mapping with SI/SO-stateful converters */
	2669	cnv->mode=state;
	2670
	2671	/* set the next state early so that we can reuse the entry variable */
	2672	state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
	2673
	2674	/*
	2675	* An if-else-if chain provides more reliable performance for
	2676	* the most common cases compared to a switch.
	2677	*/
	2678	action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry));
	2679	if(action==MBCS_STATE_VALID_16) {
	2680	offset+=MBCS_ENTRY_FINAL_VALUE_16(entry);
	2681	c=unicodeCodeUnits[offset];
	2682	if(c<0xfffe) {
	2683	/* output BMP code point */
	2684	*target++=c;
	2685	if(offsets!=NULL) {
	2686	*offsets++=sourceIndex;
	2687	}
	2688	byteIndex=0;
	2689	} else if(c==0xfffe) {
	2690	if(UCNV_TO_U_USE_FALLBACK(cnv) && (entry=(int32_t)ucnv_MBCSGetFallback(&cnv->sharedData->mbcs, offset))!=0xfffe) {
	2691	/* output fallback BMP code point */
	2692	*target++=(UChar)entry;
	2693	if(offsets!=NULL) {
	2694	*offsets++=sourceIndex;
	2695	}
	2696	byteIndex=0;
	2697	}
	2698	} else {
	2699	/* callback(illegal) */
	2700	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
	2701	}
	2702	} else if(action==MBCS_STATE_VALID_DIRECT_16) {
	2703	/* output BMP code point */
	2704	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	2705	if(offsets!=NULL) {
	2706	*offsets++=sourceIndex;
	2707	}
	2708	byteIndex=0;
	2709	} else if(action==MBCS_STATE_VALID_16_PAIR) {
	2710	offset+=MBCS_ENTRY_FINAL_VALUE_16(entry);
	2711	c=unicodeCodeUnits[offset++];
	2712	if(c<0xd800) {
	2713	/* output BMP code point below 0xd800 */
	2714	*target++=c;
	2715	if(offsets!=NULL) {
	2716	*offsets++=sourceIndex;
	2717	}
	2718	byteIndex=0;
	2719	} else if(UCNV_TO_U_USE_FALLBACK(cnv) ? c<=0xdfff : c<=0xdbff) {
	2720	/* output roundtrip or fallback surrogate pair */
	2721	*target++=(UChar)(c&0xdbff);
	2722	if(offsets!=NULL) {
	2723	*offsets++=sourceIndex;
	2724	}
	2725	byteIndex=0;
	2726	if(target<targetLimit) {
	2727	*target++=unicodeCodeUnits[offset];
	2728	if(offsets!=NULL) {
	2729	*offsets++=sourceIndex;
	2730	}
	2731	} else {
	2732	/* target overflow */
	2733	cnv->UCharErrorBuffer[0]=unicodeCodeUnits[offset];
	2734	cnv->UCharErrorBufferLength=1;
	2735	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
	2736
	2737	offset=0;
	2738	break;
	2739	}
	2740	} else if(UCNV_TO_U_USE_FALLBACK(cnv) ? (c&0xfffe)==0xe000 : c==0xe000) {
	2741	/* output roundtrip BMP code point above 0xd800 or fallback BMP code point */
	2742	*target++=unicodeCodeUnits[offset];
	2743	if(offsets!=NULL) {
	2744	*offsets++=sourceIndex;
	2745	}
	2746	byteIndex=0;
	2747	} else if(c==0xffff) {
	2748	/* callback(illegal) */
	2749	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
	2750	}
	2751	} else if(action==MBCS_STATE_VALID_DIRECT_20 \|\|
	2752	(action==MBCS_STATE_FALLBACK_DIRECT_20 && UCNV_TO_U_USE_FALLBACK(cnv))
	2753	) {
	2754	entry=MBCS_ENTRY_FINAL_VALUE(entry);
	2755	/* output surrogate pair */
	2756	*target++=(UChar)(0xd800\|(UChar)(entry>>10));
	2757	if(offsets!=NULL) {
	2758	*offsets++=sourceIndex;
	2759	}
	2760	byteIndex=0;
	2761	c=(UChar)(0xdc00\|(UChar)(entry&0x3ff));
	2762	if(target<targetLimit) {
	2763	*target++=c;
	2764	if(offsets!=NULL) {
	2765	*offsets++=sourceIndex;
	2766	}
	2767	} else {
	2768	/* target overflow */
	2769	cnv->UCharErrorBuffer[0]=c;
	2770	cnv->UCharErrorBufferLength=1;
	2771	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
	2772
	2773	offset=0;
	2774	break;
	2775	}
	2776	} else if(action==MBCS_STATE_CHANGE_ONLY) {
	2777	/*
	2778	* This serves as a state change without any output.
	2779	* It is useful for reading simple stateful encodings,
	2780	* for example using just Shift-In/Shift-Out codes.
	2781	* The 21 unused bits may later be used for more sophisticated
	2782	* state transitions.
	2783	*/
	2784	if(cnv->sharedData->mbcs.dbcsOnlyState==0) {
	2785	byteIndex=0;
	2786	} else {
	2787	/* SI/SO are illegal for DBCS-only conversion */
	2788	state=(uint8_t)(cnv->mode); /* restore the previous state */
	2789
	2790	/* callback(illegal) */
	2791	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
	2792	}
	2793	} else if(action==MBCS_STATE_FALLBACK_DIRECT_16) {
	2794	if(UCNV_TO_U_USE_FALLBACK(cnv)) {
	2795	/* output BMP code point */
	2796	*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	2797	if(offsets!=NULL) {
	2798	*offsets++=sourceIndex;
	2799	}
	2800	byteIndex=0;
	2801	}
	2802	} else if(action==MBCS_STATE_UNASSIGNED) {
	2803	/* just fall through */
	2804	} else if(action==MBCS_STATE_ILLEGAL) {
	2805	/* callback(illegal) */
	2806	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
	2807	} else {
	2808	/* reserved, must never occur */
	2809	byteIndex=0;
	2810	}
	2811
	2812	/* end of action codes: prepare for a new character */
	2813	offset=0;
	2814
	2815	if(byteIndex==0) {
	2816	sourceIndex=nextSourceIndex;
	2817	} else if(U_FAILURE(*pErrorCode)) {
	2818	/* callback(illegal) */
	2819	if(byteIndex>1) {
	2820	/*
	2821	* Ticket 5691: consistent illegal sequences:
	2822	* - We include at least the first byte in the illegal sequence.
	2823	* - If any of the non-initial bytes could be the start of a character,
	2824	* we stop the illegal sequence before the first one of those.
	2825	*/
	2826	UBool isDBCSOnly=(UBool)(cnv->sharedData->mbcs.dbcsOnlyState!=0);
	2827	int8_t i;
	2828	for(i=1;
	2829	i<byteIndex && !isSingleOrLead(stateTable, state, isDBCSOnly, bytes[i]);
	2830	++i) {}
	2831	if(i<byteIndex) {
	2832	/* Back out some bytes. */
	2833	int8_t backOutDistance=byteIndex-i;
	2834	int32_t bytesFromThisBuffer=(int32_t)(source-(const uint8_t *)pArgs->source);
	2835	byteIndex=i; /* length of reported illegal byte sequence */
	2836	if(backOutDistance<=bytesFromThisBuffer) {
	2837	source-=backOutDistance;
	2838	} else {
	2839	/* Back out bytes from the previous buffer: Need to replay them. */
	2840	cnv->preToULength=(int8_t)(bytesFromThisBuffer-backOutDistance);
	2841	/* preToULength is negative! */
	2842	uprv_memcpy(cnv->preToU, bytes+i, -cnv->preToULength);
	2843	source=(const uint8_t *)pArgs->source;
	2844	}
	2845	}
	2846	}
	2847	break;
	2848	} else /* unassigned sequences indicated with byteIndex>0 */ {
	2849	/* try an extension mapping */
	2850	pArgs->source=(const char *)source;
	2851	byteIndex=_extToU(cnv, cnv->sharedData,
	2852	byteIndex, &source, sourceLimit,
	2853	&target, targetLimit,
	2854	&offsets, sourceIndex,
	2855	pArgs->flush,
	2856	pErrorCode);
	2857	sourceIndex=nextSourceIndex+=(int32_t)(source-(const uint8_t *)pArgs->source);
	2858
	2859	if(U_FAILURE(*pErrorCode)) {
	2860	/* not mappable or buffer overflow */
	2861	break;
	2862	}
	2863	}
	2864	}
	2865
	2866	/* set the converter state back into UConverter */
	2867	cnv->toUnicodeStatus=offset;
	2868	cnv->mode=state;
	2869	cnv->toULength=byteIndex;
	2870
	2871	/* write back the updated pointers */
	2872	pArgs->source=(const char *)source;
	2873	pArgs->target=target;
	2874	pArgs->offsets=offsets;
	2875	}
	2876
	2877	/*
	2878	* This version of ucnv_MBCSGetNextUChar() is optimized for single-byte, single-state codepages.
	2879	* We still need a conversion loop in case we find reserved action codes, which are to be ignored.
	2880	*/
	2881	static UChar32
	2882	ucnv_MBCSSingleGetNextUChar(UConverterToUnicodeArgs *pArgs,
	2883	UErrorCode *pErrorCode) {
	2884	UConverter *cnv;
	2885	const int32_t (*stateTable)[256];
	2886	const uint8_t source, sourceLimit;
	2887
	2888	int32_t entry;
	2889	uint8_t action;
	2890
	2891	/* set up the local pointers */
	2892	cnv=pArgs->converter;
	2893	source=(const uint8_t *)pArgs->source;
	2894	sourceLimit=(const uint8_t *)pArgs->sourceLimit;
	2895	if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
	2896	stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable;
	2897	} else {
	2898	stateTable=cnv->sharedData->mbcs.stateTable;
	2899	}
	2900
	2901	/* conversion loop */
	2902	while(source<sourceLimit) {
	2903	entry=stateTable[0][*source++];
	2904	/* MBCS_ENTRY_IS_FINAL(entry) */
	2905
	2906	/* write back the updated pointer early so that we can return directly */
	2907	pArgs->source=(const char *)source;
	2908
	2909	if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
	2910	/* output BMP code point */
	2911	return (UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	2912	}
	2913
	2914	/*
	2915	* An if-else-if chain provides more reliable performance for
	2916	* the most common cases compared to a switch.
	2917	*/
	2918	action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry));
	2919	if( action==MBCS_STATE_VALID_DIRECT_20 \|\|
	2920	(action==MBCS_STATE_FALLBACK_DIRECT_20 && UCNV_TO_U_USE_FALLBACK(cnv))
	2921	) {
	2922	/* output supplementary code point */
	2923	return (UChar32)(MBCS_ENTRY_FINAL_VALUE(entry)+0x10000);
	2924	} else if(action==MBCS_STATE_FALLBACK_DIRECT_16) {
	2925	if(UCNV_TO_U_USE_FALLBACK(cnv)) {
	2926	/* output BMP code point */
	2927	return (UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	2928	}
	2929	} else if(action==MBCS_STATE_UNASSIGNED) {
	2930	/* just fall through */
	2931	} else if(action==MBCS_STATE_ILLEGAL) {
	2932	/* callback(illegal) */
	2933	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
	2934	} else {
	2935	/* reserved, must never occur */
	2936	continue;
	2937	}
	2938
	2939	if(U_FAILURE(*pErrorCode)) {
	2940	/* callback(illegal) */
	2941	break;
	2942	} else /* unassigned sequence */ {
	2943	/* defer to the generic implementation */
	2944	pArgs->source=(const char *)source-1;
	2945	return UCNV_GET_NEXT_UCHAR_USE_TO_U;
	2946	}
	2947	}
	2948
	2949	/* no output because of empty input or only state changes */
	2950	*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
	2951	return 0xffff;
	2952	}
	2953
	2954	/*
	2955	* Version of _MBCSToUnicodeWithOffsets() optimized for single-character
	2956	* conversion without offset handling.
	2957	*
	2958	* When a character does not have a mapping to Unicode, then we return to the
	2959	* generic ucnv_getNextUChar() code for extension/GB 18030 and error/callback
	2960	* handling.
	2961	* We also defer to the generic code in other complicated cases and have them
	2962	* ultimately handled by _MBCSToUnicodeWithOffsets() itself.
	2963	*
	2964	* All normal mappings and errors are handled here.
	2965	*/
	2966	static UChar32 U_CALLCONV
	2967	ucnv_MBCSGetNextUChar(UConverterToUnicodeArgs *pArgs,
	2968	UErrorCode *pErrorCode) {
	2969	UConverter *cnv;
	2970	const uint8_t source, sourceLimit, *lastSource;
	2971
	2972	const int32_t (*stateTable)[256];
	2973	const uint16_t *unicodeCodeUnits;
	2974
	2975	uint32_t offset;
	2976	uint8_t state;
	2977
	2978	int32_t entry;
	2979	UChar32 c;
	2980	uint8_t action;
	2981
	2982	/* use optimized function if possible */
	2983	cnv=pArgs->converter;
	2984
	2985	if(cnv->preToULength>0) {
	2986	/* use the generic code in ucnv_getNextUChar() to continue with a partial match */
	2987	return UCNV_GET_NEXT_UCHAR_USE_TO_U;
	2988	}
	2989
	2990	if(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SURROGATES) {
	2991	/*
	2992	* Using the generic ucnv_getNextUChar() code lets us deal correctly
	2993	* with the rare case of a codepage that maps single surrogates
	2994	* without adding the complexity to this already complicated function here.
	2995	*/
	2996	return UCNV_GET_NEXT_UCHAR_USE_TO_U;
	2997	} else if(cnv->sharedData->mbcs.countStates==1) {
	2998	return ucnv_MBCSSingleGetNextUChar(pArgs, pErrorCode);
	2999	}
	3000
	3001	/* set up the local pointers */
	3002	source=lastSource=(const uint8_t *)pArgs->source;
	3003	sourceLimit=(const uint8_t *)pArgs->sourceLimit;
	3004
	3005	if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
	3006	stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable;
	3007	} else {
	3008	stateTable=cnv->sharedData->mbcs.stateTable;
	3009	}
	3010	unicodeCodeUnits=cnv->sharedData->mbcs.unicodeCodeUnits;
	3011
	3012	/* get the converter state from UConverter */
	3013	offset=cnv->toUnicodeStatus;
	3014
	3015	/*
	3016	* if we are in the SBCS state for a DBCS-only converter,
	3017	* then load the DBCS state from the MBCS data
	3018	* (dbcsOnlyState==0 if it is not a DBCS-only converter)
	3019	*/
	3020	if((state=(uint8_t)(cnv->mode))==0) {
	3021	state=cnv->sharedData->mbcs.dbcsOnlyState;
	3022	}
	3023
	3024	/* conversion loop */
	3025	c=U_SENTINEL;
	3026	while(source<sourceLimit) {
	3027	entry=stateTable[state][*source++];
	3028	if(MBCS_ENTRY_IS_TRANSITION(entry)) {
	3029	state=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry);
	3030	offset+=MBCS_ENTRY_TRANSITION_OFFSET(entry);
	3031
	3032	/* optimization for 1/2-byte input and BMP output */
	3033	if( source<sourceLimit &&
	3034	MBCS_ENTRY_IS_FINAL(entry=stateTable[state][*source]) &&
	3035	MBCS_ENTRY_FINAL_ACTION(entry)==MBCS_STATE_VALID_16 &&
	3036	(c=unicodeCodeUnits[offset+MBCS_ENTRY_FINAL_VALUE_16(entry)])<0xfffe
	3037	) {
	3038	++source;
	3039	state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
	3040	/* output BMP code point */
	3041	break;
	3042	}
	3043	} else {
	3044	/* save the previous state for proper extension mapping with SI/SO-stateful converters */
	3045	cnv->mode=state;
	3046
	3047	/* set the next state early so that we can reuse the entry variable */
	3048	state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
	3049
	3050	/*
	3051	* An if-else-if chain provides more reliable performance for
	3052	* the most common cases compared to a switch.
	3053	*/
	3054	action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry));
	3055	if(action==MBCS_STATE_VALID_DIRECT_16) {
	3056	/* output BMP code point */
	3057	c=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	3058	break;
	3059	} else if(action==MBCS_STATE_VALID_16) {
	3060	offset+=MBCS_ENTRY_FINAL_VALUE_16(entry);
	3061	c=unicodeCodeUnits[offset];
	3062	if(c<0xfffe) {
	3063	/* output BMP code point */
	3064	break;
	3065	} else if(c==0xfffe) {
	3066	if(UCNV_TO_U_USE_FALLBACK(cnv) && (c=ucnv_MBCSGetFallback(&cnv->sharedData->mbcs, offset))!=0xfffe) {
	3067	break;
	3068	}
	3069	} else {
	3070	/* callback(illegal) */
	3071	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
	3072	}
	3073	} else if(action==MBCS_STATE_VALID_16_PAIR) {
	3074	offset+=MBCS_ENTRY_FINAL_VALUE_16(entry);
	3075	c=unicodeCodeUnits[offset++];
	3076	if(c<0xd800) {
	3077	/* output BMP code point below 0xd800 */
	3078	break;
	3079	} else if(UCNV_TO_U_USE_FALLBACK(cnv) ? c<=0xdfff : c<=0xdbff) {
	3080	/* output roundtrip or fallback supplementary code point */
	3081	c=((c&0x3ff)<<10)+unicodeCodeUnits[offset]+(0x10000-0xdc00);
	3082	break;
	3083	} else if(UCNV_TO_U_USE_FALLBACK(cnv) ? (c&0xfffe)==0xe000 : c==0xe000) {
	3084	/* output roundtrip BMP code point above 0xd800 or fallback BMP code point */
	3085	c=unicodeCodeUnits[offset];
	3086	break;
	3087	} else if(c==0xffff) {
	3088	/* callback(illegal) */
	3089	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
	3090	}
	3091	} else if(action==MBCS_STATE_VALID_DIRECT_20 \|\|
	3092	(action==MBCS_STATE_FALLBACK_DIRECT_20 && UCNV_TO_U_USE_FALLBACK(cnv))
	3093	) {
	3094	/* output supplementary code point */
	3095	c=(UChar32)(MBCS_ENTRY_FINAL_VALUE(entry)+0x10000);
	3096	break;
	3097	} else if(action==MBCS_STATE_CHANGE_ONLY) {
	3098	/*
	3099	* This serves as a state change without any output.
	3100	* It is useful for reading simple stateful encodings,
	3101	* for example using just Shift-In/Shift-Out codes.
	3102	* The 21 unused bits may later be used for more sophisticated
	3103	* state transitions.
	3104	*/
	3105	if(cnv->sharedData->mbcs.dbcsOnlyState!=0) {
	3106	/* SI/SO are illegal for DBCS-only conversion */
	3107	state=(uint8_t)(cnv->mode); /* restore the previous state */
	3108
	3109	/* callback(illegal) */
	3110	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
	3111	}
	3112	} else if(action==MBCS_STATE_FALLBACK_DIRECT_16) {
	3113	if(UCNV_TO_U_USE_FALLBACK(cnv)) {
	3114	/* output BMP code point */
	3115	c=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	3116	break;
	3117	}
	3118	} else if(action==MBCS_STATE_UNASSIGNED) {
	3119	/* just fall through */
	3120	} else if(action==MBCS_STATE_ILLEGAL) {
	3121	/* callback(illegal) */
	3122	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
	3123	} else {
	3124	/* reserved (must never occur), or only state change */
	3125	offset=0;
	3126	lastSource=source;
	3127	continue;
	3128	}
	3129
	3130	/* end of action codes: prepare for a new character */
	3131	offset=0;
	3132
	3133	if(U_FAILURE(*pErrorCode)) {
	3134	/* callback(illegal) */
	3135	break;
	3136	} else /* unassigned sequence */ {
	3137	/* defer to the generic implementation */
	3138	cnv->toUnicodeStatus=0;
	3139	cnv->mode=state;
	3140	pArgs->source=(const char *)lastSource;
	3141	return UCNV_GET_NEXT_UCHAR_USE_TO_U;
	3142	}
	3143	}
	3144	}
	3145
	3146	if(c<0) {
	3147	if(U_SUCCESS(*pErrorCode) && source==sourceLimit && lastSource<source) {
	3148	/* incomplete character byte sequence */
	3149	uint8_t *bytes=cnv->toUBytes;
	3150	cnv->toULength=(int8_t)(source-lastSource);
	3151	do {
	3152	bytes++=lastSource++;
	3153	} while(lastSource<source);
	3154	*pErrorCode=U_TRUNCATED_CHAR_FOUND;
	3155	} else if(U_FAILURE(*pErrorCode)) {
	3156	/* callback(illegal) */
	3157	/*
	3158	* Ticket 5691: consistent illegal sequences:
	3159	* - We include at least the first byte in the illegal sequence.
	3160	* - If any of the non-initial bytes could be the start of a character,
	3161	* we stop the illegal sequence before the first one of those.
	3162	*/
	3163	UBool isDBCSOnly=(UBool)(cnv->sharedData->mbcs.dbcsOnlyState!=0);
	3164	uint8_t *bytes=cnv->toUBytes;
	3165	bytes++=lastSource++; /* first byte */
	3166	if(lastSource==source) {
	3167	cnv->toULength=1;
	3168	} else /* lastSource<source: multi-byte character */ {
	3169	int8_t i;
	3170	for(i=1;
	3171	lastSource<source && !isSingleOrLead(stateTable, state, isDBCSOnly, *lastSource);
	3172	++i
	3173	) {
	3174	bytes++=lastSource++;
	3175	}
	3176	cnv->toULength=i;
	3177	source=lastSource;
	3178	}
	3179	} else {
	3180	/* no output because of empty input or only state changes */
	3181	*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
	3182	}
	3183	c=0xffff;
	3184	}
	3185
	3186	/* set the converter state back into UConverter, ready for a new character */
	3187	cnv->toUnicodeStatus=0;
	3188	cnv->mode=state;
	3189
	3190	/* write back the updated pointer */
	3191	pArgs->source=(const char *)source;
	3192	return c;
	3193	}
	3194
	#if 0
	/*
	 * Disabled on 2002dec09 (ICU 2.4): this code is not currently used in ICU. markus
	 * Leaving it out of the build improves code coverage.
	 */
	/**
	 * Variant of ucnv_MBCSSimpleGetNextUChar() for single-byte, single-state codepages.
	 * The EBCDIC swaplfnl option (set in UConverter) is not handled.
	 * Conversion extensions (_extToU()) are not handled.
	 *
	 * Return value:
	 * U+fffe   unassigned, or a fallback that is not to be used
	 * U+ffff   illegal or reserved action code
	 * otherwise the Unicode code point
	 */
	U_CFUNC UChar32
	ucnv_MBCSSingleSimpleGetNextUChar(UConverterSharedData *sharedData,
	                                  uint8_t b, UBool useFallback) {
	    /* the entry is expected to be final: MBCS_ENTRY_IS_FINAL(stateEntry) */
	    const int32_t stateEntry=sharedData->mbcs.stateTable[0][b];
	    uint8_t actionCode;

	    /* most common case first: roundtrip to a BMP code point */
	    if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(stateEntry)) {
	        return (UChar)MBCS_ENTRY_FINAL_VALUE_16(stateEntry);
	    }

	    /*
	     * A sequence of plain tests provides more reliable performance for
	     * the most common cases compared to a switch.
	     */
	    actionCode=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(stateEntry));

	    if(actionCode==MBCS_STATE_VALID_DIRECT_20) {
	        /* roundtrip to a supplementary code point */
	        return 0x10000+MBCS_ENTRY_FINAL_VALUE(stateEntry);
	    }

	    if(actionCode==MBCS_STATE_FALLBACK_DIRECT_16 || actionCode==MBCS_STATE_FALLBACK_DIRECT_20) {
	        if(!TO_U_USE_FALLBACK(useFallback)) {
	            return 0xfffe;
	        }
	        if(actionCode==MBCS_STATE_FALLBACK_DIRECT_16) {
	            /* fallback to a BMP code point */
	            return (UChar)MBCS_ENTRY_FINAL_VALUE_16(stateEntry);
	        }
	        /* fallback to a supplementary code point */
	        return 0x10000+MBCS_ENTRY_FINAL_VALUE(stateEntry);
	    }

	    if(actionCode==MBCS_STATE_UNASSIGNED) {
	        return 0xfffe;
	    }

	    /* MBCS_STATE_ILLEGAL, or a reserved action code that must never occur */
	    return 0xffff;
	}
	#endif
	3249
	3250	/*
	3251	* This is a simple version of _MBCSGetNextUChar() that is used
	3252	* by other converter implementations.
	3253	* It only returns an "assigned" result if it consumes the entire input.
	3254	* It does not use state from the converter, nor error codes.
	3255	* It does not handle the EBCDIC swaplfnl option (set in UConverter).
	3256	* It handles conversion extensions but not GB 18030.
	3257	*
	3258	* Return value:
	3259	* U+fffe unassigned
	3260	* U+ffff illegal
	3261	* otherwise the Unicode code point
	3262	*/
	3263	U_CFUNC UChar32
	3264	ucnv_MBCSSimpleGetNextUChar(UConverterSharedData *sharedData,
	3265	const char *source, int32_t length,
	3266	UBool useFallback) {
	3267	const int32_t (*stateTable)[256];
	3268	const uint16_t *unicodeCodeUnits;
	3269
	3270	uint32_t offset;
	3271	uint8_t state, action;
	3272
	3273	UChar32 c;
	3274	int32_t i, entry;
	3275
	3276	if(length<=0) {
	3277	/* no input at all: "illegal" */
	3278	return 0xffff;
	3279	}
	3280
	3281	#if 0
	3282	/*
	3283	* Code disabled 2002dec09 (ICU 2.4) because it is not currently used in ICU. markus
	3284	* TODO In future releases, verify that this function is never called for SBCS
	3285	* conversions, i.e., that sharedData->mbcs.countStates==1 is still true.
	3286	* Removal improves code coverage.
	3287	*/
	3288	/* use optimized function if possible */
	3289	if(sharedData->mbcs.countStates==1) {
	3290	if(length==1) {
	3291	return ucnv_MBCSSingleSimpleGetNextUChar(sharedData, (uint8_t)*source, useFallback);
	3292	} else {
	3293	return 0xffff; /* illegal: more than a single byte for an SBCS converter */
	3294	}
	3295	}
	3296	#endif
	3297
	3298	/* set up the local pointers */
	3299	stateTable=sharedData->mbcs.stateTable;
	3300	unicodeCodeUnits=sharedData->mbcs.unicodeCodeUnits;
	3301
	3302	/* converter state */
	3303	offset=0;
	3304	state=sharedData->mbcs.dbcsOnlyState;
	3305
	3306	/* conversion loop */
	3307	for(i=0;;) {
	3308	entry=stateTable[state][(uint8_t)source[i++]];
	3309	if(MBCS_ENTRY_IS_TRANSITION(entry)) {
	3310	state=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry);
	3311	offset+=MBCS_ENTRY_TRANSITION_OFFSET(entry);
	3312
	3313	if(i==length) {
	3314	return 0xffff; /* truncated character */
	3315	}
	3316	} else {
	3317	/*
	3318	* An if-else-if chain provides more reliable performance for
	3319	* the most common cases compared to a switch.
	3320	*/
	3321	action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry));
	3322	if(action==MBCS_STATE_VALID_16) {
	3323	offset+=MBCS_ENTRY_FINAL_VALUE_16(entry);
	3324	c=unicodeCodeUnits[offset];
	3325	if(c!=0xfffe) {
	3326	/* done */
	3327	} else if(UCNV_TO_U_USE_FALLBACK(cnv)) {
	3328	c=ucnv_MBCSGetFallback(&sharedData->mbcs, offset);
	3329	/* else done with 0xfffe */
	3330	}
	3331	break;
	3332	} else if(action==MBCS_STATE_VALID_DIRECT_16) {
	3333	/* output BMP code point */
	3334	c=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	3335	break;
	3336	} else if(action==MBCS_STATE_VALID_16_PAIR) {
	3337	offset+=MBCS_ENTRY_FINAL_VALUE_16(entry);
	3338	c=unicodeCodeUnits[offset++];
	3339	if(c<0xd800) {
	3340	/* output BMP code point below 0xd800 */
	3341	} else if(UCNV_TO_U_USE_FALLBACK(cnv) ? c<=0xdfff : c<=0xdbff) {
	3342	/* output roundtrip or fallback supplementary code point */
	3343	c=(UChar32)(((c&0x3ff)<<10)+unicodeCodeUnits[offset]+(0x10000-0xdc00));
	3344	} else if(UCNV_TO_U_USE_FALLBACK(cnv) ? (c&0xfffe)==0xe000 : c==0xe000) {
	3345	/* output roundtrip BMP code point above 0xd800 or fallback BMP code point */
	3346	c=unicodeCodeUnits[offset];
	3347	} else if(c==0xffff) {
	3348	return 0xffff;
	3349	} else {
	3350	c=0xfffe;
	3351	}
	3352	break;
	3353	} else if(action==MBCS_STATE_VALID_DIRECT_20) {
	3354	/* output supplementary code point */
	3355	c=0x10000+MBCS_ENTRY_FINAL_VALUE(entry);
	3356	break;
	3357	} else if(action==MBCS_STATE_FALLBACK_DIRECT_16) {
	3358	if(!TO_U_USE_FALLBACK(useFallback)) {
	3359	c=0xfffe;
	3360	break;
	3361	}
	3362	/* output BMP code point */
	3363	c=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
	3364	break;
	3365	} else if(action==MBCS_STATE_FALLBACK_DIRECT_20) {
	3366	if(!TO_U_USE_FALLBACK(useFallback)) {
	3367	c=0xfffe;
	3368	break;
	3369	}
	3370	/* output supplementary code point */
	3371	c=0x10000+MBCS_ENTRY_FINAL_VALUE(entry);
	3372	break;
	3373	} else if(action==MBCS_STATE_UNASSIGNED) {
	3374	c=0xfffe;
	3375	break;
	3376	}
	3377
	3378	/*
	3379	* forbid MBCS_STATE_CHANGE_ONLY for this function,
	3380	* and MBCS_STATE_ILLEGAL and reserved action codes
	3381	*/
	3382	return 0xffff;
	3383	}
	3384	}
	3385
	3386	if(i!=length) {
	3387	/* illegal for this function: not all input consumed */
	3388	return 0xffff;
	3389	}
	3390
	3391	if(c==0xfffe) {
	3392	/* try an extension mapping */
	3393	const int32_t *cx=sharedData->mbcs.extIndexes;
	3394	if(cx!=NULL) {
	3395	return ucnv_extSimpleMatchToU(cx, source, length, useFallback);
	3396	}
	3397	}
	3398
	3399	return c;
	3400	}
	3401
	3402	/* MBCS-from-Unicode conversion functions ----------------------------------- */
	3403
	3404	/* This version of ucnv_MBCSFromUnicodeWithOffsets() is optimized for double-byte codepages. */
	3405	static void
	3406	ucnv_MBCSDoubleFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
	3407	UErrorCode *pErrorCode) {
	3408	UConverter *cnv;
	3409	const UChar source, sourceLimit;
	3410	uint8_t *target;
	3411	int32_t targetCapacity;
	3412	int32_t *offsets;
	3413
	3414	const uint16_t *table;
	3415	const uint16_t *mbcsIndex;
	3416	const uint8_t *bytes;
	3417
	3418	UChar32 c;
	3419
	3420	int32_t sourceIndex, nextSourceIndex;
	3421
	3422	uint32_t stage2Entry;
	3423	uint32_t asciiRoundtrips;
	3424	uint32_t value;
	3425	uint8_t unicodeMask;
	3426
	3427	/* use optimized function if possible */
	3428	cnv=pArgs->converter;
	3429	unicodeMask=cnv->sharedData->mbcs.unicodeMask;
	3430
	3431	/* set up the local pointers */
	3432	source=pArgs->source;
	3433	sourceLimit=pArgs->sourceLimit;
	3434	target=(uint8_t *)pArgs->target;
	3435	targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
	3436	offsets=pArgs->offsets;
	3437
	3438	table=cnv->sharedData->mbcs.fromUnicodeTable;
	3439	mbcsIndex=cnv->sharedData->mbcs.mbcsIndex;
	3440	if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
	3441	bytes=cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes;
	3442	} else {
	3443	bytes=cnv->sharedData->mbcs.fromUnicodeBytes;
	3444	}
	3445	asciiRoundtrips=cnv->sharedData->mbcs.asciiRoundtrips;
	3446
	3447	/* get the converter state from UConverter */
	3448	c=cnv->fromUChar32;
	3449
	3450	/* sourceIndex=-1 if the current character began in the previous buffer */
	3451	sourceIndex= c==0 ? 0 : -1;
	3452	nextSourceIndex=0;
	3453
	3454	/* conversion loop */
	3455	if(c!=0 && targetCapacity>0) {
	3456	goto getTrail;
	3457	}
	3458
	3459	while(source<sourceLimit) {
	3460	/*
	3461	* This following test is to see if available input would overflow the output.
	3462	* It does not catch output of more than one byte that
	3463	* overflows as a result of a multi-byte character or callback output
	3464	* from the last source character.
	3465	* Therefore, those situations also test for overflows and will
	3466	* then break the loop, too.
	3467	*/
	3468	if(targetCapacity>0) {
	3469	/*
	3470	* Get a correct Unicode code point:
	3471	* a single UChar for a BMP code point or
	3472	* a matched surrogate pair for a "supplementary code point".
	3473	*/
	3474	c=*source++;
	3475	++nextSourceIndex;
	3476	if(c<=0x7f && IS_ASCII_ROUNDTRIP(c, asciiRoundtrips)) {
	3477	*target++=(uint8_t)c;
	3478	if(offsets!=NULL) {
	3479	*offsets++=sourceIndex;
	3480	sourceIndex=nextSourceIndex;
	3481	}
	3482	--targetCapacity;
	3483	c=0;
	3484	continue;
	3485	}
	3486	/*
	3487	* utf8Friendly table: Test for <=0xd7ff rather than <=MBCS_FAST_MAX
	3488	* to avoid dealing with surrogates.
	3489	* MBCS_FAST_MAX must be >=0xd7ff.
	3490	*/
	3491	if(c<=0xd7ff) {
	3492	value=DBCS_RESULT_FROM_MOST_BMP(mbcsIndex, (const uint16_t *)bytes, c);
	3493	/* There are only roundtrips (!=0) and no-mapping (==0) entries. */
	3494	if(value==0) {
	3495	goto unassigned;
	3496	}
	3497	/* output the value */
	3498	} else {
	3499	/*
	3500	* This also tests if the codepage maps single surrogates.
	3501	* If it does, then surrogates are not paired but mapped separately.
	3502	* Note that in this case unmatched surrogates are not detected.
	3503	*/
	3504	if(U16_IS_SURROGATE(c) && !(unicodeMask&UCNV_HAS_SURROGATES)) {
	3505	if(U16_IS_SURROGATE_LEAD(c)) {
	3506	getTrail:
	3507	if(source<sourceLimit) {
	3508	/* test the following code unit */
	3509	UChar trail=*source;
	3510	if(U16_IS_TRAIL(trail)) {
	3511	++source;
	3512	++nextSourceIndex;
	3513	c=U16_GET_SUPPLEMENTARY(c, trail);
	3514	if(!(unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
	3515	/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
	3516	/* callback(unassigned) */
	3517	goto unassigned;
	3518	}
	3519	/* convert this supplementary code point */
	3520	/* exit this condition tree */
	3521	} else {
	3522	/* this is an unmatched lead code unit (1st surrogate) */
	3523	/* callback(illegal) */
	3524	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
	3525	break;
	3526	}
	3527	} else {
	3528	/* no more input */
	3529	break;
	3530	}
	3531	} else {
	3532	/* this is an unmatched trail code unit (2nd surrogate) */
	3533	/* callback(illegal) */
	3534	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
	3535	break;
	3536	}
	3537	}
	3538
	3539	/* convert the Unicode code point in c into codepage bytes */
	3540	stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
	3541
	3542	/* get the bytes and the length for the output */
	3543	/* MBCS_OUTPUT_2 */
	3544	value=MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
	3545
	3546	/* is this code point assigned, or do we use fallbacks? */
	3547	if(!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) \|\|
	3548	(UCNV_FROM_U_USE_FALLBACK(cnv, c) && value!=0))
	3549	) {
	3550	/*
	3551	* We allow a 0 byte output if the "assigned" bit is set for this entry.
	3552	* There is no way with this data structure for fallback output
	3553	* to be a zero byte.
	3554	*/
	3555
	3556	unassigned:
	3557	/* try an extension mapping */
	3558	pArgs->source=source;
	3559	c=_extFromU(cnv, cnv->sharedData,
	3560	c, &source, sourceLimit,
	3561	&target, target+targetCapacity,
	3562	&offsets, sourceIndex,
	3563	pArgs->flush,
	3564	pErrorCode);
	3565	nextSourceIndex+=(int32_t)(source-pArgs->source);
	3566
	3567	if(U_FAILURE(*pErrorCode)) {
	3568	/* not mappable or buffer overflow */
	3569	break;
	3570	} else {
	3571	/* a mapping was written to the target, continue */
	3572
	3573	/* recalculate the targetCapacity after an extension mapping */
	3574	targetCapacity=(int32_t)(pArgs->targetLimit-(char *)target);
	3575
	3576	/* normal end of conversion: prepare for a new character */
	3577	sourceIndex=nextSourceIndex;
	3578	continue;
	3579	}
	3580	}
	3581	}
	3582
	3583	/* write the output character bytes from value and length */
	3584	/* from the first if in the loop we know that targetCapacity>0 */
	3585	if(value<=0xff) {
	3586	/* this is easy because we know that there is enough space */
	3587	*target++=(uint8_t)value;
	3588	if(offsets!=NULL) {
	3589	*offsets++=sourceIndex;
	3590	}
	3591	--targetCapacity;
	3592	} else /* length==2 */ {
	3593	*target++=(uint8_t)(value>>8);
	3594	if(2<=targetCapacity) {
	3595	*target++=(uint8_t)value;
	3596	if(offsets!=NULL) {
	3597	*offsets++=sourceIndex;
	3598	*offsets++=sourceIndex;
	3599	}
	3600	targetCapacity-=2;
	3601	} else {
	3602	if(offsets!=NULL) {
	3603	*offsets++=sourceIndex;
	3604	}
	3605	cnv->charErrorBuffer[0]=(char)value;
	3606	cnv->charErrorBufferLength=1;
	3607
	3608	/* target overflow */
	3609	targetCapacity=0;
	3610	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
	3611	c=0;
	3612	break;
	3613	}
	3614	}
	3615
	3616	/* normal end of conversion: prepare for a new character */
	3617	c=0;
	3618	sourceIndex=nextSourceIndex;
	3619	continue;
	3620	} else {
	3621	/* target is full */
	3622	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
	3623	break;
	3624	}
	3625	}
	3626
	3627	/* set the converter state back into UConverter */
	3628	cnv->fromUChar32=c;
	3629
	3630	/* write back the updated pointers */
	3631	pArgs->source=source;
	3632	pArgs->target=(char *)target;
	3633	pArgs->offsets=offsets;
	3634	}
	3635
	3636	/* This version of ucnv_MBCSFromUnicodeWithOffsets() is optimized for single-byte codepages. */
	3637	static void
	3638	ucnv_MBCSSingleFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
	3639	UErrorCode *pErrorCode) {
	3640	UConverter *cnv;
	3641	const UChar source, sourceLimit;
	3642	uint8_t *target;
	3643	int32_t targetCapacity;
	3644	int32_t *offsets;
	3645
	3646	const uint16_t *table;
	3647	const uint16_t *results;
	3648
	3649	UChar32 c;
	3650
	3651	int32_t sourceIndex, nextSourceIndex;
	3652
	3653	uint16_t value, minValue;
	3654	UBool hasSupplementary;
	3655
	3656	/* set up the local pointers */
	3657	cnv=pArgs->converter;
	3658	source=pArgs->source;
	3659	sourceLimit=pArgs->sourceLimit;
	3660	target=(uint8_t *)pArgs->target;
	3661	targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
	3662	offsets=pArgs->offsets;
	3663
	3664	table=cnv->sharedData->mbcs.fromUnicodeTable;
	3665	if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
	3666	results=(uint16_t *)cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes;
	3667	} else {
	3668	results=(uint16_t *)cnv->sharedData->mbcs.fromUnicodeBytes;
	3669	}
	3670
	3671	if(cnv->useFallback) {
	3672	/* use all roundtrip and fallback results */
	3673	minValue=0x800;
	3674	} else {
	3675	/* use only roundtrips and fallbacks from private-use characters */
	3676	minValue=0xc00;
	3677	}
	3678	hasSupplementary=(UBool)(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY);
	3679
	3680	/* get the converter state from UConverter */
	3681	c=cnv->fromUChar32;
	3682
	3683	/* sourceIndex=-1 if the current character began in the previous buffer */
	3684	sourceIndex= c==0 ? 0 : -1;
	3685	nextSourceIndex=0;
	3686
	3687	/* conversion loop */
	3688	if(c!=0 && targetCapacity>0) {
	3689	goto getTrail;
	3690	}
	3691
	3692	while(source<sourceLimit) {
	3693	/*
	3694	* This following test is to see if available input would overflow the output.
	3695	* It does not catch output of more than one byte that
	3696	* overflows as a result of a multi-byte character or callback output
	3697	* from the last source character.
	3698	* Therefore, those situations also test for overflows and will
	3699	* then break the loop, too.
	3700	*/
	3701	if(targetCapacity>0) {
	3702	/*
	3703	* Get a correct Unicode code point:
	3704	* a single UChar for a BMP code point or
	3705	* a matched surrogate pair for a "supplementary code point".
	3706	*/
	3707	c=*source++;
	3708	++nextSourceIndex;
	3709	if(U16_IS_SURROGATE(c)) {
	3710	if(U16_IS_SURROGATE_LEAD(c)) {
	3711	getTrail:
	3712	if(source<sourceLimit) {
	3713	/* test the following code unit */
	3714	UChar trail=*source;
	3715	if(U16_IS_TRAIL(trail)) {
	3716	++source;
	3717	++nextSourceIndex;
	3718	c=U16_GET_SUPPLEMENTARY(c, trail);
	3719	if(!hasSupplementary) {
	3720	/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
	3721	/* callback(unassigned) */
	3722	goto unassigned;
	3723	}
	3724	/* convert this supplementary code point */
	3725	/* exit this condition tree */
	3726	} else {
	3727	/* this is an unmatched lead code unit (1st surrogate) */
	3728	/* callback(illegal) */
	3729	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
	3730	break;
	3731	}
	3732	} else {
	3733	/* no more input */
	3734	break;
	3735	}
	3736	} else {
	3737	/* this is an unmatched trail code unit (2nd surrogate) */
	3738	/* callback(illegal) */
	3739	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
	3740	break;
	3741	}
	3742	}
	3743
	3744	/* convert the Unicode code point in c into codepage bytes */
	3745	value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
	3746
	3747	/* is this code point assigned, or do we use fallbacks? */
	3748	if(value>=minValue) {
	3749	/* assigned, write the output character bytes from value and length */
	3750	/* length==1 */
	3751	/* this is easy because we know that there is enough space */
	3752	*target++=(uint8_t)value;
	3753	if(offsets!=NULL) {
	3754	*offsets++=sourceIndex;
	3755	}
	3756	--targetCapacity;
	3757
	3758	/* normal end of conversion: prepare for a new character */
	3759	c=0;
	3760	sourceIndex=nextSourceIndex;
	3761	} else { /* unassigned */
	3762	unassigned:
	3763	/* try an extension mapping */
	3764	pArgs->source=source;
	3765	c=_extFromU(cnv, cnv->sharedData,
	3766	c, &source, sourceLimit,
	3767	&target, target+targetCapacity,
	3768	&offsets, sourceIndex,
	3769	pArgs->flush,
	3770	pErrorCode);
	3771	nextSourceIndex+=(int32_t)(source-pArgs->source);
	3772
	3773	if(U_FAILURE(*pErrorCode)) {
	3774	/* not mappable or buffer overflow */
	3775	break;
	3776	} else {
	3777	/* a mapping was written to the target, continue */
	3778
	3779	/* recalculate the targetCapacity after an extension mapping */
	3780	targetCapacity=(int32_t)(pArgs->targetLimit-(char *)target);
	3781
	3782	/* normal end of conversion: prepare for a new character */
	3783	sourceIndex=nextSourceIndex;
	3784	}
	3785	}
	3786	} else {
	3787	/* target is full */
	3788	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
	3789	break;
	3790	}
	3791	}
	3792
	3793	/* set the converter state back into UConverter */
	3794	cnv->fromUChar32=c;
	3795
	3796	/* write back the updated pointers */
	3797	pArgs->source=source;
	3798	pArgs->target=(char *)target;
	3799	pArgs->offsets=offsets;
	3800	}
	3801
	3802	/*
	3803	* This version of ucnv_MBCSFromUnicode() is optimized for single-byte codepages
	3804	* that map only to and from the BMP.
	3805	* In addition to single-byte/state optimizations, the offset calculations
	3806	* become much easier.
	3807	* It would be possible to use the sbcsIndex for UTF-8-friendly tables,
	3808	* but measurements have shown that this diminishes performance
	3809	* in more cases than it improves it.
	3810	* See SVN revision 21013 (2007-feb-06) for the last version with #if switches
	3811	* for various MBCS and SBCS optimizations.
	3812	*/
	3813	static void
	3814	ucnv_MBCSSingleFromBMPWithOffsets(UConverterFromUnicodeArgs *pArgs,
	3815	UErrorCode *pErrorCode) {
	3816	UConverter *cnv;
	3817	const UChar source, sourceLimit, *lastSource;
	3818	uint8_t *target;
	3819	int32_t targetCapacity, length;
	3820	int32_t *offsets;
	3821
	3822	const uint16_t *table;
	3823	const uint16_t *results;
	3824
	3825	UChar32 c;
	3826
	3827	int32_t sourceIndex;
	3828
	3829	uint32_t asciiRoundtrips;
	3830	uint16_t value, minValue;
	3831
	3832	/* set up the local pointers */
	3833	cnv=pArgs->converter;
	3834	source=pArgs->source;
	3835	sourceLimit=pArgs->sourceLimit;
	3836	target=(uint8_t *)pArgs->target;
	3837	targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
	3838	offsets=pArgs->offsets;
	3839
	3840	table=cnv->sharedData->mbcs.fromUnicodeTable;
	3841	if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
	3842	results=(uint16_t *)cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes;
	3843	} else {
	3844	results=(uint16_t *)cnv->sharedData->mbcs.fromUnicodeBytes;
	3845	}
	3846	asciiRoundtrips=cnv->sharedData->mbcs.asciiRoundtrips;
	3847
	3848	if(cnv->useFallback) {
	3849	/* use all roundtrip and fallback results */
	3850	minValue=0x800;
	3851	} else {
	3852	/* use only roundtrips and fallbacks from private-use characters */
	3853	minValue=0xc00;
	3854	}
	3855
	3856	/* get the converter state from UConverter */
	3857	c=cnv->fromUChar32;
	3858
	3859	/* sourceIndex=-1 if the current character began in the previous buffer */
	3860	sourceIndex= c==0 ? 0 : -1;
	3861	lastSource=source;
	3862
	3863	/*
	3864	* since the conversion here is 1:1 UChar:uint8_t, we need only one counter
	3865	* for the minimum of the sourceLength and targetCapacity
	3866	*/
	3867	length=(int32_t)(sourceLimit-source);
	3868	if(length<targetCapacity) {
	3869	targetCapacity=length;
	3870	}
	3871
	3872	/* conversion loop */
	3873	if(c!=0 && targetCapacity>0) {
	3874	goto getTrail;
	3875	}
	3876
	3877	#if MBCS_UNROLL_SINGLE_FROM_BMP
	3878	/* unrolling makes it slower on Pentium III/Windows 2000?! */
	3879	/* unroll the loop with the most common case */
	3880	unrolled:
	3881	if(targetCapacity>=4) {
	3882	int32_t count, loops;
	3883	uint16_t andedValues;
	3884
	3885	loops=count=targetCapacity>>2;
	3886	do {
	3887	c=*source++;
	3888	andedValues=value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
	3889	*target++=(uint8_t)value;
	3890	c=*source++;
	3891	andedValues&=value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
	3892	*target++=(uint8_t)value;
	3893	c=*source++;
	3894	andedValues&=value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
	3895	*target++=(uint8_t)value;
	3896	c=*source++;
	3897	andedValues&=value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
	3898	*target++=(uint8_t)value;
	3899
	3900	/* were all 4 entries really valid? */
	3901	if(andedValues<minValue) {
	3902	/* no, return to the first of these 4 */
	3903	source-=4;
	3904	target-=4;
	3905	break;
	3906	}
	3907	} while(--count>0);
	3908	count=loops-count;
	3909	targetCapacity-=4*count;
	3910
	3911	if(offsets!=NULL) {
	3912	lastSource+=4*count;
	3913	while(count>0) {
	3914	*offsets++=sourceIndex++;
	3915	*offsets++=sourceIndex++;
	3916	*offsets++=sourceIndex++;
	3917	*offsets++=sourceIndex++;
	3918	--count;
	3919	}
	3920	}
	3921
	3922	c=0;
	3923	}
	3924	#endif
	3925
	3926	while(targetCapacity>0) {
	3927	/*
	3928	* Get a correct Unicode code point:
	3929	* a single UChar for a BMP code point or
	3930	* a matched surrogate pair for a "supplementary code point".
	3931	*/
	3932	c=*source++;
	3933	/*
	3934	* Do not immediately check for single surrogates:
	3935	* Assume that they are unassigned and check for them in that case.
	3936	* This speeds up the conversion of assigned characters.
	3937	*/
	3938	/* convert the Unicode code point in c into codepage bytes */
	3939	if(c<=0x7f && IS_ASCII_ROUNDTRIP(c, asciiRoundtrips)) {
	3940	*target++=(uint8_t)c;
	3941	--targetCapacity;
	3942	c=0;
	3943	continue;
	3944	}
	3945	value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
	3946	/* is this code point assigned, or do we use fallbacks? */
	3947	if(value>=minValue) {
	3948	/* assigned, write the output character bytes from value and length */
	3949	/* length==1 */
	3950	/* this is easy because we know that there is enough space */
	3951	*target++=(uint8_t)value;
	3952	--targetCapacity;
	3953
	3954	/* normal end of conversion: prepare for a new character */
	3955	c=0;
	3956	continue;
	3957	} else if(!U16_IS_SURROGATE(c)) {
	3958	/* normal, unassigned BMP character */
	3959	} else if(U16_IS_SURROGATE_LEAD(c)) {
	3960	getTrail:
	3961	if(source<sourceLimit) {
	3962	/* test the following code unit */
	3963	UChar trail=*source;
	3964	if(U16_IS_TRAIL(trail)) {
	3965	++source;
	3966	c=U16_GET_SUPPLEMENTARY(c, trail);
	3967	/* this codepage does not map supplementary code points */
	3968	/* callback(unassigned) */
	3969	} else {
	3970	/* this is an unmatched lead code unit (1st surrogate) */
	3971	/* callback(illegal) */
	3972	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
	3973	break;
	3974	}
	3975	} else {
	3976	/* no more input */
	3977	if (pArgs->flush) {
	3978	*pErrorCode=U_TRUNCATED_CHAR_FOUND;
	3979	}
	3980	break;
	3981	}
	3982	} else {
	3983	/* this is an unmatched trail code unit (2nd surrogate) */
	3984	/* callback(illegal) */
	3985	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
	3986	break;
	3987	}
	3988
	3989	/* c does not have a mapping */
	3990
	3991	/* get the number of code units for c to correctly advance sourceIndex */
	3992	length=U16_LENGTH(c);
	3993
	3994	/* set offsets since the start or the last extension */
	3995	if(offsets!=NULL) {
	3996	int32_t count=(int32_t)(source-lastSource);
	3997
	3998	/* do not set the offset for this character */
	3999	count-=length;
	4000
	4001	while(count>0) {
	4002	*offsets++=sourceIndex++;
	4003	--count;
	4004	}
	4005	/* offsets and sourceIndex are now set for the current character */
	4006	}
	4007
	4008	/* try an extension mapping */
	4009	lastSource=source;
	4010	c=_extFromU(cnv, cnv->sharedData,
	4011	c, &source, sourceLimit,
	4012	&target, (const uint8_t *)(pArgs->targetLimit),
	4013	&offsets, sourceIndex,
	4014	pArgs->flush,
	4015	pErrorCode);
	4016	sourceIndex+=length+(int32_t)(source-lastSource);
	4017	lastSource=source;
	4018
	4019	if(U_FAILURE(*pErrorCode)) {
	4020	/* not mappable or buffer overflow */
	4021	break;
	4022	} else {
	4023	/* a mapping was written to the target, continue */
	4024
	4025	/* recalculate the targetCapacity after an extension mapping */
	4026	targetCapacity=(int32_t)(pArgs->targetLimit-(char *)target);
	4027	length=(int32_t)(sourceLimit-source);
	4028	if(length<targetCapacity) {
	4029	targetCapacity=length;
	4030	}
	4031	}
	4032
	4033	#if MBCS_UNROLL_SINGLE_FROM_BMP
	4034	/* unrolling makes it slower on Pentium III/Windows 2000?! */
	4035	goto unrolled;
	4036	#endif
	4037	}
	4038
	4039	if(U_SUCCESS(pErrorCode) && source<sourceLimit && target>=(uint8_t )pArgs->targetLimit) {
	4040	/* target is full */
	4041	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
	4042	}
	4043
	4044	/* set offsets since the start or the last callback */
	4045	if(offsets!=NULL) {
	4046	size_t count=source-lastSource;
	4047	if (count > 0 && *pErrorCode == U_TRUNCATED_CHAR_FOUND) {
	4048	/*
	4049	Caller gave us a partial supplementary character,
	4050	which this function couldn't convert in any case.
	4051	The callback will handle the offset.
	4052	*/
	4053	count--;
	4054	}
	4055	while(count>0) {
	4056	*offsets++=sourceIndex++;
	4057	--count;
	4058	}
	4059	}
	4060
	4061	/* set the converter state back into UConverter */
	4062	cnv->fromUChar32=c;
	4063
	4064	/* write back the updated pointers */
	4065	pArgs->source=source;
	4066	pArgs->target=(char *)target;
	4067	pArgs->offsets=offsets;
	4068	}
	4069
	4070	U_CFUNC void
	4071	ucnv_MBCSFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
	4072	UErrorCode *pErrorCode) {
	4073	UConverter *cnv;
	4074	const UChar source, sourceLimit;
	4075	uint8_t *target;
	4076	int32_t targetCapacity;
	4077	int32_t *offsets;
	4078
	4079	const uint16_t *table;
	4080	const uint16_t *mbcsIndex;
	4081	const uint8_t p, bytes;
	4082	uint8_t outputType;
	4083
	4084	UChar32 c;
	4085
	4086	int32_t prevSourceIndex, sourceIndex, nextSourceIndex;
	4087
	4088	uint32_t stage2Entry;
	4089	uint32_t asciiRoundtrips;
	4090	uint32_t value;
	4091	/* Shift-In and Shift-Out byte sequences differ by encoding scheme. */
	4092	uint8_t siBytes[2] = {0, 0};
	4093	uint8_t soBytes[2] = {0, 0};
	4094	uint8_t siLength, soLength;
	4095	int32_t length = 0, prevLength;
	4096	uint8_t unicodeMask;
	4097
	4098	cnv=pArgs->converter;
	4099
	4100	if(cnv->preFromUFirstCP>=0) {
	4101	/*
	4102	* pass sourceIndex=-1 because we continue from an earlier buffer
	4103	* in the future, this may change with continuous offsets
	4104	*/
	4105	ucnv_extContinueMatchFromU(cnv, pArgs, -1, pErrorCode);
	4106
	4107	if(U_FAILURE(*pErrorCode) \|\| cnv->preFromULength<0) {
	4108	return;
	4109	}
	4110	}
	4111
	4112	/* use optimized function if possible */
	4113	outputType=cnv->sharedData->mbcs.outputType;
	4114	unicodeMask=cnv->sharedData->mbcs.unicodeMask;
	4115	if(outputType==MBCS_OUTPUT_1 && !(unicodeMask&UCNV_HAS_SURROGATES)) {
	4116	if(!(unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
	4117	ucnv_MBCSSingleFromBMPWithOffsets(pArgs, pErrorCode);
	4118	} else {
	4119	ucnv_MBCSSingleFromUnicodeWithOffsets(pArgs, pErrorCode);
	4120	}
	4121	return;
	4122	} else if(outputType==MBCS_OUTPUT_2 && cnv->sharedData->mbcs.utf8Friendly) {
	4123	ucnv_MBCSDoubleFromUnicodeWithOffsets(pArgs, pErrorCode);
	4124	return;
	4125	}
	4126
	4127	/* set up the local pointers */
	4128	source=pArgs->source;
	4129	sourceLimit=pArgs->sourceLimit;
	4130	target=(uint8_t *)pArgs->target;
	4131	targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
	4132	offsets=pArgs->offsets;
	4133
	4134	table=cnv->sharedData->mbcs.fromUnicodeTable;
	4135	if(cnv->sharedData->mbcs.utf8Friendly) {
	4136	mbcsIndex=cnv->sharedData->mbcs.mbcsIndex;
	4137	} else {
	4138	mbcsIndex=NULL;
	4139	}
	4140	if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
	4141	bytes=cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes;
	4142	} else {
	4143	bytes=cnv->sharedData->mbcs.fromUnicodeBytes;
	4144	}
	4145	asciiRoundtrips=cnv->sharedData->mbcs.asciiRoundtrips;
	4146
	4147	/* get the converter state from UConverter */
	4148	c=cnv->fromUChar32;
	4149
	4150	if(outputType==MBCS_OUTPUT_2_SISO) {
	4151	prevLength=cnv->fromUnicodeStatus;
	4152	if(prevLength==0) {
	4153	/* set the real value */
	4154	prevLength=1;
	4155	}
	4156	} else {
	4157	/* prevent fromUnicodeStatus from being set to something non-0 */
	4158	prevLength=0;
	4159	}
	4160
	4161	/* sourceIndex=-1 if the current character began in the previous buffer */
	4162	prevSourceIndex=-1;
	4163	sourceIndex= c==0 ? 0 : -1;
	4164	nextSourceIndex=0;
	4165
	4166	/* Get the SI/SO character for the converter */
	4167	siLength = static_cast<uint8_t>(getSISOBytes(SI, cnv->options, siBytes));
	4168	soLength = static_cast<uint8_t>(getSISOBytes(SO, cnv->options, soBytes));
	4169
	4170	/* conversion loop */
	4171	/*
	4172	* This is another piece of ugly code:
	4173	* A goto into the loop if the converter state contains a first surrogate
	4174	* from the previous function call.
	4175	* It saves me to check in each loop iteration a check of if(c==0)
	4176	* and duplicating the trail-surrogate-handling code in the else
	4177	* branch of that check.
	4178	* I could not find any other way to get around this other than
	4179	* using a function call for the conversion and callback, which would
	4180	* be even more inefficient.
	4181	*
	4182	* Markus Scherer 2000-jul-19
	4183	*/
	4184	if(c!=0 && targetCapacity>0) {
	4185	goto getTrail;
	4186	}
	4187
	4188	while(source<sourceLimit) {
	4189	/*
	4190	* This following test is to see if available input would overflow the output.
	4191	* It does not catch output of more than one byte that
	4192	* overflows as a result of a multi-byte character or callback output
	4193	* from the last source character.
	4194	* Therefore, those situations also test for overflows and will
	4195	* then break the loop, too.
	4196	*/
	4197	if(targetCapacity>0) {
	4198	/*
	4199	* Get a correct Unicode code point:
	4200	* a single UChar for a BMP code point or
	4201	* a matched surrogate pair for a "supplementary code point".
	4202	*/
	4203	c=*source++;
	4204	++nextSourceIndex;
	4205	if(c<=0x7f && IS_ASCII_ROUNDTRIP(c, asciiRoundtrips)) {
	4206	*target++=(uint8_t)c;
	4207	if(offsets!=NULL) {
	4208	*offsets++=sourceIndex;
	4209	prevSourceIndex=sourceIndex;
	4210	sourceIndex=nextSourceIndex;
	4211	}
	4212	--targetCapacity;
	4213	c=0;
	4214	continue;
	4215	}
	4216	/*
	4217	* utf8Friendly table: Test for <=0xd7ff rather than <=MBCS_FAST_MAX
	4218	* to avoid dealing with surrogates.
	4219	* MBCS_FAST_MAX must be >=0xd7ff.
	4220	*/
	4221	if(c<=0xd7ff && mbcsIndex!=NULL) {
	4222	value=mbcsIndex[c>>6];
	4223
	4224	/* get the bytes and the length for the output (copied from below and adapted for utf8Friendly data) */
	4225	/* There are only roundtrips (!=0) and no-mapping (==0) entries. */
	4226	switch(outputType) {
	4227	case MBCS_OUTPUT_2:
	4228	value=((const uint16_t *)bytes)[value +(c&0x3f)];
	4229	if(value<=0xff) {
	4230	if(value==0) {
	4231	goto unassigned;
	4232	} else {
	4233	length=1;
	4234	}
	4235	} else {
	4236	length=2;
	4237	}
	4238	break;
	4239	case MBCS_OUTPUT_2_SISO:
	4240	/* 1/2-byte stateful with Shift-In/Shift-Out */
	4241	/*
	4242	* Save the old state in the converter object
	4243	* right here, then change the local prevLength state variable if necessary.
	4244	* Then, if this character turns out to be unassigned or a fallback that
	4245	* is not taken, the callback code must not save the new state in the converter
	4246	* because the new state is for a character that is not output.
	4247	* However, the callback must still restore the state from the converter
	4248	* in case the callback function changed it for its output.
	4249	*/
	4250	cnv->fromUnicodeStatus=prevLength; /* save the old state */
	4251	value=((const uint16_t *)bytes)[value +(c&0x3f)];
	4252	if(value<=0xff) {
	4253	if(value==0) {
	4254	goto unassigned;
	4255	} else if(prevLength<=1) {
	4256	length=1;
	4257	} else {
	4258	/* change from double-byte mode to single-byte */
	4259	if (siLength == 1) {
	4260	value\|=(uint32_t)siBytes[0]<<8;
	4261	length = 2;
	4262	} else if (siLength == 2) {
	4263	value\|=(uint32_t)siBytes[1]<<8;
	4264	value\|=(uint32_t)siBytes[0]<<16;
	4265	length = 3;
	4266	}
	4267	prevLength=1;
	4268	}
	4269	} else {
	4270	if(prevLength==2) {
	4271	length=2;
	4272	} else {
	4273	/* change from single-byte mode to double-byte */
	4274	if (soLength == 1) {
	4275	value\|=(uint32_t)soBytes[0]<<16;
	4276	length = 3;
	4277	} else if (soLength == 2) {
	4278	value\|=(uint32_t)soBytes[1]<<16;
	4279	value\|=(uint32_t)soBytes[0]<<24;
	4280	length = 4;
	4281	}
	4282	prevLength=2;
	4283	}
	4284	}
	4285	break;
	4286	case MBCS_OUTPUT_DBCS_ONLY:
	4287	/* table with single-byte results, but only DBCS mappings used */
	4288	value=((const uint16_t *)bytes)[value +(c&0x3f)];
	4289	if(value<=0xff) {
	4290	/* no mapping or SBCS result, not taken for DBCS-only */
	4291	goto unassigned;
	4292	} else {
	4293	length=2;
	4294	}
	4295	break;
	4296	case MBCS_OUTPUT_3:
	4297	p=bytes+(value+(c&0x3f))*3;
	4298	value=((uint32_t)*p<<16)\|((uint32_t)p[1]<<8)\|p[2];
	4299	if(value<=0xff) {
	4300	if(value==0) {
	4301	goto unassigned;
	4302	} else {
	4303	length=1;
	4304	}
	4305	} else if(value<=0xffff) {
	4306	length=2;
	4307	} else {
	4308	length=3;
	4309	}
	4310	break;
	4311	case MBCS_OUTPUT_4:
	4312	value=((const uint32_t *)bytes)[value +(c&0x3f)];
	4313	if(value<=0xff) {
	4314	if(value==0) {
	4315	goto unassigned;
	4316	} else {
	4317	length=1;
	4318	}
	4319	} else if(value<=0xffff) {
	4320	length=2;
	4321	} else if(value<=0xffffff) {
	4322	length=3;
	4323	} else {
	4324	length=4;
	4325	}
	4326	break;
	4327	case MBCS_OUTPUT_3_EUC:
	4328	value=((const uint16_t *)bytes)[value +(c&0x3f)];
	4329	/* EUC 16-bit fixed-length representation */
	4330	if(value<=0xff) {
	4331	if(value==0) {
	4332	goto unassigned;
	4333	} else {
	4334	length=1;
	4335	}
	4336	} else if((value&0x8000)==0) {
	4337	value\|=0x8e8000;
	4338	length=3;
	4339	} else if((value&0x80)==0) {
	4340	value\|=0x8f0080;
	4341	length=3;
	4342	} else {
	4343	length=2;
	4344	}
	4345	break;
	4346	case MBCS_OUTPUT_4_EUC:
	4347	p=bytes+(value+(c&0x3f))*3;
	4348	value=((uint32_t)*p<<16)\|((uint32_t)p[1]<<8)\|p[2];
	4349	/* EUC 16-bit fixed-length representation applied to the first two bytes */
	4350	if(value<=0xff) {
	4351	if(value==0) {
	4352	goto unassigned;
	4353	} else {
	4354	length=1;
	4355	}
	4356	} else if(value<=0xffff) {
	4357	length=2;
	4358	} else if((value&0x800000)==0) {
	4359	value\|=0x8e800000;
	4360	length=4;
	4361	} else if((value&0x8000)==0) {
	4362	value\|=0x8f008000;
	4363	length=4;
	4364	} else {
	4365	length=3;
	4366	}
	4367	break;
	4368	default:
	4369	/* must not occur */
	4370	/*
	4371	* To avoid compiler warnings that value & length may be
	4372	* used without having been initialized, we set them here.
	4373	* In reality, this is unreachable code.
	4374	* Not having a default branch also causes warnings with
	4375	* some compilers.
	4376	*/
	4377	value=0;
	4378	length=0;
	4379	break;
	4380	}
	4381	/* output the value */
	4382	} else {
	4383	/*
	4384	* This also tests if the codepage maps single surrogates.
	4385	* If it does, then surrogates are not paired but mapped separately.
	4386	* Note that in this case unmatched surrogates are not detected.
	4387	*/
	4388	if(U16_IS_SURROGATE(c) && !(unicodeMask&UCNV_HAS_SURROGATES)) {
	4389	if(U16_IS_SURROGATE_LEAD(c)) {
	4390	getTrail:
	4391	if(source<sourceLimit) {
	4392	/* test the following code unit */
	4393	UChar trail=*source;
	4394	if(U16_IS_TRAIL(trail)) {
	4395	++source;
	4396	++nextSourceIndex;
	4397	c=U16_GET_SUPPLEMENTARY(c, trail);
	4398	if(!(unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
	4399	/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
	4400	cnv->fromUnicodeStatus=prevLength; /* save the old state */
	4401	/* callback(unassigned) */
	4402	goto unassigned;
	4403	}
	4404	/* convert this supplementary code point */
	4405	/* exit this condition tree */
	4406	} else {
	4407	/* this is an unmatched lead code unit (1st surrogate) */
	4408	/* callback(illegal) */
	4409	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
	4410	break;
	4411	}
	4412	} else {
	4413	/* no more input */
	4414	break;
	4415	}
	4416	} else {
	4417	/* this is an unmatched trail code unit (2nd surrogate) */
	4418	/* callback(illegal) */
	4419	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
	4420	break;
	4421	}
	4422	}
	4423
	4424	/* convert the Unicode code point in c into codepage bytes */
	4425
	4426	/*
	4427	* The basic lookup is a triple-stage compact array (trie) lookup.
	4428	* For details see the beginning of this file.
	4429	*
	4430	* Single-byte codepages are handled with a different data structure
	4431	* by _MBCSSingle... functions.
	4432	*
	4433	* The result consists of a 32-bit value from stage 2 and
	4434	* a pointer to as many bytes as are stored per character.
	4435	* The pointer points to the character's bytes in stage 3.
	4436	* Bits 15..0 of the stage 2 entry contain the stage 3 index
	4437	* for that pointer, while bits 31..16 are flags for which of
	4438	* the 16 characters in the block are roundtrip-assigned.
	4439	*
	4440	* For 2-byte and 4-byte codepages, the bytes are stored as uint16_t
	4441	* respectively as uint32_t, in the platform encoding.
	4442	* For 3-byte codepages, the bytes are always stored in big-endian order.
	4443	*
	4444	* For EUC encodings that use only either 0x8e or 0x8f as the first
	4445	* byte of their longest byte sequences, the first two bytes in
	4446	* this third stage indicate with their 7th bits whether these bytes
	4447	* are to be written directly or actually need to be preceeded by
	4448	* one of the two Single-Shift codes. With this, the third stage
	4449	* stores one byte fewer per character than the actual maximum length of
	4450	* EUC byte sequences.
	4451	*
	4452	* Other than that, leading zero bytes are removed and the other
	4453	* bytes output. A single zero byte may be output if the "assigned"
	4454	* bit in stage 2 was on.
	4455	* The data structure does not support zero byte output as a fallback,
	4456	* and also does not allow output of leading zeros.
	4457	*/
	4458	stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
	4459
	4460	/* get the bytes and the length for the output */
	4461	switch(outputType) {
	4462	case MBCS_OUTPUT_2:
	4463	value=MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
	4464	if(value<=0xff) {
	4465	length=1;
	4466	} else {
	4467	length=2;
	4468	}
	4469	break;
	4470	case MBCS_OUTPUT_2_SISO:
	4471	/* 1/2-byte stateful with Shift-In/Shift-Out */
	4472	/*
	4473	* Save the old state in the converter object
	4474	* right here, then change the local prevLength state variable if necessary.
	4475	* Then, if this character turns out to be unassigned or a fallback that
	4476	* is not taken, the callback code must not save the new state in the converter
	4477	* because the new state is for a character that is not output.
	4478	* However, the callback must still restore the state from the converter
	4479	* in case the callback function changed it for its output.
	4480	*/
	4481	cnv->fromUnicodeStatus=prevLength; /* save the old state */
	4482	value=MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
	4483	if(value<=0xff) {
	4484	if(value==0 && MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c)==0) {
	4485	/* no mapping, leave value==0 */
	4486	length=0;
	4487	} else if(prevLength<=1) {
	4488	length=1;
	4489	} else {
	4490	/* change from double-byte mode to single-byte */
	4491	if (siLength == 1) {
	4492	value\|=(uint32_t)siBytes[0]<<8;
	4493	length = 2;
	4494	} else if (siLength == 2) {
	4495	value\|=(uint32_t)siBytes[1]<<8;
	4496	value\|=(uint32_t)siBytes[0]<<16;
	4497	length = 3;
	4498	}
	4499	prevLength=1;
	4500	}
	4501	} else {
	4502	if(prevLength==2) {
	4503	length=2;
	4504	} else {
	4505	/* change from single-byte mode to double-byte */
	4506	if (soLength == 1) {
	4507	value\|=(uint32_t)soBytes[0]<<16;
	4508	length = 3;
	4509	} else if (soLength == 2) {
	4510	value\|=(uint32_t)soBytes[1]<<16;
	4511	value\|=(uint32_t)soBytes[0]<<24;
	4512	length = 4;
	4513	}
	4514	prevLength=2;
	4515	}
	4516	}
	4517	break;
	4518	case MBCS_OUTPUT_DBCS_ONLY:
	4519	/* table with single-byte results, but only DBCS mappings used */
	4520	value=MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
	4521	if(value<=0xff) {
	4522	/* no mapping or SBCS result, not taken for DBCS-only */
	4523	value=stage2Entry=0; /* stage2Entry=0 to reset roundtrip flags */
	4524	length=0;
	4525	} else {
	4526	length=2;
	4527	}
	4528	break;
	4529	case MBCS_OUTPUT_3:
	4530	p=MBCS_POINTER_3_FROM_STAGE_2(bytes, stage2Entry, c);
	4531	value=((uint32_t)*p<<16)\|((uint32_t)p[1]<<8)\|p[2];
	4532	if(value<=0xff) {
	4533	length=1;
	4534	} else if(value<=0xffff) {
	4535	length=2;
	4536	} else {
	4537	length=3;
	4538	}
	4539	break;
	4540	case MBCS_OUTPUT_4:
	4541	value=MBCS_VALUE_4_FROM_STAGE_2(bytes, stage2Entry, c);
	4542	if(value<=0xff) {
	4543	length=1;
	4544	} else if(value<=0xffff) {
	4545	length=2;
	4546	} else if(value<=0xffffff) {
	4547	length=3;
	4548	} else {
	4549	length=4;
	4550	}
	4551	break;
	4552	case MBCS_OUTPUT_3_EUC:
	4553	value=MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
	4554	/* EUC 16-bit fixed-length representation */
	4555	if(value<=0xff) {
	4556	length=1;
	4557	} else if((value&0x8000)==0) {
	4558	value\|=0x8e8000;
	4559	length=3;
	4560	} else if((value&0x80)==0) {
	4561	value\|=0x8f0080;
	4562	length=3;
	4563	} else {
	4564	length=2;
	4565	}
	4566	break;
	4567	case MBCS_OUTPUT_4_EUC:
	4568	p=MBCS_POINTER_3_FROM_STAGE_2(bytes, stage2Entry, c);
	4569	value=((uint32_t)*p<<16)\|((uint32_t)p[1]<<8)\|p[2];
	4570	/* EUC 16-bit fixed-length representation applied to the first two bytes */
	4571	if(value<=0xff) {
	4572	length=1;
	4573	} else if(value<=0xffff) {
	4574	length=2;
	4575	} else if((value&0x800000)==0) {
	4576	value\|=0x8e800000;
	4577	length=4;
	4578	} else if((value&0x8000)==0) {
	4579	value\|=0x8f008000;
	4580	length=4;
	4581	} else {
	4582	length=3;
	4583	}
	4584	break;
	4585	default:
	4586	/* must not occur */
	4587	/*
	4588	* To avoid compiler warnings that value & length may be
	4589	* used without having been initialized, we set them here.
	4590	* In reality, this is unreachable code.
	4591	* Not having a default branch also causes warnings with
	4592	* some compilers.
	4593	*/
	4594	value=stage2Entry=0; /* stage2Entry=0 to reset roundtrip flags */
	4595	length=0;
	4596	break;
	4597	}
	4598
	4599	/* is this code point assigned, or do we use fallbacks? */
	4600	if(!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c)!=0 \|\|
	4601	(UCNV_FROM_U_USE_FALLBACK(cnv, c) && value!=0))
	4602	) {
	4603	/*
	4604	* We allow a 0 byte output if the "assigned" bit is set for this entry.
	4605	* There is no way with this data structure for fallback output
	4606	* to be a zero byte.
	4607	*/
	4608
	4609	unassigned:
	4610	/* try an extension mapping */
	4611	pArgs->source=source;
	4612	c=_extFromU(cnv, cnv->sharedData,
	4613	c, &source, sourceLimit,
	4614	&target, target+targetCapacity,
	4615	&offsets, sourceIndex,
	4616	pArgs->flush,
	4617	pErrorCode);
	4618	nextSourceIndex+=(int32_t)(source-pArgs->source);
	4619	prevLength=cnv->fromUnicodeStatus; /* restore SISO state */
	4620
	4621	if(U_FAILURE(*pErrorCode)) {
	4622	/* not mappable or buffer overflow */
	4623	break;
	4624	} else {
	4625	/* a mapping was written to the target, continue */
	4626
	4627	/* recalculate the targetCapacity after an extension mapping */
	4628	targetCapacity=(int32_t)(pArgs->targetLimit-(char *)target);
	4629
	4630	/* normal end of conversion: prepare for a new character */
	4631	if(offsets!=NULL) {
	4632	prevSourceIndex=sourceIndex;
	4633	sourceIndex=nextSourceIndex;
	4634	}
	4635	continue;
	4636	}
	4637	}
	4638	}
	4639
	4640	/* write the output character bytes from value and length */
	4641	/* from the first if in the loop we know that targetCapacity>0 */
	4642	if(length<=targetCapacity) {
	4643	if(offsets==NULL) {
	4644	switch(length) {
	4645	/* each branch falls through to the next one */
	4646	case 4:
	4647	*target++=(uint8_t)(value>>24);
	4648	U_FALLTHROUGH;
	4649	case 3:
	4650	*target++=(uint8_t)(value>>16);
	4651	U_FALLTHROUGH;
	4652	case 2:
	4653	*target++=(uint8_t)(value>>8);
	4654	U_FALLTHROUGH;
	4655	case 1:
	4656	*target++=(uint8_t)value;
	4657	U_FALLTHROUGH;
	4658	default:
	4659	/* will never occur */
	4660	break;
	4661	}
	4662	} else {
	4663	switch(length) {
	4664	/* each branch falls through to the next one */
	4665	case 4:
	4666	*target++=(uint8_t)(value>>24);
	4667	*offsets++=sourceIndex;
	4668	U_FALLTHROUGH;
	4669	case 3:
	4670	*target++=(uint8_t)(value>>16);
	4671	*offsets++=sourceIndex;
	4672	U_FALLTHROUGH;
	4673	case 2:
	4674	*target++=(uint8_t)(value>>8);
	4675	*offsets++=sourceIndex;
	4676	U_FALLTHROUGH;
	4677	case 1:
	4678	*target++=(uint8_t)value;
	4679	*offsets++=sourceIndex;
	4680	U_FALLTHROUGH;
	4681	default:
	4682	/* will never occur */
	4683	break;
	4684	}
	4685	}
	4686	targetCapacity-=length;
	4687	} else {
	4688	uint8_t *charErrorBuffer;
	4689
	4690	/*
	4691	* We actually do this backwards here:
	4692	* In order to save an intermediate variable, we output
	4693	* first to the overflow buffer what does not fit into the
	4694	* regular target.
	4695	*/
	4696	/* we know that 1<=targetCapacity<length<=4 */
	4697	length-=targetCapacity;
	4698	charErrorBuffer=(uint8_t *)cnv->charErrorBuffer;
	4699	switch(length) {
	4700	/* each branch falls through to the next one */
	4701	case 3:
	4702	*charErrorBuffer++=(uint8_t)(value>>16);
	4703	U_FALLTHROUGH;
	4704	case 2:
	4705	*charErrorBuffer++=(uint8_t)(value>>8);
	4706	U_FALLTHROUGH;
	4707	case 1:
	4708	*charErrorBuffer=(uint8_t)value;
	4709	U_FALLTHROUGH;
	4710	default:
	4711	/* will never occur */
	4712	break;
	4713	}
	4714	cnv->charErrorBufferLength=(int8_t)length;
	4715
	4716	/* now output what fits into the regular target */
	4717	value>>=8length; / length was reduced by targetCapacity */
	4718	switch(targetCapacity) {
	4719	/* each branch falls through to the next one */
	4720	case 3:
	4721	*target++=(uint8_t)(value>>16);
	4722	if(offsets!=NULL) {
	4723	*offsets++=sourceIndex;
	4724	}
	4725	U_FALLTHROUGH;
	4726	case 2:
	4727	*target++=(uint8_t)(value>>8);
	4728	if(offsets!=NULL) {
	4729	*offsets++=sourceIndex;
	4730	}
	4731	U_FALLTHROUGH;
	4732	case 1:
	4733	*target++=(uint8_t)value;
	4734	if(offsets!=NULL) {
	4735	*offsets++=sourceIndex;
	4736	}
	4737	U_FALLTHROUGH;
	4738	default:
	4739	/* will never occur */
	4740	break;
	4741	}
	4742
	4743	/* target overflow */
	4744	targetCapacity=0;
	4745	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
	4746	c=0;
	4747	break;
	4748	}
	4749
	4750	/* normal end of conversion: prepare for a new character */
	4751	c=0;
	4752	if(offsets!=NULL) {
	4753	prevSourceIndex=sourceIndex;
	4754	sourceIndex=nextSourceIndex;
	4755	}
	4756	continue;
	4757	} else {
	4758	/* target is full */
	4759	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
	4760	break;
	4761	}
	4762	}
	4763
	4764	/*
	4765	* the end of the input stream and detection of truncated input
	4766	* are handled by the framework, but for EBCDIC_STATEFUL conversion
	4767	* we need to emit an SI at the very end
	4768	*
	4769	* conditions:
	4770	* successful
	4771	* EBCDIC_STATEFUL in DBCS mode
	4772	* end of input and no truncated input
	4773	*/
	4774	if( U_SUCCESS(*pErrorCode) &&
	4775	outputType==MBCS_OUTPUT_2_SISO && prevLength==2 &&
	4776	pArgs->flush && source>=sourceLimit && c==0
	4777	) {
	4778	/* EBCDIC_STATEFUL ending with DBCS: emit an SI to return the output stream to SBCS */
	4779	if(targetCapacity>0) {
	4780	*target++=(uint8_t)siBytes[0];
	4781	if (siLength == 2) {
	4782	if (targetCapacity<2) {
	4783	cnv->charErrorBuffer[0]=(uint8_t)siBytes[1];
	4784	cnv->charErrorBufferLength=1;
	4785	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
	4786	} else {
	4787	*target++=(uint8_t)siBytes[1];
	4788	}
	4789	}
	4790	if(offsets!=NULL) {
	4791	/* set the last source character's index (sourceIndex points at sourceLimit now) */
	4792	*offsets++=prevSourceIndex;
	4793	}
	4794	} else {
	4795	/* target is full */
	4796	cnv->charErrorBuffer[0]=(uint8_t)siBytes[0];
	4797	if (siLength == 2) {
	4798	cnv->charErrorBuffer[1]=(uint8_t)siBytes[1];
	4799	}
	4800	cnv->charErrorBufferLength=siLength;
	4801	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
	4802	}
	4803	prevLength=1; /* we switched into SBCS */
	4804	}
	4805
	4806	/* set the converter state back into UConverter */
	4807	cnv->fromUChar32=c;
	4808	cnv->fromUnicodeStatus=prevLength;
	4809
	4810	/* write back the updated pointers */
	4811	pArgs->source=source;
	4812	pArgs->target=(char *)target;
	4813	pArgs->offsets=offsets;
	4814	}
	4815
	4816	/*
	4817	* This is another simple conversion function for internal use by other
	4818	* conversion implementations.
	4819	* It does not use the converter state nor call callbacks.
	4820	* It does not handle the EBCDIC swaplfnl option (set in UConverter).
	4821	* It handles conversion extensions but not GB 18030.
	4822	*
	4823	* It converts one single Unicode code point into codepage bytes, encoded
	4824	* as one 32-bit value. The function returns the number of bytes in *pValue:
	4825	* 1..4 the number of bytes in *pValue
	4826	* 0 unassigned (*pValue undefined)
	4827	* -1 illegal (currently not used, *pValue undefined)
	4828	*
	4829	* *pValue will contain the resulting bytes with the last byte in bits 7..0,
	4830	* the second to last byte in bits 15..8, etc.
	4831	* Currently, the function assumes but does not check that 0<=c<=0x10ffff.
	4832	*/
	4833	U_CFUNC int32_t
	4834	ucnv_MBCSFromUChar32(UConverterSharedData *sharedData,
	4835	UChar32 c, uint32_t *pValue,
	4836	UBool useFallback) {
	4837	const int32_t *cx;
	4838	const uint16_t *table;
	4839	#if 0
	4840	/* #if 0 because this is not currently used in ICU - reduce code, increase code coverage */
	4841	const uint8_t *p;
	4842	#endif
	4843	uint32_t stage2Entry;
	4844	uint32_t value;
	4845	int32_t length;
	4846
	4847	/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
	4848	if(c<=0xffff \|\| (sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
	4849	table=sharedData->mbcs.fromUnicodeTable;
	4850
	4851	/* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */
	4852	if(sharedData->mbcs.outputType==MBCS_OUTPUT_1) {
	4853	value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t *)sharedData->mbcs.fromUnicodeBytes, c);
	4854	/* is this code point assigned, or do we use fallbacks? */
	4855	if(useFallback ? value>=0x800 : value>=0xc00) {
	4856	*pValue=value&0xff;
	4857	return 1;
	4858	}
	4859	} else /* outputType!=MBCS_OUTPUT_1 */ {
	4860	stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
	4861
	4862	/* get the bytes and the length for the output */
	4863	switch(sharedData->mbcs.outputType) {
	4864	case MBCS_OUTPUT_2:
	4865	value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
	4866	if(value<=0xff) {
	4867	length=1;
	4868	} else {
	4869	length=2;
	4870	}
	4871	break;
	4872	#if 0
	4873	/* #if 0 because this is not currently used in ICU - reduce code, increase code coverage */
	4874	case MBCS_OUTPUT_DBCS_ONLY:
	4875	/* table with single-byte results, but only DBCS mappings used */
	4876	value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
	4877	if(value<=0xff) {
	4878	/* no mapping or SBCS result, not taken for DBCS-only */
	4879	value=stage2Entry=0; /* stage2Entry=0 to reset roundtrip flags */
	4880	length=0;
	4881	} else {
	4882	length=2;
	4883	}
	4884	break;
	4885	case MBCS_OUTPUT_3:
	4886	p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
	4887	value=((uint32_t)*p<<16)\|((uint32_t)p[1]<<8)\|p[2];
	4888	if(value<=0xff) {
	4889	length=1;
	4890	} else if(value<=0xffff) {
	4891	length=2;
	4892	} else {
	4893	length=3;
	4894	}
	4895	break;
	4896	case MBCS_OUTPUT_4:
	4897	value=MBCS_VALUE_4_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
	4898	if(value<=0xff) {
	4899	length=1;
	4900	} else if(value<=0xffff) {
	4901	length=2;
	4902	} else if(value<=0xffffff) {
	4903	length=3;
	4904	} else {
	4905	length=4;
	4906	}
	4907	break;
	4908	case MBCS_OUTPUT_3_EUC:
	4909	value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
	4910	/* EUC 16-bit fixed-length representation */
	4911	if(value<=0xff) {
	4912	length=1;
	4913	} else if((value&0x8000)==0) {
	4914	value\|=0x8e8000;
	4915	length=3;
	4916	} else if((value&0x80)==0) {
	4917	value\|=0x8f0080;
	4918	length=3;
	4919	} else {
	4920	length=2;
	4921	}
	4922	break;
	4923	case MBCS_OUTPUT_4_EUC:
	4924	p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
	4925	value=((uint32_t)*p<<16)\|((uint32_t)p[1]<<8)\|p[2];
	4926	/* EUC 16-bit fixed-length representation applied to the first two bytes */
	4927	if(value<=0xff) {
	4928	length=1;
	4929	} else if(value<=0xffff) {
	4930	length=2;
	4931	} else if((value&0x800000)==0) {
	4932	value\|=0x8e800000;
	4933	length=4;
	4934	} else if((value&0x8000)==0) {
	4935	value\|=0x8f008000;
	4936	length=4;
	4937	} else {
	4938	length=3;
	4939	}
	4940	break;
	4941	#endif
	4942	default:
	4943	/* must not occur */
	4944	return -1;
	4945	}
	4946
	4947	/* is this code point assigned, or do we use fallbacks? */
	4948	if( MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) \|\|
	4949	(FROM_U_USE_FALLBACK(useFallback, c) && value!=0)
	4950	) {
	4951	/*
	4952	* We allow a 0 byte output if the "assigned" bit is set for this entry.
	4953	* There is no way with this data structure for fallback output
	4954	* to be a zero byte.
	4955	*/
	4956	/* assigned */
	4957	*pValue=value;
	4958	return length;
	4959	}
	4960	}
	4961	}
	4962
	4963	cx=sharedData->mbcs.extIndexes;
	4964	if(cx!=NULL) {
	4965	length=ucnv_extSimpleMatchFromU(cx, c, pValue, useFallback);
	4966	return length>=0 ? length : -length; /* return abs(length); */
	4967	}
	4968
	4969	/* unassigned */
	4970	return 0;
	4971	}
	4972
	4973
	4974	#if 0
	4975	/*
	4976	* This function has been moved to ucnv2022.c for inlining.
	4977	* This implementation is here only for documentation purposes
	4978	*/
	4979
	4980	/**
	4981	* This version of ucnv_MBCSFromUChar32() is optimized for single-byte codepages.
	4982	* It does not handle the EBCDIC swaplfnl option (set in UConverter).
	4983	* It does not handle conversion extensions (_extFromU()).
	4984	*
	4985	* It returns the codepage byte for the code point, or -1 if it is unassigned.
	4986	*/
	4987	U_CFUNC int32_t
	4988	ucnv_MBCSSingleFromUChar32(UConverterSharedData *sharedData,
	4989	UChar32 c,
	4990	UBool useFallback) {
	4991	const uint16_t *table;
	4992	int32_t value;
	4993
	4994	/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
	4995	if(c>=0x10000 && !(sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
	4996	return -1;
	4997	}
	4998
	4999	/* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */
	5000	table=sharedData->mbcs.fromUnicodeTable;
	5001
	5002	/* get the byte for the output */
	5003	value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t *)sharedData->mbcs.fromUnicodeBytes, c);
	5004	/* is this code point assigned, or do we use fallbacks? */
	5005	if(useFallback ? value>=0x800 : value>=0xc00) {
	5006	return value&0xff;
	5007	} else {
	5008	return -1;
	5009	}
	5010	}
	5011	#endif
	5012
	5013	/* MBCS-from-UTF-8 conversion functions ------------------------------------- */
	5014
	5015	/* offsets for n-byte UTF-8 sequences that were calculated with ((lead<<6)+trail)<<6+trail... */
	5016	static const UChar32
	5017	utf8_offsets[5]={ 0, 0, 0x3080, 0xE2080, 0x3C82080 };
	5018
	5019	static void U_CALLCONV
	5020	ucnv_SBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
	5021	UConverterToUnicodeArgs *pToUArgs,
	5022	UErrorCode *pErrorCode) {
	5023	UConverter utf8, cnv;
	5024	const uint8_t source, sourceLimit;
	5025	uint8_t *target;
	5026	int32_t targetCapacity;
	5027
	5028	const uint16_t table, sbcsIndex;
	5029	const uint16_t *results;
	5030
	5031	int8_t oldToULength, toULength, toULimit;
	5032
	5033	UChar32 c;
	5034	uint8_t b, t1, t2;
	5035
	5036	uint32_t asciiRoundtrips;
	5037	uint16_t value, minValue = 0;
	5038	UBool hasSupplementary;
	5039
	5040	/* set up the local pointers */
	5041	utf8=pToUArgs->converter;
	5042	cnv=pFromUArgs->converter;
	5043	source=(uint8_t *)pToUArgs->source;
	5044	sourceLimit=(uint8_t *)pToUArgs->sourceLimit;
	5045	target=(uint8_t *)pFromUArgs->target;
	5046	targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
	5047
	5048	table=cnv->sharedData->mbcs.fromUnicodeTable;
	5049	sbcsIndex=cnv->sharedData->mbcs.sbcsIndex;
	5050	if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
	5051	results=(uint16_t *)cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes;
	5052	} else {
	5053	results=(uint16_t *)cnv->sharedData->mbcs.fromUnicodeBytes;
	5054	}
	5055	asciiRoundtrips=cnv->sharedData->mbcs.asciiRoundtrips;
	5056
	5057	if(cnv->useFallback) {
	5058	/* use all roundtrip and fallback results */
	5059	minValue=0x800;
	5060	} else {
	5061	/* use only roundtrips and fallbacks from private-use characters */
	5062	minValue=0xc00;
	5063	}
	5064	hasSupplementary=(UBool)(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY);
	5065
	5066	/* get the converter state from the UTF-8 UConverter */
	5067	if(utf8->toULength > 0) {
	5068	toULength=oldToULength=utf8->toULength;
	5069	toULimit=(int8_t)utf8->mode;
	5070	c=(UChar32)utf8->toUnicodeStatus;
	5071	} else {
	5072	toULength=oldToULength=toULimit=0;
	5073	c = 0;
	5074	}
	5075
	5076	// The conversion loop checks source<sourceLimit only once per 1/2/3-byte character.
	5077	// If the buffer ends with a truncated 2- or 3-byte sequence,
	5078	// then we reduce the sourceLimit to before that,
	5079	// and collect the remaining bytes after the conversion loop.
	5080	{
	5081	// Do not go back into the bytes that will be read for finishing a partial
	5082	// sequence from the previous buffer.
	5083	int32_t length=(int32_t)(sourceLimit-source) - (toULimit-oldToULength);
	5084	if(length>0) {
	5085	uint8_t b1=*(sourceLimit-1);
	5086	if(U8_IS_SINGLE(b1)) {
	5087	// common ASCII character
	5088	} else if(U8_IS_TRAIL(b1) && length>=2) {
	5089	uint8_t b2=*(sourceLimit-2);
	5090	if(0xe0<=b2 && b2<0xf0 && U8_IS_VALID_LEAD3_AND_T1(b2, b1)) {
	5091	// truncated 3-byte sequence
	5092	sourceLimit-=2;
	5093	}
	5094	} else if(0xc2<=b1 && b1<0xf0) {
	5095	// truncated 2- or 3-byte sequence
	5096	--sourceLimit;
	5097	}
	5098	}
	5099	}
	5100
	5101	if(c!=0 && targetCapacity>0) {
	5102	utf8->toUnicodeStatus=0;
	5103	utf8->toULength=0;
	5104	goto moreBytes;
	5105	/*
	5106	* Note: We could avoid the goto by duplicating some of the moreBytes
	5107	* code, but only up to the point of collecting a complete UTF-8
	5108	* sequence; then recurse for the toUBytes[toULength]
	5109	* and then continue with normal conversion.
	5110	*
	5111	* If so, move this code to just after initializing the minimum
	5112	* set of local variables for reading the UTF-8 input
	5113	* (utf8, source, target, limits but not cnv, table, minValue, etc.).
	5114	*
	5115	* Potential advantages:
	5116	* - avoid the goto
	5117	* - oldToULength could become a local variable in just those code blocks
	5118	* that deal with buffer boundaries
	5119	* - possibly faster if the goto prevents some compiler optimizations
	5120	* (this would need measuring to confirm)
	5121	* Disadvantage:
	5122	* - code duplication
	5123	*/
	5124	}
	5125
	5126	/* conversion loop */
	5127	while(source<sourceLimit) {
	5128	if(targetCapacity>0) {
	5129	b=*source++;
	5130	if(U8_IS_SINGLE(b)) {
	5131	/* convert ASCII */
	5132	if(IS_ASCII_ROUNDTRIP(b, asciiRoundtrips)) {
	5133	*target++=(uint8_t)b;
	5134	--targetCapacity;
	5135	continue;
	5136	} else {
	5137	c=b;
	5138	value=SBCS_RESULT_FROM_UTF8(sbcsIndex, results, 0, c);
	5139	}
	5140	} else {
	5141	if(b<0xe0) {
	5142	if( /* handle U+0080..U+07FF inline */
	5143	b>=0xc2 &&
	5144	(t1=(uint8_t)(*source-0x80)) <= 0x3f
	5145	) {
	5146	c=b&0x1f;
	5147	++source;
	5148	value=SBCS_RESULT_FROM_UTF8(sbcsIndex, results, c, t1);
	5149	if(value>=minValue) {
	5150	*target++=(uint8_t)value;
	5151	--targetCapacity;
	5152	continue;
	5153	} else {
	5154	c=(c<<6)\|t1;
	5155	}
	5156	} else {
	5157	c=-1;
	5158	}
	5159	} else if(b==0xe0) {
	5160	if( /* handle U+0800..U+0FFF inline */
	5161	(t1=(uint8_t)(source[0]-0x80)) <= 0x3f && t1 >= 0x20 &&
	5162	(t2=(uint8_t)(source[1]-0x80)) <= 0x3f
	5163	) {
	5164	c=t1;
	5165	source+=2;
	5166	value=SBCS_RESULT_FROM_UTF8(sbcsIndex, results, c, t2);
	5167	if(value>=minValue) {
	5168	*target++=(uint8_t)value;
	5169	--targetCapacity;
	5170	continue;
	5171	} else {
	5172	c=(c<<6)\|t2;
	5173	}
	5174	} else {
	5175	c=-1;
	5176	}
	5177	} else {
	5178	c=-1;
	5179	}
	5180
	5181	if(c<0) {
	5182	/* handle "complicated" and error cases, and continuing partial characters */
	5183	oldToULength=0;
	5184	toULength=1;
	5185	toULimit=U8_COUNT_BYTES_NON_ASCII(b);
	5186	c=b;
	5187	moreBytes:
	5188	while(toULength<toULimit) {
	5189	/*
	5190	* The sourceLimit may have been adjusted before the conversion loop
	5191	* to stop before a truncated sequence.
	5192	* Here we need to use the real limit in case we have two truncated
	5193	* sequences at the end.
	5194	* See ticket #7492.
	5195	*/
	5196	if(source<(uint8_t *)pToUArgs->sourceLimit) {
	5197	b=*source;
	5198	if(icu::UTF8::isValidTrail(c, b, toULength, toULimit)) {
	5199	++source;
	5200	++toULength;
	5201	c=(c<<6)+b;
	5202	} else {
	5203	break; /* sequence too short, stop with toULength<toULimit */
	5204	}
	5205	} else {
	5206	/* store the partial UTF-8 character, compatible with the regular UTF-8 converter */
	5207	source-=(toULength-oldToULength);
	5208	while(oldToULength<toULength) {
	5209	utf8->toUBytes[oldToULength++]=*source++;
	5210	}
	5211	utf8->toUnicodeStatus=c;
	5212	utf8->toULength=toULength;
	5213	utf8->mode=toULimit;
	5214	pToUArgs->source=(char *)source;
	5215	pFromUArgs->target=(char *)target;
	5216	return;
	5217	}
	5218	}
	5219
	5220	if(toULength==toULimit) {
	5221	c-=utf8_offsets[toULength];
	5222	if(toULength<=3) { /* BMP */
	5223	value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
	5224	} else {
	5225	/* supplementary code point */
	5226	if(!hasSupplementary) {
	5227	/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
	5228	value=0;
	5229	} else {
	5230	value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
	5231	}
	5232	}
	5233	} else {
	5234	/* error handling: illegal UTF-8 byte sequence */
	5235	source-=(toULength-oldToULength);
	5236	while(oldToULength<toULength) {
	5237	utf8->toUBytes[oldToULength++]=*source++;
	5238	}
	5239	utf8->toULength=toULength;
	5240	pToUArgs->source=(char *)source;
	5241	pFromUArgs->target=(char *)target;
	5242	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
	5243	return;
	5244	}
	5245	}
	5246	}
	5247
	5248	if(value>=minValue) {
	5249	/* output the mapping for c */
	5250	*target++=(uint8_t)value;
	5251	--targetCapacity;
	5252	} else {
	5253	/* value<minValue means c is unassigned (unmappable) */
	5254	/*
	5255	* Try an extension mapping.
	5256	* Pass in no source because we don't have UTF-16 input.
	5257	* If we have a partial match on c, we will return and revert
	5258	* to UTF-8->UTF-16->charset conversion.
	5259	*/
	5260	static const UChar nul=0;
	5261	const UChar *noSource=&nul;
	5262	c=_extFromU(cnv, cnv->sharedData,
	5263	c, &noSource, noSource,
	5264	&target, target+targetCapacity,
	5265	NULL, -1,
	5266	pFromUArgs->flush,
	5267	pErrorCode);
	5268
	5269	if(U_FAILURE(*pErrorCode)) {
	5270	/* not mappable or buffer overflow */
	5271	cnv->fromUChar32=c;
	5272	break;
	5273	} else if(cnv->preFromUFirstCP>=0) {
	5274	/*
	5275	* Partial match, return and revert to pivoting.
	5276	* In normal from-UTF-16 conversion, we would just continue
	5277	* but then exit the loop because the extension match would
	5278	* have consumed the source.
	5279	*/
	5280	*pErrorCode=U_USING_DEFAULT_WARNING;
	5281	break;
	5282	} else {
	5283	/* a mapping was written to the target, continue */
	5284
	5285	/* recalculate the targetCapacity after an extension mapping */
	5286	targetCapacity=(int32_t)(pFromUArgs->targetLimit-(char *)target);
	5287	}
	5288	}
	5289	} else {
	5290	/* target is full */
	5291	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
	5292	break;
	5293	}
	5294	}
	5295
	5296	/*
	5297	* The sourceLimit may have been adjusted before the conversion loop
	5298	* to stop before a truncated sequence.
	5299	* If so, then collect the truncated sequence now.
	5300	*/
	5301	if(U_SUCCESS(*pErrorCode) &&
	5302	cnv->preFromUFirstCP<0 &&
	5303	source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) {
	5304	c=utf8->toUBytes[0]=b=*source++;
	5305	toULength=1;
	5306	toULimit=U8_COUNT_BYTES(b);
	5307	while(source<sourceLimit) {
	5308	utf8->toUBytes[toULength++]=b=*source++;
	5309	c=(c<<6)+b;
	5310	}
	5311	utf8->toUnicodeStatus=c;
	5312	utf8->toULength=toULength;
	5313	utf8->mode=toULimit;
	5314	}
	5315
	5316	/* write back the updated pointers */
	5317	pToUArgs->source=(char *)source;
	5318	pFromUArgs->target=(char *)target;
	5319	}
	5320
	5321	static void U_CALLCONV
	5322	ucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
	5323	UConverterToUnicodeArgs *pToUArgs,
	5324	UErrorCode *pErrorCode) {
	5325	UConverter utf8, cnv;
	5326	const uint8_t source, sourceLimit;
	5327	uint8_t *target;
	5328	int32_t targetCapacity;
	5329
	5330	const uint16_t table, mbcsIndex;
	5331	const uint16_t *results;
	5332
	5333	int8_t oldToULength, toULength, toULimit;
	5334
	5335	UChar32 c;
	5336	uint8_t b, t1, t2;
	5337
	5338	uint32_t stage2Entry;
	5339	uint32_t asciiRoundtrips;
	5340	uint16_t value = 0;
	5341	UBool hasSupplementary;
	5342
	5343	/* set up the local pointers */
	5344	utf8=pToUArgs->converter;
	5345	cnv=pFromUArgs->converter;
	5346	source=(uint8_t *)pToUArgs->source;
	5347	sourceLimit=(uint8_t *)pToUArgs->sourceLimit;
	5348	target=(uint8_t *)pFromUArgs->target;
	5349	targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
	5350
	5351	table=cnv->sharedData->mbcs.fromUnicodeTable;
	5352	mbcsIndex=cnv->sharedData->mbcs.mbcsIndex;
	5353	if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
	5354	results=(uint16_t *)cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes;
	5355	} else {
	5356	results=(uint16_t *)cnv->sharedData->mbcs.fromUnicodeBytes;
	5357	}
	5358	asciiRoundtrips=cnv->sharedData->mbcs.asciiRoundtrips;
	5359
	5360	hasSupplementary=(UBool)(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY);
	5361
	5362	/* get the converter state from the UTF-8 UConverter */
	5363	if(utf8->toULength > 0) {
	5364	toULength=oldToULength=utf8->toULength;
	5365	toULimit=(int8_t)utf8->mode;
	5366	c=(UChar32)utf8->toUnicodeStatus;
	5367	} else {
	5368	toULength=oldToULength=toULimit=0;
	5369	c = 0;
	5370	}
	5371
	5372	// The conversion loop checks source<sourceLimit only once per 1/2/3-byte character.
	5373	// If the buffer ends with a truncated 2- or 3-byte sequence,
	5374	// then we reduce the sourceLimit to before that,
	5375	// and collect the remaining bytes after the conversion loop.
	5376	{
	5377	// Do not go back into the bytes that will be read for finishing a partial
	5378	// sequence from the previous buffer.
	5379	int32_t length=(int32_t)(sourceLimit-source) - (toULimit-oldToULength);
	5380	if(length>0) {
	5381	uint8_t b1=*(sourceLimit-1);
	5382	if(U8_IS_SINGLE(b1)) {
	5383	// common ASCII character
	5384	} else if(U8_IS_TRAIL(b1) && length>=2) {
	5385	uint8_t b2=*(sourceLimit-2);
	5386	if(0xe0<=b2 && b2<0xf0 && U8_IS_VALID_LEAD3_AND_T1(b2, b1)) {
	5387	// truncated 3-byte sequence
	5388	sourceLimit-=2;
	5389	}
	5390	} else if(0xc2<=b1 && b1<0xf0) {
	5391	// truncated 2- or 3-byte sequence
	5392	--sourceLimit;
	5393	}
	5394	}
	5395	}
	5396
	5397	if(c!=0 && targetCapacity>0) {
	5398	utf8->toUnicodeStatus=0;
	5399	utf8->toULength=0;
	5400	goto moreBytes;
	5401	/* See note in ucnv_SBCSFromUTF8() about this goto. */
	5402	}
	5403
	5404	/* conversion loop */
	5405	while(source<sourceLimit) {
	5406	if(targetCapacity>0) {
	5407	b=*source++;
	5408	if(U8_IS_SINGLE(b)) {
	5409	/* convert ASCII */
	5410	if(IS_ASCII_ROUNDTRIP(b, asciiRoundtrips)) {
	5411	*target++=b;
	5412	--targetCapacity;
	5413	continue;
	5414	} else {
	5415	value=DBCS_RESULT_FROM_UTF8(mbcsIndex, results, 0, b);
	5416	if(value==0) {
	5417	c=b;
	5418	goto unassigned;
	5419	}
	5420	}
	5421	} else {
	5422	if(b>=0xe0) {
	5423	if( /* handle U+0800..U+D7FF inline */
	5424	b<=0xed && // do not assume maxFastUChar>0xd7ff
	5425	U8_IS_VALID_LEAD3_AND_T1(b, t1=source[0]) &&
	5426	(t2=(uint8_t)(source[1]-0x80)) <= 0x3f
	5427	) {
	5428	c=((b&0xf)<<6)\|(t1&0x3f);
	5429	source+=2;
	5430	value=DBCS_RESULT_FROM_UTF8(mbcsIndex, results, c, t2);
	5431	if(value==0) {
	5432	c=(c<<6)\|t2;
	5433	goto unassigned;
	5434	}
	5435	} else {
	5436	c=-1;
	5437	}
	5438	} else {
	5439	if( /* handle U+0080..U+07FF inline */
	5440	b>=0xc2 &&
	5441	(t1=(uint8_t)(*source-0x80)) <= 0x3f
	5442	) {
	5443	c=b&0x1f;
	5444	++source;
	5445	value=DBCS_RESULT_FROM_UTF8(mbcsIndex, results, c, t1);
	5446	if(value==0) {
	5447	c=(c<<6)\|t1;
	5448	goto unassigned;
	5449	}
	5450	} else {
	5451	c=-1;
	5452	}
	5453	}
	5454
	5455	if(c<0) {
	5456	/* handle "complicated" and error cases, and continuing partial characters */
	5457	oldToULength=0;
	5458	toULength=1;
	5459	toULimit=U8_COUNT_BYTES_NON_ASCII(b);
	5460	c=b;
	5461	moreBytes:
	5462	while(toULength<toULimit) {
	5463	/*
	5464	* The sourceLimit may have been adjusted before the conversion loop
	5465	* to stop before a truncated sequence.
	5466	* Here we need to use the real limit in case we have two truncated
	5467	* sequences at the end.
	5468	* See ticket #7492.
	5469	*/
	5470	if(source<(uint8_t *)pToUArgs->sourceLimit) {
	5471	b=*source;
	5472	if(icu::UTF8::isValidTrail(c, b, toULength, toULimit)) {
	5473	++source;
	5474	++toULength;
	5475	c=(c<<6)+b;
	5476	} else {
	5477	break; /* sequence too short, stop with toULength<toULimit */
	5478	}
	5479	} else {
	5480	/* store the partial UTF-8 character, compatible with the regular UTF-8 converter */
	5481	source-=(toULength-oldToULength);
	5482	while(oldToULength<toULength) {
	5483	utf8->toUBytes[oldToULength++]=*source++;
	5484	}
	5485	utf8->toUnicodeStatus=c;
	5486	utf8->toULength=toULength;
	5487	utf8->mode=toULimit;
	5488	pToUArgs->source=(char *)source;
	5489	pFromUArgs->target=(char *)target;
	5490	return;
	5491	}
	5492	}
	5493
	5494	if(toULength==toULimit) {
	5495	c-=utf8_offsets[toULength];
	5496	if(toULength<=3) { /* BMP */
	5497	stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
	5498	} else {
	5499	/* supplementary code point */
	5500	if(!hasSupplementary) {
	5501	/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
	5502	stage2Entry=0;
	5503	} else {
	5504	stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
	5505	}
	5506	}
	5507	} else {
	5508	/* error handling: illegal UTF-8 byte sequence */
	5509	source-=(toULength-oldToULength);
	5510	while(oldToULength<toULength) {
	5511	utf8->toUBytes[oldToULength++]=*source++;
	5512	}
	5513	utf8->toULength=toULength;
	5514	pToUArgs->source=(char *)source;
	5515	pFromUArgs->target=(char *)target;
	5516	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
	5517	return;
	5518	}
	5519
	5520	/* get the bytes and the length for the output */
	5521	/* MBCS_OUTPUT_2 */
	5522	value=MBCS_VALUE_2_FROM_STAGE_2(results, stage2Entry, c);
	5523
	5524	/* is this code point assigned, or do we use fallbacks? */
	5525	if(!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) \|\|
	5526	(UCNV_FROM_U_USE_FALLBACK(cnv, c) && value!=0))
	5527	) {
	5528	goto unassigned;
	5529	}
	5530	}
	5531	}
	5532
	5533	/* write the output character bytes from value and length */
	5534	/* from the first if in the loop we know that targetCapacity>0 */
	5535	if(value<=0xff) {
	5536	/* this is easy because we know that there is enough space */
	5537	*target++=(uint8_t)value;
	5538	--targetCapacity;
	5539	} else /* length==2 */ {
	5540	*target++=(uint8_t)(value>>8);
	5541	if(2<=targetCapacity) {
	5542	*target++=(uint8_t)value;
	5543	targetCapacity-=2;
	5544	} else {
	5545	cnv->charErrorBuffer[0]=(char)value;
	5546	cnv->charErrorBufferLength=1;
	5547
	5548	/* target overflow */
	5549	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
	5550	break;
	5551	}
	5552	}
	5553	continue;
	5554
	5555	unassigned:
	5556	{
	5557	/*
	5558	* Try an extension mapping.
	5559	* Pass in no source because we don't have UTF-16 input.
	5560	* If we have a partial match on c, we will return and revert
	5561	* to UTF-8->UTF-16->charset conversion.
	5562	*/
	5563	static const UChar nul=0;
	5564	const UChar *noSource=&nul;
	5565	c=_extFromU(cnv, cnv->sharedData,
	5566	c, &noSource, noSource,
	5567	&target, target+targetCapacity,
	5568	NULL, -1,
	5569	pFromUArgs->flush,
	5570	pErrorCode);
	5571
	5572	if(U_FAILURE(*pErrorCode)) {
	5573	/* not mappable or buffer overflow */
	5574	cnv->fromUChar32=c;
	5575	break;
	5576	} else if(cnv->preFromUFirstCP>=0) {
	5577	/*
	5578	* Partial match, return and revert to pivoting.
	5579	* In normal from-UTF-16 conversion, we would just continue
	5580	* but then exit the loop because the extension match would
	5581	* have consumed the source.
	5582	*/
	5583	*pErrorCode=U_USING_DEFAULT_WARNING;
	5584	break;
	5585	} else {
	5586	/* a mapping was written to the target, continue */
	5587
	5588	/* recalculate the targetCapacity after an extension mapping */
	5589	targetCapacity=(int32_t)(pFromUArgs->targetLimit-(char *)target);
	5590	continue;
	5591	}
	5592	}
	5593	} else {
	5594	/* target is full */
	5595	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
	5596	break;
	5597	}
	5598	}
	5599
	5600	/*
	5601	* The sourceLimit may have been adjusted before the conversion loop
	5602	* to stop before a truncated sequence.
	5603	* If so, then collect the truncated sequence now.
	5604	*/
	5605	if(U_SUCCESS(*pErrorCode) &&
	5606	cnv->preFromUFirstCP<0 &&
	5607	source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) {
	5608	c=utf8->toUBytes[0]=b=*source++;
	5609	toULength=1;
	5610	toULimit=U8_COUNT_BYTES(b);
	5611	while(source<sourceLimit) {
	5612	utf8->toUBytes[toULength++]=b=*source++;
	5613	c=(c<<6)+b;
	5614	}
	5615	utf8->toUnicodeStatus=c;
	5616	utf8->toULength=toULength;
	5617	utf8->mode=toULimit;
	5618	}
	5619
	5620	/* write back the updated pointers */
	5621	pToUArgs->source=(char *)source;
	5622	pFromUArgs->target=(char *)target;
	5623	}
	5624
	5625	/* miscellaneous ------------------------------------------------------------ */
	5626
	5627	static void U_CALLCONV
	5628	ucnv_MBCSGetStarters(const UConverter* cnv,
	5629	UBool starters[256],
	5630	UErrorCode *) {
	5631	const int32_t *state0;
	5632	int i;
	5633
	5634	state0=cnv->sharedData->mbcs.stateTable[cnv->sharedData->mbcs.dbcsOnlyState];
	5635	for(i=0; i<256; ++i) {
	5636	/* all bytes that cause a state transition from state 0 are lead bytes */
	5637	starters[i]= (UBool)MBCS_ENTRY_IS_TRANSITION(state0[i]);
	5638	}
	5639	}
	5640
	5641	/*
	5642	* This is an internal function that allows other converter implementations
	5643	* to check whether a byte is a lead byte.
	5644	*/
	5645	U_CFUNC UBool
	5646	ucnv_MBCSIsLeadByte(UConverterSharedData *sharedData, char byte) {
	5647	return (UBool)MBCS_ENTRY_IS_TRANSITION(sharedData->mbcs.stateTable[0][(uint8_t)byte]);
	5648	}
	5649
	5650	static void U_CALLCONV
	5651	ucnv_MBCSWriteSub(UConverterFromUnicodeArgs *pArgs,
	5652	int32_t offsetIndex,
	5653	UErrorCode *pErrorCode) {
	5654	UConverter *cnv=pArgs->converter;
	5655	char p, subchar;
	5656	char buffer[4];
	5657	int32_t length;
	5658
	5659	/* first, select between subChar and subChar1 */
	5660	if( cnv->subChar1!=0 &&
	5661	(cnv->sharedData->mbcs.extIndexes!=NULL ?
	5662	cnv->useSubChar1 :
	5663	(cnv->invalidUCharBuffer[0]<=0xff))
	5664	) {
	5665	/* select subChar1 if it is set (not 0) and the unmappable Unicode code point is up to U+00ff (IBM MBCS behavior) */
	5666	subchar=(char *)&cnv->subChar1;
	5667	length=1;
	5668	} else {
	5669	/* select subChar in all other cases */
	5670	subchar=(char *)cnv->subChars;
	5671	length=cnv->subCharLen;
	5672	}
	5673
	5674	/* reset the selector for the next code point */
	5675	cnv->useSubChar1=FALSE;
	5676
	5677	if (cnv->sharedData->mbcs.outputType == MBCS_OUTPUT_2_SISO) {
	5678	p=buffer;
	5679
	5680	/* fromUnicodeStatus contains prevLength */
	5681	switch(length) {
	5682	case 1:
	5683	if(cnv->fromUnicodeStatus==2) {
	5684	/* DBCS mode and SBCS sub char: change to SBCS */
	5685	cnv->fromUnicodeStatus=1;
	5686	*p++=UCNV_SI;
	5687	}
	5688	*p++=subchar[0];
	5689	break;
	5690	case 2:
	5691	if(cnv->fromUnicodeStatus<=1) {
	5692	/* SBCS mode and DBCS sub char: change to DBCS */
	5693	cnv->fromUnicodeStatus=2;
	5694	*p++=UCNV_SO;
	5695	}
	5696	*p++=subchar[0];
	5697	*p++=subchar[1];
	5698	break;
	5699	default:
	5700	*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
	5701	return;
	5702	}
	5703	subchar=buffer;
	5704	length=(int32_t)(p-buffer);
	5705	}
	5706
	5707	ucnv_cbFromUWriteBytes(pArgs, subchar, length, offsetIndex, pErrorCode);
	5708	}
	5709
	5710	U_CFUNC UConverterType
	5711	ucnv_MBCSGetType(const UConverter* converter) {
	5712	/* SBCS, DBCS, and EBCDIC_STATEFUL are replaced by MBCS, but here we cheat a little */
	5713	if(converter->sharedData->mbcs.countStates==1) {
	5714	return (UConverterType)UCNV_SBCS;
	5715	} else if((converter->sharedData->mbcs.outputType&0xff)==MBCS_OUTPUT_2_SISO) {
	5716	return (UConverterType)UCNV_EBCDIC_STATEFUL;
	5717	} else if(converter->sharedData->staticData->minBytesPerChar==2 && converter->sharedData->staticData->maxBytesPerChar==2) {
	5718	return (UConverterType)UCNV_DBCS;
	5719	}
	5720	return (UConverterType)UCNV_MBCS;
	5721	}
	5722
	5723	#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */