icuSources/common/ucnv2022.c

   1 /*
   2 **********************************************************************
   3 *   Copyright (C) 2000-2006,2008 International Business Machines
   4 *   Corporation and others.  All Rights Reserved.
   5 **********************************************************************
   6 *   file name:  ucnv2022.c
   7 *   encoding:   US-ASCII
   8 *   tab size:   8 (not used)
   9 *   indentation:4
  10 *
  11 *   created on: 2000feb03
  12 *   created by: Markus W. Scherer
  13 *
  14 *   Change history:
  15 *
  16 *   06/29/2000  helena  Major rewrite of the callback APIs.
  17 *   08/08/2000  Ram     Included support for ISO-2022-JP-2
  18 *                       Changed implementation of toUnicode
  19 *                       function
  20 *   08/21/2000  Ram     Added support for ISO-2022-KR
  21 *   08/29/2000  Ram     Separated implementation of EBCDIC to
  22 *                       ucnvebdc.c
  23 *   09/20/2000  Ram     Added support for ISO-2022-CN
  24 *                       Added implementations for getNextUChar()
  25 *                       for specific 2022 country variants.
  26 *   10/31/2000  Ram     Implemented offsets logic functions
  27 */
  28
  29 #include "unicode/utypes.h"
  30
  31 #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
  32
  33 #include "unicode/ucnv.h"
  34 #include "unicode/uset.h"
  35 #include "unicode/ucnv_err.h"
  36 #include "unicode/ucnv_cb.h"
  37 #include "ucnv_imp.h"
  38 #include "ucnv_bld.h"
  39 #include "ucnv_cnv.h"
  40 #include "ucnvmbcs.h"
  41 #include "cstring.h"
  42 #include "cmemory.h"
  43
  44 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
  45
  46 #ifdef U_ENABLE_GENERIC_ISO_2022
  47 /*
  48  * I am disabling the generic ISO-2022 converter after proposing to do so on
  49  * the icu mailing list two days ago.
  50  *
  51  * Reasons:
  52  * 1. It does not fully support the ISO-2022/ECMA-35 specification with all of
  53  *    its designation sequences, single shifts with return to the previous state,
  54  *    switch-with-no-return to UTF-16BE or similar, etc.
  55  *    This is unlike the language-specific variants like ISO-2022-JP which
  56  *    require a much smaller repertoire of ISO-2022 features.
  57  *    These variants continue to be supported.
  58  * 2. I believe that no one is really using the generic ISO-2022 converter
  59  *    but rather always one of the language-specific variants.
  60  *    Note that ICU's generic ISO-2022 converter has always output one escape
  61  *    sequence followed by UTF-8 for the whole stream.
  62  * 3. Switching between subcharsets is extremely slow, because each time
  63  *    the previous converter is closed and a new one opened,
  64  *    without any kind of caching, least-recently-used list, etc.
  65  * 4. The code is currently buggy, and given the above it does not seem
  66  *    reasonable to spend the time on maintenance.
  67  * 5. ISO-2022 subcharsets should normally be used with 7-bit byte encodings.
  68  *    This means, for example, that when ISO-8859-7 is designated, the following
  69  *    ISO-2022 bytes 00..7f should be interpreted as ISO-8859-7 bytes 80..ff.
  70  *    The ICU ISO-2022 converter does not handle this - and has no information
  71  *    about which subconverter would have to be shifted vs. which is designed
  72  *    for 7-bit ISO-2022.
  73  *
  74  * Markus Scherer 2003-dec-03
  75  */
  76 #endif
  77
  78 static const char SHIFT_IN_STR[]  = "\x0F";
  79 static const char SHIFT_OUT_STR[] = "\x0E";
  80
  81 #define CR      0x0D
  82 #define LF      0x0A
  83 #define H_TAB   0x09
  84 #define V_TAB   0x0B
  85 #define SPACE   0x20
  86
  /*
   * ISO 2022 control codes must not be converted from Unicode
   * because they would mess up the byte stream.
   * The bit mask 0x0800c000 has bits set at bit positions 0xe, 0xf, 0x1b
   * corresponding to SO, SI, and ESC.
   *
   * The range check is performed on the value converted to uint32_t so that
   * a negative argument fails the check and never reaches the shift
   * (shifting by a negative count is undefined behavior in C).
   * Note: the argument is evaluated more than once.
   */
  #define IS_2022_CONTROL(c) (((uint32_t)(c)<0x20) && (((uint32_t)1<<(c))&0x0800c000)!=0)
  94
  /*
   * Charset/state identifiers for the ISO-2022-JP and ISO-2022-CN implementations.
   * The JP and CN groups intentionally reuse the small values 1..3; every
   * enumerator is spelled out with its numeric value so the overlap is visible.
   */
  typedef enum  {
      /* values shared by both variants */
      INVALID_STATE = -1,
      ASCII         = 0,

      SS2_STATE     = 0x10,
      SS3_STATE     = 0x11,

      /* ISO-2022-JP charsets */
      ISO8859_1     = 1,
      ISO8859_7     = 2,
      JISX201       = 3,
      JISX208       = 4,
      JISX212       = 5,
      GB2312        = 6,
      KSC5601       = 7,
      HWKANA_7BIT   = 8,    /* Halfwidth Katakana 7 bit */

      /*
       * ISO-2022-CN charsets.
       * These first few constants must keep their values because they
       * correspond to myConverterArray[].
       */
      GB2312_1      = 1,
      ISO_IR_165    = 2,
      CNS_11643     = 3,

      /*
       * The plane-specific values are used in StateEnum and ISO2022State
       * variables, but CNS_11643 must be used to index into myConverterArray[].
       */
      CNS_11643_0   = 0x20,
      CNS_11643_1   = 0x21,
      CNS_11643_2   = 0x22,
      CNS_11643_3   = 0x23,
      CNS_11643_4   = 0x24,
      CNS_11643_5   = 0x25,
      CNS_11643_6   = 0x26,
      CNS_11643_7   = 0x27
  } StateEnum;
 133
 134 /* is the StateEnum charset value for a DBCS charset? */
 135 #define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601)
 136
 137 #define CSM(cs) ((uint16_t)1<<(cs))
 138
 139 /*
 140  * Each of these charset masks (with index x) contains a bit for a charset in exact correspondence
 141  * to whether that charset is used in the corresponding version x of ISO_2022,locale=ja,version=x
 142  *
 143  * Note: The converter uses some leniency:
 144  * - The escape sequence ESC ( I for half-width 7-bit Katakana is recognized in
 145  *   all versions, not just JIS7 and JIS8.
 146  * - ICU does not distinguish between different versions of JIS X 0208.
 147  */
 148 static const uint16_t jpCharsetMasks[5]={
 149     CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT),
 150     CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212),
 151     CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),
 152     CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),
 153     CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7)
 154 };
 155
 /* Converter type tags; the values run consecutively from 0 (ASCII1) to 5 (HWKANA). */
 typedef enum {
     ASCII1 = 0,
     LATIN1 = 1,
     SBCS   = 2,
     DBCS   = 3,
     MBCS   = 4,
     HWKANA = 5
 } Cnv2022Type;
 164
 /* Designation/shift state for one conversion direction. */
 typedef struct ISO2022State {
     /* charset number for SI (G0)/SO (G1)/SS2 (G2)/SS3 (G3) */
     int8_t cs[4];
     /* 0..3 for G0..G3 (SI/SO/SS2/SS3) */
     int8_t g;
     /* g before single shift (SS2 or SS3) */
     int8_t prevG;
 } ISO2022State;
 170
 171 #define UCNV_OPTIONS_VERSION_MASK 0xf
 172 #define UCNV_2022_MAX_CONVERTERS 10
 173
 174 typedef struct{
 175     UConverterSharedData *myConverterArray[UCNV_2022_MAX_CONVERTERS];
 176     UConverter *currentConverter;
 177     Cnv2022Type currentType;
 178     ISO2022State toU2022State, fromU2022State;
 179     uint32_t key;
 180     uint32_t version;
 181 #ifdef U_ENABLE_GENERIC_ISO_2022
 182     UBool isFirstBuffer;
 183 #endif
 184     UBool isEmptySegment;
 185     char name[30];
 186     char locale[3];
 187 }UConverterDataISO2022;
 188
 189 /* Protos */
 190 /* ISO-2022 ----------------------------------------------------------------- */
 191
 192 /*Forward declaration */
 193 U_CFUNC void
 194 ucnv_fromUnicode_UTF8(UConverterFromUnicodeArgs * args,
 195                       UErrorCode * err);
 196 U_CFUNC void
 197 ucnv_fromUnicode_UTF8_OFFSETS_LOGIC(UConverterFromUnicodeArgs * args,
 198                                     UErrorCode * err);
 199
 200 #define ESC_2022 0x1B /*ESC*/
 201
 /* Classification of a (partial) escape sequence, as stored in escSeqStateTable_Value_2022[]. */
 typedef enum
 {
     /* Doesn't correspond to a valid iso 2022 escape sequence */
     INVALID_2022 = -1,
     /* so far corresponds to a valid iso 2022 escape sequence */
     VALID_NON_TERMINAL_2022 = 0,
     /* corresponds to a valid iso 2022 escape sequence */
     VALID_TERMINAL_2022 = 1,
     /* so far matches one iso 2022 escape sequence, but by adding more characters might match another escape sequence */
     VALID_MAYBE_TERMINAL_2022 = 2
 } UCNV_TableStates_2022;
 209
 210 /*
 211 * The way these state transition arrays work is:
 212 * ex : ESC$B is the sequence for JISX208
 213 *      a) First Iteration: char is ESC
 214 *          i) Get the value of ESC from normalize_esq_chars_2022[] with int value of ESC as index
 215 *             int x = normalize_esq_chars_2022[27] which is equal to 1
 216 *         ii) Search for this value in escSeqStateTable_Key_2022[]
 217 *             value of x is stored at escSeqStateTable_Key_2022[0]
 218 *        iii) Save this index as offset
 219 *         iv) Get state of this sequence from escSeqStateTable_Value_2022[]
 220 *             escSeqStateTable_Value_2022[offset], which is VALID_NON_TERMINAL_2022
 221 *     b) Switch on this state and continue to next char
 222 *          i) Get the value of $ from normalize_esq_chars_2022[] with int value of $ as index
 223 *             which is normalize_esq_chars_2022[36] == 4
 224 *         ii) x is currently 1(from above)
 225 *               x<<=5 -- x is now 32
 226 *               x+=normalize_esq_chars_2022[36]
 227 *               now x is 36
 228 *        iii) Search for this value in escSeqStateTable_Key_2022[]
 229 *             value of x is stored at escSeqStateTable_Key_2022[2], so offset is 2
 230 *         iv) Get state of this sequence from escSeqStateTable_Value_2022[]
 231 *             escSeqStateTable_Value_2022[offset], which is VALID_NON_TERMINAL_2022
 232 *     c) Switch on this state and continue to next char
 233 *        i)  Get the value of B from normalize_esq_chars_2022[] with int value of B as index
 234 *        ii) x is currently 36 (from above)
 235 *            x<<=5 -- x is now 1152
 236 *            x+=normalize_esq_chars_2022[66]
 237 *            now x is 1161
 238 *       iii) Search for this value in escSeqStateTable_Key_2022[]
 239 *            value of x is stored at escSeqStateTable_Key_2022[24], so offset is 24
 240 *        iv) Get state of this sequence from escSeqStateTable_Value_2022[24]
 241 *            escSeqStateTable_Value_2022[offset], which is VALID_TERMINAL_2022
 242 *         v) Get the converter name from escSeqStateTable_Result_2022[24] which is JISX208
 243 */
 244
 245
 /*
  * Below are the 3 arrays depicting a state transition table.
  *
  * normalize_esq_chars_2022[] is indexed with a byte value and yields the small
  * code that is folded into the escape sequence key (key = (key<<5) + code).
  * Each row holds 16 entries; the row comment lists the bytes with nonzero codes.
  */
 static const int8_t normalize_esq_chars_2022[256] = {
 /*  x0  x1  x2  x3  x4  x5  x6  x7  x8  x9  xA  xB  xC  xD  xE  xF */
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 0x */
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  0,  0,  0,  /* 1x: ESC */
      0,  0,  0,  0,  4,  7, 29,  0,  2, 24, 26, 27,  0,  3, 23,  6,  /* 2x: $ % & ( ) * + - . / */
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 3x */
      5,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 25, 28,  /* 4x: @ A B C D E F G H I J K L M N O */
      0,  0, 21,  0,  0,  0,  0,  0,  0,  0, 22,  0,  0,  0,  0,  0,  /* 5x: R Z */
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 6x */
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 7x */
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 8x */
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 9x */
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* Ax */
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* Bx */
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* Cx */
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* Dx */
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* Ex */
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   /* Fx */
 };
 277
 278 #ifdef U_ENABLE_GENERIC_ISO_2022
 279 /*
 280  * When the generic ISO-2022 converter is completely removed, not just disabled
 281  * per #ifdef, then the following state table and the associated tables that are
 282  * dimensioned with MAX_STATES_2022 should be trimmed.
 283  *
 284  * Especially, VALID_MAYBE_TERMINAL_2022 will not be used any more, and all of
 285  * the associated escape sequences starting with ESC ( B should be removed.
 286  * This includes the ones with key values 1097 and all of the ones above 1000000.
 287  *
 288  * For the latter, the tables can simply be truncated.
 289  * For the former, since the tables must be kept parallel, it is probably best
 290  * to simply duplicate an adjacent table cell, parallel in all tables.
 291  *
 292  * It may make sense to restructure the tables, especially by using small search
 293  * tables for the variants instead of indexing them parallel to the table here.
 294  */
 295 #endif
 296
 /* number of entries in each of the parallel escape sequence tables */
 #define MAX_STATES_2022 74

 /*
  * Keys of the recognized (partial) escape sequences in ascending order.
  * The position of a key is the offset used with the parallel tables.
  */
 static const int32_t escSeqStateTable_Key_2022[MAX_STATES_2022] = {
     /*  0.. 7 */ 1,        34,       36,       39,       55,       57,       60,       61,
     /*  8..15 */ 1093,     1096,     1097,     1098,     1099,     1100,     1101,     1102,
     /* 16..23 */ 1103,     1104,     1105,     1106,     1109,     1154,     1157,     1160,
     /* 24..31 */ 1161,     1176,     1178,     1179,     1254,     1257,     1768,     1773,
     /* 32..39 */ 1957,     35105,    36933,    36936,    36937,    36938,    36939,    36940,
     /* 40..47 */ 36942,    36943,    36944,    36945,    36946,    36947,    36948,    37640,
     /* 48..55 */ 37642,    37644,    37646,    37711,    37744,    37745,    37746,    37747,
     /* 56..63 */ 37748,    40133,    40136,    40138,    40139,    40140,    40141,    1123363,
     /* 64..71 */ 35947624, 35947625, 35947626, 35947627, 35947629, 35947630, 35947631, 35947635,
     /* 72..73 */ 35947636, 35947638
 };
 310
 #ifdef U_ENABLE_GENERIC_ISO_2022

 /*
  * Converter names parallel to escSeqStateTable_Key_2022[]; only compiled for
  * the generic ISO-2022 converter. NULL where the key has no converter name.
  */
 static const char* const escSeqStateTable_Result_2022[MAX_STATES_2022] = {
     /*  0.. 3 */ NULL,                   NULL,                   NULL,                   NULL,
     /*  4.. 7 */ NULL,                   NULL,                   NULL,                   NULL,
     /*  8..11 */ "latin1",               "latin1",               "latin1",               "ibm-865",
     /* 12..15 */ "ibm-865",              "ibm-865",              "ibm-865",              "ibm-865",
     /* 16..19 */ "ibm-865",              "JISX0201",             "JISX0201",             "latin1",
     /* 20..23 */ "latin1",               NULL,                   "JISX-208",             "ibm-5478",
     /* 24..27 */ "JISX-208",             NULL,                   NULL,                   NULL,
     /* 28..31 */ NULL,                   "UTF8",                 "ISO-8859-1",           "ISO-8859-7",
     /* 32..35 */ "JIS-X-208",            NULL,                   "ibm-955",              "ibm-367",
     /* 36..39 */ "ibm-952",              "ibm-949",              "JISX-212",             "ibm-1383",
     /* 40..43 */ "ibm-952",              "ibm-964",              "ibm-964",              "ibm-964",
     /* 44..47 */ "ibm-964",              "ibm-964",              "ibm-964",              "ibm-5478",
     /* 48..51 */ "ibm-949",              "ISO-IR-165",           "CNS-11643-1992,1",     "CNS-11643-1992,2",
     /* 52..55 */ "CNS-11643-1992,3",     "CNS-11643-1992,4",     "CNS-11643-1992,5",     "CNS-11643-1992,6",
     /* 56..59 */ "CNS-11643-1992,7",     "UTF16_PlatformEndian", "UTF16_PlatformEndian", "UTF16_PlatformEndian",
     /* 60..63 */ "UTF16_PlatformEndian", "UTF16_PlatformEndian", "UTF16_PlatformEndian", NULL,
     /* 64..67 */ "latin1",               "ibm-912",              "ibm-913",              "ibm-914",
     /* 68..71 */ "ibm-813",              "ibm-1089",             "ibm-920",              "ibm-915",
     /* 72..73 */ "ibm-915",              "latin1"
 };

 #endif
 327
 328 static const UCNV_TableStates_2022 escSeqStateTable_Value_2022[MAX_STATES_2022] = {
 329 /*          0                           1                         2                             3                           4                           5                               6                        7                          8                           9       */
 330      VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022     ,VALID_NON_TERMINAL_2022   ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_NON_TERMINAL_2022    ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
 331     ,VALID_MAYBE_TERMINAL_2022  ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
 332     ,VALID_TERMINAL_2022        ,VALID_NON_TERMINAL_2022    ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022    ,VALID_TERMINAL_2022
 333     ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_NON_TERMINAL_2022    ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
 334     ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
 335     ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
 336     ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_NON_TERMINAL_2022    ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
 337     ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
 338 };
 339
 340
 /*
  * ISO-2022 variant selector (type def for refactoring changeState_2022 code).
  * The generic variant only exists when U_ENABLE_GENERIC_ISO_2022 is defined.
  */
 typedef enum{
 #ifdef U_ENABLE_GENERIC_ISO_2022
     ISO_2022    = 0,
 #endif
     ISO_2022_JP = 1,
     ISO_2022_KR = 2,
     ISO_2022_CN = 3
 } Variant2022;
 350
 351 /*********** ISO 2022 Converter Protos ***********/
 352 static void
 353 _ISO2022Open(UConverter *cnv, const char *name, const char *locale,uint32_t options, UErrorCode *errorCode);
 354
 355 static void
 356  _ISO2022Close(UConverter *converter);
 357
 358 static void
 359 _ISO2022Reset(UConverter *converter, UConverterResetChoice choice);
 360
 361 static const char*
 362 _ISO2022getName(const UConverter* cnv);
 363
 364 static void
 365 _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err);
 366
 367 static UConverter *
 368 _ISO_2022_SafeClone(const UConverter *cnv, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status);
 369
 370 #ifdef U_ENABLE_GENERIC_ISO_2022
 371 static void
 372 T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverterToUnicodeArgs* args, UErrorCode* err);
 373 #endif
 374
 375 /*const UConverterSharedData _ISO2022Data;*/
 376 static const UConverterSharedData _ISO2022JPData;
 377 static const UConverterSharedData _ISO2022KRData;
 378 static const UConverterSharedData _ISO2022CNData;
 379
 380 /*************** Converter implementations ******************/
 381
 382 /* The purpose of this function is to get around gcc compiler warnings. */
 383 static U_INLINE void
 384 fromUWriteUInt8(UConverter *cnv,
 385                  const char *bytes, int32_t length,
 386                  uint8_t **target, const char *targetLimit,
 387                  int32_t **offsets,
 388                  int32_t sourceIndex,
 389                  UErrorCode *pErrorCode)
 390 {
 391     char *targetChars = (char *)*target;
 392     ucnv_fromUWriteBytes(cnv, bytes, length, &targetChars, targetLimit,
 393                          offsets, sourceIndex, pErrorCode);
 394     *target = (uint8_t*)targetChars;
 395
 396 }
 397
 398 static U_INLINE void
 399 setInitialStateToUnicodeKR(UConverter* converter, UConverterDataISO2022 *myConverterData){
 400     if(myConverterData->version == 1) {
 401         UConverter *cnv = myConverterData->currentConverter;
 402
 403         cnv->toUnicodeStatus=0;     /* offset */
 404         cnv->mode=0;                /* state */
 405         cnv->toULength=0;           /* byteIndex */
 406     }
 407 }
 408
 409 static U_INLINE void
 410 setInitialStateFromUnicodeKR(UConverter* converter,UConverterDataISO2022 *myConverterData){
 411    /* in ISO-2022-KR the designator sequence appears only once
 412     * in a file so we append it only once
 413     */
 414     if( converter->charErrorBufferLength==0){
 415
 416         converter->charErrorBufferLength = 4;
 417         converter->charErrorBuffer[0] = 0x1b;
 418         converter->charErrorBuffer[1] = 0x24;
 419         converter->charErrorBuffer[2] = 0x29;
 420         converter->charErrorBuffer[3] = 0x43;
 421     }
 422     if(myConverterData->version == 1) {
 423         UConverter *cnv = myConverterData->currentConverter;
 424
 425         cnv->fromUChar32=0;
 426         cnv->fromUnicodeStatus=1;   /* prevLength */
 427     }
 428 }
 429
 430 static void
 431 _ISO2022Open(UConverter *cnv, const char *name, const char *locale,uint32_t options, UErrorCode *errorCode){
 432
 433     char myLocale[6]={' ',' ',' ',' ',' ',' '};
 434
 435     cnv->extraInfo = uprv_malloc (sizeof (UConverterDataISO2022));
 436     if(cnv->extraInfo != NULL) {
 437         UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) cnv->extraInfo;
 438         uint32_t version;
 439
 440         uprv_memset(myConverterData, 0, sizeof(UConverterDataISO2022));
 441         myConverterData->currentType = ASCII1;
 442         cnv->fromUnicodeStatus =FALSE;
 443         if(locale){
 444             uprv_strncpy(myLocale, locale, sizeof(myLocale));
 445         }
 446         version = options & UCNV_OPTIONS_VERSION_MASK;
 447         myConverterData->version = version;
 448         if(myLocale[0]=='j' && (myLocale[1]=='a'|| myLocale[1]=='p') &&
 449             (myLocale[2]=='_' || myLocale[2]=='\0'))
 450         {
 451             size_t len=0;
 452             /* open the required converters and cache them */
 453             if(jpCharsetMasks[version]&CSM(ISO8859_7)) {
 454                 myConverterData->myConverterArray[ISO8859_7]= ucnv_loadSharedData("ISO8859_7", NULL, errorCode);
 455             }
 456             myConverterData->myConverterArray[JISX201]      = ucnv_loadSharedData("JISX0201", NULL, errorCode);
 457             myConverterData->myConverterArray[JISX208]      = ucnv_loadSharedData("jisx-208", NULL, errorCode);
 458             if(jpCharsetMasks[version]&CSM(JISX212)) {
 459                 myConverterData->myConverterArray[JISX212]  = ucnv_loadSharedData("jisx-212", NULL, errorCode);
 460             }
 461             if(jpCharsetMasks[version]&CSM(GB2312)) {
 462                 myConverterData->myConverterArray[GB2312]   = ucnv_loadSharedData("ibm-5478", NULL, errorCode);   /* gb_2312_80-1 */
 463             }
 464             if(jpCharsetMasks[version]&CSM(KSC5601)) {
 465                 myConverterData->myConverterArray[KSC5601]  = ucnv_loadSharedData("ksc_5601", NULL, errorCode);
 466             }
 467
 468             /* set the function pointers to appropriate funtions */
 469             cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData);
 470             uprv_strcpy(myConverterData->locale,"ja");
 471
 472             uprv_strcpy(myConverterData->name,"ISO_2022,locale=ja,version=");
 473             len = uprv_strlen(myConverterData->name);
 474             myConverterData->name[len]=(char)(myConverterData->version+(int)'0');
 475             myConverterData->name[len+1]='\0';
 476         }
 477         else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') &&
 478             (myLocale[2]=='_' || myLocale[2]=='\0'))
 479         {
 480             if (version==1){
 481                 myConverterData->currentConverter=
 482                     ucnv_open("icu-internal-25546",errorCode);
 483
 484                 if (U_FAILURE(*errorCode)) {
 485                     _ISO2022Close(cnv);
 486                     return;
 487                 }
 488
 489                 uprv_strcpy(myConverterData->name,"ISO_2022,locale=ko,version=1");
 490                 uprv_memcpy(cnv->subChars, myConverterData->currentConverter->subChars, 4);
 491                 cnv->subCharLen = myConverterData->currentConverter->subCharLen;
 492             }else{
 493                 myConverterData->currentConverter=ucnv_open("ibm-949",errorCode);
 494
 495                 if (U_FAILURE(*errorCode)) {
 496                     _ISO2022Close(cnv);
 497                     return;
 498                 }
 499
 500                 myConverterData->version = 0;
 501                 uprv_strcpy(myConverterData->name,"ISO_2022,locale=ko,version=0");
 502             }
 503
 504             /* initialize the state variables */
 505             setInitialStateToUnicodeKR(cnv, myConverterData);
 506             setInitialStateFromUnicodeKR(cnv, myConverterData);
 507
 508             /* set the function pointers to appropriate funtions */
 509             cnv->sharedData=(UConverterSharedData*)&_ISO2022KRData;
 510             uprv_strcpy(myConverterData->locale,"ko");
 511         }
 512         else if(((myLocale[0]=='z' && myLocale[1]=='h') || (myLocale[0]=='c'&& myLocale[1]=='n'))&&
 513             (myLocale[2]=='_' || myLocale[2]=='\0'))
 514         {
 515
 516             /* open the required converters and cache them */
 517             myConverterData->myConverterArray[GB2312_1]         = ucnv_loadSharedData("ibm-5478", NULL, errorCode);
 518             if(version==1) {
 519                 myConverterData->myConverterArray[ISO_IR_165]   = ucnv_loadSharedData("iso-ir-165", NULL, errorCode);
 520             }
 521             myConverterData->myConverterArray[CNS_11643]        = ucnv_loadSharedData("cns-11643-1992", NULL, errorCode);
 522
 523
 524             /* set the function pointers to appropriate funtions */
 525             cnv->sharedData=(UConverterSharedData*)&_ISO2022CNData;
 526             uprv_strcpy(myConverterData->locale,"cn");
 527
 528             if (version==1){
 529                 uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=1");
 530             }else{
 531                 myConverterData->version = 0;
 532                 uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=0");
 533             }
 534         }
 535         else{
 536 #ifdef U_ENABLE_GENERIC_ISO_2022
 537             myConverterData->isFirstBuffer = TRUE;
 538
 539             /* append the UTF-8 escape sequence */
 540             cnv->charErrorBufferLength = 3;
 541             cnv->charErrorBuffer[0] = 0x1b;
 542             cnv->charErrorBuffer[1] = 0x25;
 543             cnv->charErrorBuffer[2] = 0x42;
 544
 545             cnv->sharedData=(UConverterSharedData*)&_ISO2022Data;
 546             /* initialize the state variables */
 547             uprv_strcpy(myConverterData->name,"ISO_2022");
 548 #else
 549             *errorCode = U_UNSUPPORTED_ERROR;
 550             return;
 551 #endif
 552         }
 553
 554         cnv->maxBytesPerUChar=cnv->sharedData->staticData->maxBytesPerChar;
 555
 556         if(U_FAILURE(*errorCode)) {
 557             _ISO2022Close(cnv);
 558         }
 559     } else {
 560         *errorCode = U_MEMORY_ALLOCATION_ERROR;
 561     }
 562 }
 563
 564
 565 static void
 566 _ISO2022Close(UConverter *converter) {
 567     UConverterDataISO2022* myData =(UConverterDataISO2022 *) (converter->extraInfo);
 568     UConverterSharedData **array = myData->myConverterArray;
 569     int32_t i;
 570
 571     if (converter->extraInfo != NULL) {
 572         /*close the array of converter pointers and free the memory*/
 573         for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) {
 574             if(array[i]!=NULL) {
 575                 ucnv_unloadSharedDataIfReady(array[i]);
 576             }
 577         }
 578
 579         ucnv_close(myData->currentConverter);
 580
 581         if(!converter->isExtraLocal){
 582             uprv_free (converter->extraInfo);
 583             converter->extraInfo = NULL;
 584         }
 585     }
 586 }
 587
 588 static void
 589 _ISO2022Reset(UConverter *converter, UConverterResetChoice choice) {
 590     UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) (converter->extraInfo);
 591     if(choice<=UCNV_RESET_TO_UNICODE) {
 592         uprv_memset(&myConverterData->toU2022State, 0, sizeof(ISO2022State));
 593         myConverterData->key = 0;
 594         myConverterData->isEmptySegment = FALSE;
 595     }
 596     if(choice!=UCNV_RESET_TO_UNICODE) {
 597         uprv_memset(&myConverterData->fromU2022State, 0, sizeof(ISO2022State));
 598     }
 599 #ifdef U_ENABLE_GENERIC_ISO_2022
 600     if(myConverterData->locale[0] == 0){
 601         if(choice<=UCNV_RESET_TO_UNICODE) {
 602             myConverterData->isFirstBuffer = TRUE;
 603             myConverterData->key = 0;
 604             if (converter->mode == UCNV_SO){
 605                 ucnv_close (myConverterData->currentConverter);
 606                 myConverterData->currentConverter=NULL;
 607             }
 608             converter->mode = UCNV_SI;
 609         }
 610         if(choice!=UCNV_RESET_TO_UNICODE) {
 611             /* re-append UTF-8 escape sequence */
 612             converter->charErrorBufferLength = 3;
 613             converter->charErrorBuffer[0] = 0x1b;
 614             converter->charErrorBuffer[1] = 0x28;
 615             converter->charErrorBuffer[2] = 0x42;
 616         }
 617     }
 618     else
 619 #endif
 620     {
 621         /* reset the state variables */
 622         if(myConverterData->locale[0] == 'k'){
 623             if(choice<=UCNV_RESET_TO_UNICODE) {
 624                 setInitialStateToUnicodeKR(converter, myConverterData);
 625             }
 626             if(choice!=UCNV_RESET_TO_UNICODE) {
 627                 setInitialStateFromUnicodeKR(converter, myConverterData);
 628             }
 629         }
 630     }
 631 }
 632
 633 static const char*
 634 _ISO2022getName(const UConverter* cnv){
 635     if(cnv->extraInfo){
 636         UConverterDataISO2022* myData= (UConverterDataISO2022*)cnv->extraInfo;
 637         return myData->name;
 638     }
 639     return NULL;
 640 }
 641
 642
 643 /*************** to unicode *******************/
 644 /****************************************************************************
 645  * Recognized escape sequences are
 646  * <ESC>(B  ASCII
 647  * <ESC>.A  ISO-8859-1
 648  * <ESC>.F  ISO-8859-7
 649  * <ESC>(J  JISX-201
 650  * <ESC>(I  JISX-201
 651  * <ESC>$B  JISX-208
 652  * <ESC>$@  JISX-208
 653  * <ESC>$(D JISX-212
 654  * <ESC>$A  GB2312
 655  * <ESC>$(C KSC5601
 656  */
 657 static const StateEnum nextStateToUnicodeJP[MAX_STATES_2022]= {
 658 /*      0                1               2               3               4               5               6               7               8               9    */
 659     INVALID_STATE   ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,SS2_STATE      ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
 660     ,ASCII          ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,JISX201        ,HWKANA_7BIT    ,JISX201        ,INVALID_STATE
 661     ,INVALID_STATE  ,INVALID_STATE  ,JISX208        ,GB2312         ,JISX208        ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
 662     ,ISO8859_1      ,ISO8859_7      ,JISX208        ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,KSC5601        ,JISX212        ,INVALID_STATE
 663     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
 664     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
 665     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
 666     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
 667 };
 668
 669 /*************** to unicode *******************/
 670 static const StateEnum nextStateToUnicodeCN[MAX_STATES_2022]= {
 671 /*      0                1               2               3               4               5               6               7               8               9    */
 672      INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,SS2_STATE      ,SS3_STATE      ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
 673     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
 674     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
 675     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
 676     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,GB2312_1       ,INVALID_STATE  ,ISO_IR_165
 677     ,CNS_11643_1    ,CNS_11643_2    ,CNS_11643_3    ,CNS_11643_4    ,CNS_11643_5    ,CNS_11643_6    ,CNS_11643_7    ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
 678     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
 679     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
 680 };
 681
 682
 683 static UCNV_TableStates_2022
 684 getKey_2022(char c,int32_t* key,int32_t* offset){
 685     int32_t togo;
 686     int32_t low = 0;
 687     int32_t hi = MAX_STATES_2022;
 688     int32_t oldmid=0;
 689
 690     togo = normalize_esq_chars_2022[(uint8_t)c];
 691     if(togo == 0) {
 692         /* not a valid character anywhere in an escape sequence */
 693         *key = 0;
 694         *offset = 0;
 695         return INVALID_2022;
 696     }
 697     togo = (*key << 5) + togo;
 698
 699     while (hi != low)  /*binary search*/{
 700
 701         register int32_t mid = (hi+low) >> 1; /*Finds median*/
 702
 703         if (mid == oldmid)
 704             break;
 705
 706         if (escSeqStateTable_Key_2022[mid] > togo){
 707             hi = mid;
 708         }
 709         else if (escSeqStateTable_Key_2022[mid] < togo){
 710             low = mid;
 711         }
 712         else /*we found it*/{
 713             *key = togo;
 714             *offset = mid;
 715             return escSeqStateTable_Value_2022[mid];
 716         }
 717         oldmid = mid;
 718
 719     }
 720
 721     *key = 0;
 722     *offset = 0;
 723     return INVALID_2022;
 724 }
 725
 726 /*runs through a state machine to determine the escape sequence - codepage correspondance
 727  */
 728 static void
 729 changeState_2022(UConverter* _this,
 730                 const char** source,
 731                 const char* sourceLimit,
 732                 Variant2022 var,
 733                 UErrorCode* err){
 734     UCNV_TableStates_2022 value;
 735     UConverterDataISO2022* myData2022 = ((UConverterDataISO2022*)_this->extraInfo);
 736     uint32_t key = myData2022->key;
 737     int32_t offset = 0;
 738     char c;
 739
 740     value = VALID_NON_TERMINAL_2022;
 741     while (*source < sourceLimit) {
 742         c = *(*source)++;
 743         _this->toUBytes[_this->toULength++]=(uint8_t)c;
 744         value = getKey_2022(c,(int32_t *) &key, &offset);
 745
 746         switch (value){
 747
 748         case VALID_NON_TERMINAL_2022 :
 749             /* continue with the loop */
 750             break;
 751
 752         case VALID_TERMINAL_2022:
 753             key = 0;
 754             goto DONE;
 755
 756         case INVALID_2022:
 757             goto DONE;
 758
 759         case VALID_MAYBE_TERMINAL_2022:
 760 #ifdef U_ENABLE_GENERIC_ISO_2022
 761             /* ESC ( B is ambiguous only for ISO_2022 itself */
 762             if(var == ISO_2022) {
 763                 /* discard toUBytes[] for ESC ( B because this sequence is correct and complete */
 764                 _this->toULength = 0;
 765
 766                 /* TODO need to indicate that ESC ( B was seen; if failure, then need to replay from source or from MBCS-style replay */
 767
 768                 /* continue with the loop */
 769                 value = VALID_NON_TERMINAL_2022;
 770                 break;
 771             } else
 772 #endif
 773             {
 774                 /* not ISO_2022 itself, finish here */
 775                 value = VALID_TERMINAL_2022;
 776                 key = 0;
 777                 goto DONE;
 778             }
 779         }
 780     }
 781
 782 DONE:
 783     myData2022->key = key;
 784
 785     if (value == VALID_NON_TERMINAL_2022) {
 786         /* indicate that the escape sequence is incomplete: key!=0 */
 787         return;
 788     } else if (value == INVALID_2022 ) {
 789         *err = U_ILLEGAL_ESCAPE_SEQUENCE;
 790         return;
 791     } else /* value == VALID_TERMINAL_2022 */ {
 792         switch(var){
 793 #ifdef U_ENABLE_GENERIC_ISO_2022
 794         case ISO_2022:
 795         {
 796             const char *chosenConverterName = escSeqStateTable_Result_2022[offset];
 797             if(chosenConverterName == NULL) {
 798                 /* SS2 or SS3 */
 799                 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
 800                 return;
 801             }
 802
 803             _this->mode = UCNV_SI;
 804             ucnv_close(myData2022->currentConverter);
 805             myData2022->currentConverter = myUConverter = ucnv_open(chosenConverterName, err);
 806             if(U_SUCCESS(*err)) {
 807                 myUConverter->fromCharErrorBehaviour = UCNV_TO_U_CALLBACK_STOP;
 808                 _this->mode = UCNV_SO;
 809             }
 810             break;
 811         }
 812 #endif
 813         case ISO_2022_JP:
 814             {
 815                 StateEnum tempState=nextStateToUnicodeJP[offset];
 816                 switch(tempState) {
 817                 case INVALID_STATE:
 818                     *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
 819                     break;
 820                 case SS2_STATE:
 821                     if(myData2022->toU2022State.cs[2]!=0) {
 822                         if(myData2022->toU2022State.g<2) {
 823                             myData2022->toU2022State.prevG=myData2022->toU2022State.g;
 824                         }
 825                         myData2022->toU2022State.g=2;
 826                     } else {
 827                         /* illegal to have SS2 before a matching designator */
 828                         *err = U_ILLEGAL_ESCAPE_SEQUENCE;
 829                     }
 830                     break;
 831                 /* case SS3_STATE: not used in ISO-2022-JP-x */
 832                 case ISO8859_1:
 833                 case ISO8859_7:
 834                     if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {
 835                         *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
 836                     } else {
 837                         /* G2 charset for SS2 */
 838                         myData2022->toU2022State.cs[2]=(int8_t)tempState;
 839                     }
 840                     break;
 841                 default:
 842                     if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {
 843                         *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
 844                     } else {
 845                         /* G0 charset */
 846                         myData2022->toU2022State.cs[0]=(int8_t)tempState;
 847                     }
 848                     break;
 849                 }
 850             }
 851             break;
 852         case ISO_2022_CN:
 853             {
 854                 StateEnum tempState=nextStateToUnicodeCN[offset];
 855                 switch(tempState) {
 856                 case INVALID_STATE:
 857                     *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
 858                     break;
 859                 case SS2_STATE:
 860                     if(myData2022->toU2022State.cs[2]!=0) {
 861                         if(myData2022->toU2022State.g<2) {
 862                             myData2022->toU2022State.prevG=myData2022->toU2022State.g;
 863                         }
 864                         myData2022->toU2022State.g=2;
 865                     } else {
 866                         /* illegal to have SS2 before a matching designator */
 867                         *err = U_ILLEGAL_ESCAPE_SEQUENCE;
 868                     }
 869                     break;
 870                 case SS3_STATE:
 871                     if(myData2022->toU2022State.cs[3]!=0) {
 872                         if(myData2022->toU2022State.g<2) {
 873                             myData2022->toU2022State.prevG=myData2022->toU2022State.g;
 874                         }
 875                         myData2022->toU2022State.g=3;
 876                     } else {
 877                         /* illegal to have SS3 before a matching designator */
 878                         *err = U_ILLEGAL_ESCAPE_SEQUENCE;
 879                     }
 880                     break;
 881                 case ISO_IR_165:
 882                     if(myData2022->version==0) {
 883                         *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
 884                         break;
 885                     }
 886                     /*fall through*/
 887                 case GB2312_1:
 888                     /*fall through*/
 889                 case CNS_11643_1:
 890                     myData2022->toU2022State.cs[1]=(int8_t)tempState;
 891                     break;
 892                 case CNS_11643_2:
 893                     myData2022->toU2022State.cs[2]=(int8_t)tempState;
 894                     break;
 895                 default:
 896                     /* other CNS 11643 planes */
 897                     if(myData2022->version==0) {
 898                         *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
 899                     } else {
 900                        myData2022->toU2022State.cs[3]=(int8_t)tempState;
 901                     }
 902                     break;
 903                 }
 904             }
 905             break;
 906         case ISO_2022_KR:
 907             if(offset==0x30){
 908                 /* nothing to be done, just accept this one escape sequence */
 909             } else {
 910                 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
 911             }
 912             break;
 913
 914         default:
 915             *err = U_ILLEGAL_ESCAPE_SEQUENCE;
 916             break;
 917         }
 918     }
 919     if(U_SUCCESS(*err)) {
 920         _this->toULength = 0;
 921     }
 922 }
 923
 924 /*Checks the characters of the buffer against valid 2022 escape sequences
 925 *if the match we return a pointer to the initial start of the sequence otherwise
 926 *we return sourceLimit
 927 */
 928 /*for 2022 looks ahead in the stream
 929  *to determine the longest possible convertible
 930  *data stream
 931  */
 932 static U_INLINE const char*
 933 getEndOfBuffer_2022(const char** source,
 934                    const char* sourceLimit,
 935                    UBool flush){
 936
 937     const char* mySource = *source;
 938
 939 #ifdef U_ENABLE_GENERIC_ISO_2022
 940     if (*source >= sourceLimit)
 941         return sourceLimit;
 942
 943     do{
 944
 945         if (*mySource == ESC_2022){
 946             int8_t i;
 947             int32_t key = 0;
 948             int32_t offset;
 949             UCNV_TableStates_2022 value = VALID_NON_TERMINAL_2022;
 950
 951             /* Kludge: I could not
 952             * figure out the reason for validating an escape sequence
 953             * twice - once here and once in changeState_2022().
 954             * is it possible to have an ESC character in a ISO2022
 955             * byte stream which is valid in a code page? Is it legal?
 956             */
 957             for (i=0;
 958             (mySource+i < sourceLimit)&&(value == VALID_NON_TERMINAL_2022);
 959             i++) {
 960                 value =  getKey_2022(*(mySource+i), &key, &offset);
 961             }
 962             if (value > 0 || *mySource==ESC_2022)
 963                 return mySource;
 964
 965             if ((value == VALID_NON_TERMINAL_2022)&&(!flush) )
 966                 return sourceLimit;
 967         }
 968     }while (++mySource < sourceLimit);
 969
 970     return sourceLimit;
 971 #else
 972     while(mySource < sourceLimit && *mySource != ESC_2022) {
 973         ++mySource;
 974     }
 975     return mySource;
 976 #endif
 977 }
 978
 979
 980 /* This inline function replicates code in _MBCSFromUChar32() function in ucnvmbcs.c
 981  * any future change in _MBCSFromUChar32() function should be reflected in
 982  * this macro
 983  */
 984 static U_INLINE void
 985 MBCS_FROM_UCHAR32_ISO2022(UConverterSharedData* sharedData,
 986                                          UChar32 c,
 987                                          uint32_t* value,
 988                                          UBool useFallback,
 989                                          int32_t *length,
 990                                          int outputType)
 991 {
 992     const int32_t *cx;
 993     const uint16_t *table;
 994     uint32_t stage2Entry;
 995     uint32_t myValue;
 996     const uint8_t *p;
 997     /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
 998     if(c<0x10000 || (sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
 999         table=sharedData->mbcs.fromUnicodeTable;
1000         stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
1001         /* get the bytes and the length for the output */
1002         if(outputType==MBCS_OUTPUT_2){
1003             myValue=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
1004             if(myValue<=0xff) {
1005                 *length=1;
1006             } else {
1007                 *length=2;
1008             }
1009         } else /* outputType==MBCS_OUTPUT_3 */ {
1010             p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
1011             myValue=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2];
1012             if(myValue<=0xff) {
1013                 *length=1;
1014             } else if(myValue<=0xffff) {
1015                 *length=2;
1016             } else {
1017                 *length=3;
1018             }
1019         }
1020         /* is this code point assigned, or do we use fallbacks? */
1021         if( (stage2Entry&(1<<(16+(c&0xf))))!=0 ||
1022             (FROM_U_USE_FALLBACK(useFallback, c) && myValue!=0)
1023         ) {
1024             /*
1025              * We allow a 0 byte output if the "assigned" bit is set for this entry.
1026              * There is no way with this data structure for fallback output
1027              * to be a zero byte.
1028              */
1029             /* assigned */
1030             *value=myValue;
1031             return;
1032         }
1033     }
1034
1035     cx=sharedData->mbcs.extIndexes;
1036     if(cx!=NULL) {
1037         *length=ucnv_extSimpleMatchFromU(cx, c, value, useFallback);
1038         return;
1039     }
1040
1041     /* unassigned */
1042     *length=0;
1043 }
1044
1045 /* This inline function replicates code in _MBCSSingleFromUChar32() function in ucnvmbcs.c
1046  * any future change in _MBCSSingleFromUChar32() function should be reflected in
1047  * this macro
1048  */
1049 static U_INLINE void
1050 MBCS_SINGLE_FROM_UCHAR32(UConverterSharedData* sharedData,
1051                                        UChar32 c,
1052                                        uint32_t* retval,
1053                                        UBool useFallback)
1054 {
1055     const uint16_t *table;
1056     int32_t value;
1057     /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
1058     if(c>=0x10000 && !(sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
1059         *retval=(uint16_t)-1;
1060         return;
1061     }
1062     /* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */
1063     table=sharedData->mbcs.fromUnicodeTable;
1064     /* get the byte for the output */
1065     value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t *)sharedData->mbcs.fromUnicodeBytes, c);
1066     /* is this code point assigned, or do we use fallbacks? */
1067     if(useFallback ? value>=0x800 : value>=0xc00) {
1068         value &=0xff;
1069     } else {
1070         value= -1;
1071     }
1072     *retval=(uint16_t) value;
1073 }
1074
#ifdef U_ENABLE_GENERIC_ISO_2022

/**********************************************************************************
*  ISO-2022 Converter
*
*  Generic ISO-2022 to-Unicode conversion: the input is split at ESC bytes.
*  Each run between escape sequences is handed to the currently selected
*  sub-converter (ASCII until a sequence selects another one), and each
*  escape sequence is handed to changeState_2022().
*/

static void
T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverterToUnicodeArgs* args,
                                                           UErrorCode* err){
    UConverter *const outer = args->converter;
    UConverterDataISO2022 *const data = (UConverterDataISO2022 *)(outer->extraInfo);
    const char *const inputEnd = args->sourceLimit;
    const char *runEnd;
    const char *inputBefore;
    const UChar *outputBefore;
    int8_t count;

    while (args->source < inputEnd) {
        /* key!=0 means that we are in the middle of an escape sequence */
        if (data->key == 0) {
            /* find the end of the run: next escape sequence or end of buffer */
            runEnd = getEndOfBuffer_2022(&(args->source), inputEnd, args->flush);

            if (args->source < runEnd) {
                UConverter *inner;
                UBool mustReturn;

                if (data->currentConverter == NULL) {
                    /* no charset selected yet: start with ASCII */
                    data->currentConverter = ucnv_open("ASCII", err);
                    if (U_FAILURE(*err)) {
                        return;
                    }

                    data->currentConverter->fromCharErrorBehaviour = UCNV_TO_U_CALLBACK_STOP;
                    outer->mode = UCNV_SO;
                }
                inner = data->currentConverter;

                /* convert to before the ESC or until the end of the buffer */
                data->isFirstBuffer = FALSE;
                inputBefore = args->source;
                outputBefore = args->target;
                args->converter = inner;
                ucnv_toUnicode(args->converter,
                    &args->target,
                    args->targetLimit,
                    &args->source,
                    runEnd,
                    args->offsets,
                    (UBool)(args->flush && runEnd == inputEnd),
                    err);
                args->converter = outer;

                if (*err == U_BUFFER_OVERFLOW_ERROR) {
                    /* move the overflow buffer from the sub-converter to this one */
                    outer->UCharErrorBufferLength = inner->UCharErrorBufferLength;
                    count = outer->UCharErrorBufferLength;
                    inner->UCharErrorBufferLength = 0;
                    if (count > 0) {
                        uprv_memcpy(outer->UCharErrorBuffer,
                                    inner->UCharErrorBuffer,
                                    count*U_SIZEOF_UCHAR);
                    }
                    return;
                }

                /*
                 * Return to the caller on at least one of:
                 * - error while converting
                 * - done with the entire buffer
                 * - need to write offsets or update the current offset
                 *   (leave that up to the code in ucnv.c)
                 * - the sub-converter holds a partial character before the ESC
                 *
                 * Otherwise we just stopped at an ESC byte and continue
                 * with changeState_2022().
                 */
                mustReturn = (UBool)(
                    U_FAILURE(*err) ||
                    (args->source == inputEnd) ||
                    ((args->offsets != NULL) &&
                        (args->target != outputBefore || args->source != inputBefore)) ||
                    ((runEnd < inputEnd) && (inner->toULength > 0)));

                if (mustReturn) {
                    /* copy partial or error input for truncated detection and error handling */
                    if (U_FAILURE(*err)) {
                        outer->invalidCharLength = inner->invalidCharLength;
                        count = outer->invalidCharLength;
                        if (count > 0) {
                            uprv_memcpy(outer->invalidCharBuffer, inner->invalidCharBuffer, count);
                        }
                    } else {
                        outer->toULength = inner->toULength;
                        count = outer->toULength;
                        if (count > 0) {
                            uprv_memcpy(outer->toUBytes, inner->toUBytes, count);
                            if (args->source < runEnd) {
                                *err = U_TRUNCATED_CHAR_FOUND; /* truncated input before ESC */
                            }
                        }
                    }
                    return;
                }
            }
        }

        /* at an ESC byte, or continuing an escape sequence from the previous buffer */
        inputBefore = args->source;
        changeState_2022(args->converter,
               &(args->source),
               inputEnd,
               ISO_2022,
               err);
        if (U_FAILURE(*err) || (args->source != inputBefore && args->offsets != NULL)) {
            /* let the ucnv.c code update its current offset */
            return;
        }
    }
}

#endif
1188
1189 /*
1190  * To Unicode Callback helper function
1191  */
1192 static void
1193 toUnicodeCallback(UConverter *cnv,
1194                   const uint32_t sourceChar, const uint32_t targetUniChar,
1195                   UErrorCode* err){
1196     if(sourceChar>0xff){
1197         cnv->toUBytes[0] = (uint8_t)(sourceChar>>8);
1198         cnv->toUBytes[1] = (uint8_t)sourceChar;
1199         cnv->toULength = 2;
1200     }
1201     else{
1202         cnv->toUBytes[0] =(char) sourceChar;
1203         cnv->toULength = 2;
1204     }
1205
1206     if(targetUniChar == (missingCharMarker-1/*0xfffe*/)){
1207         *err = U_INVALID_CHAR_FOUND;
1208     }
1209     else{
1210         *err = U_ILLEGAL_CHAR_FOUND;
1211     }
1212 }
1213
1214 /**************************************ISO-2022-JP*************************************************/
1215
1216 /************************************** IMPORTANT **************************************************
1217 * The UConverter_fromUnicode_ISO2022_JP converter does not use ucnv_fromUnicode() functions for SBCS,DBCS and
1218 * MBCS; instead, the values are obtained directly by calling _MBCSFromUChar32().
1219 * The converter iterates over each Unicode codepoint
1220 * to obtain the equivalent codepoints from the codepages supported. Since the source buffer is
1221 * processed one char at a time it would make sense to reduce the extra processing a canned converter
1222 * would do as far as possible.
1223 *
1224 * If the implementation of these macros or structure of sharedData struct change in the future, make
1225 * sure that ISO-2022 is also changed.
1226 ***************************************************************************************************
1227 */
1228
1229 /***************************************************************************************************
1230 * Rules for ISO-2022-jp encoding
1231 * (i)   Escape sequences must be fully contained within a line they should not
1232 *       span new lines or CRs
1233 * (ii)  If the last character on a line is represented by two bytes then an ASCII or
1234 *       JIS-Roman character escape sequence should follow before the line terminates
1235 * (iii) If the first character on the line is represented by two bytes then a two
1236 *       byte character escape sequence should precede it
1237 * (iv)  If no escape sequence is encountered then the characters are ASCII
1238 * (v)   Latin(ISO-8859-1) and Greek(ISO-8859-7) characters must be designated to G2,
1239 *       and invoked with SS2 (ESC N).
1240 * (vi)  If there is any G0 designation in text, there must be a switch to
1241 *       ASCII or to JIS X 0201-Roman before a space character (but not
1242 *       necessarily before "ESC 4/14 2/0" or "ESC N ' '") or control
1243 *       characters such as tab or CRLF.
1244  * (vii) Supported encodings:
1245 *          ASCII, JISX201, JISX208, JISX212, GB2312, KSC5601, ISO-8859-1,ISO-8859-7
1246 *
1247 *  source : RFC-1554
1248 *
1249 *          JISX201, JISX208,JISX212 : new .cnv data files created
1250 *          KSC5601 : alias to ibm-949 mapping table
1251 *          GB2312 : alias to ibm-1386 mapping table
1252 *          ISO-8859-1 : Algorithmic implemented as LATIN1 case
1253 *          ISO-8859-7 : alias to ibm-9409 mapping table
1254 */
1255
1256 /* preference order of JP charsets */
1257 static const StateEnum jpCharsetPref[]={
1258     ASCII,
1259     JISX201,
1260     ISO8859_1,
1261     ISO8859_7,
1262     JISX208,
1263     JISX212,
1264     GB2312,
1265     KSC5601,
1266     HWKANA_7BIT
1267 };
1268
/*
 * Designation escape sequences, one row per charset.
 * The rows must be in order of the enum constants like JISX201  = 3,
 * not in order of jpCharsetPref[]!
 * Bytes are spelled as numeric values; unused trailing bytes of a row are 0.
 */
static const char escSeqChars[][6] ={
    { 0x1B, 0x28, 0x42 },           /* ESC ( B    ASCII       */
    { 0x1B, 0x2E, 0x41 },           /* ESC . A    ISO-8859-1  */
    { 0x1B, 0x2E, 0x46 },           /* ESC . F    ISO-8859-7  */
    { 0x1B, 0x28, 0x4A },           /* ESC ( J    JISX-201    */
    { 0x1B, 0x24, 0x42 },           /* ESC $ B    JISX-208    */
    { 0x1B, 0x24, 0x28, 0x44 },     /* ESC $ ( D  JISX-212    */
    { 0x1B, 0x24, 0x41 },           /* ESC $ A    GB2312      */
    { 0x1B, 0x24, 0x28, 0x43 },     /* ESC $ ( C  KSC5601     */
    { 0x1B, 0x28, 0x49 }            /* ESC ( I    HWKANA_7BIT */
};
/* number of bytes in each escSeqChars[] row, in the same order */
static const int32_t escSeqCharsLen[] ={
    /* ESC ( B    ASCII       */ 3,
    /* ESC . A    ISO-8859-1  */ 3,
    /* ESC . F    ISO-8859-7  */ 3,
    /* ESC ( J    JISX-201    */ 3,
    /* ESC $ B    JISX-208    */ 3,
    /* ESC $ ( D  JISX-212    */ 4,
    /* ESC $ A    GB2312      */ 3,
    /* ESC $ ( C  KSC5601     */ 4,
    /* ESC ( I    HWKANA_7BIT */ 3
};
1296
1297 /*
1298 * The iteration over various code pages works this way:
1299 * i)   Get the currentState from myConverterData->currentState
1300 * ii)  Check if the character is mapped to a valid character in the currentState
1301 *      Yes ->  a) set the initIterState to currentState
1302 *       b) remain in this state until an invalid character is found
1303 *      No  ->  a) go to the next code page and find the character
1304 * iii) Before changing the state, increment the current state and check if the current state
1305 *      is equal to the initIterState
1306 *      Yes ->  A character that cannot be represented in any of the supported encodings
1307 *       break and return a U_INVALID_CHARACTER error
1308 *      No  ->  Continue and find the character in next code page
1309 *
1310 *
1311 * TODO: Implement a priority technique where the users are allowed to set the priority of code pages
1312 */
1313
/*
 * Converts UTF-16 text from args->source into ISO-2022-JP family bytes in
 * args->target and, when args->offsets is not NULL, records one source index
 * per output byte.
 *
 * For each code point the function walks an ordered list of candidate
 * charsets (choices[]), takes the first one that yields a mapping, and stages
 * in buffer[] whatever is needed to express it: SI, a designation escape
 * sequence, a shift (SO or ESC N), and then the one or two character bytes.
 * The designation/shift state lives in converterData->fromU2022State; a
 * pending lead surrogate or an offending code point is kept in
 * args->converter->fromUChar32.
 *
 * When args->flush is set and all input was consumed without error, the
 * output is returned to G0/ASCII at the very end.
 *
 * On return args->source and args->target are advanced. *err is set to
 * U_ILLEGAL_CHAR_FOUND for unpaired surrogates and for SO/SI/ESC in the
 * input, to U_INVALID_CHAR_FOUND when no candidate charset maps the code
 * point, and to U_BUFFER_OVERFLOW_ERROR when the target is full.
 */
1314 static void
1315 UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err) {
1316     UConverterDataISO2022 *converterData; /* ISO-2022 data taken from converter->extraInfo */
1317     ISO2022State *pFromU2022State; /* designated charsets (cs[]) and active shift state (g) for output */
1318     uint8_t *target = (uint8_t *) args->target;
1319     const uint8_t *targetLimit = (const uint8_t *) args->targetLimit;
1320     const UChar* source = args->source;
1321     const UChar* sourceLimit = args->sourceLimit;
1322     int32_t* offsets = args->offsets;
1323     UChar32 sourceChar;
1324     char buffer[8]; /* staging area: SI/escape/shift bytes followed by the character bytes */
1325     int32_t len, outLen; /* len: bytes of the converted character (0 = no mapping yet); outLen: bytes staged in buffer[] */
1326     int8_t choices[10]; /* charsets to try, in order; rebuilt whenever choiceCount is 0 */
1327     int32_t choiceCount;
1328     uint32_t targetValue = 0;
1329     UBool useFallback;
1330
1331     int32_t i;
1332     int8_t cs, g; /* charset chosen for the character, and the G set (0, 1 or 2) it is output through */
1333
1334     /* set up the state */
1335     converterData     = (UConverterDataISO2022*)args->converter->extraInfo;
1336     pFromU2022State   = &converterData->fromU2022State;
1337     useFallback       = args->converter->useFallback;
1338
1339     choiceCount = 0;
1340
1341     /* check if the last codepoint of previous buffer was a lead surrogate*/
1342     if((sourceChar = args->converter->fromUChar32)!=0 && target< targetLimit) {
1343         goto getTrail; /* resume inside the loop at the trail-surrogate lookup */
1344     }
1345
1346     while(source < sourceLimit) {
1347         if(target < targetLimit) {
1348
1349             sourceChar  = *(source++);
1350             /*check if the char is a First surrogate*/
1351             if(UTF_IS_SURROGATE(sourceChar)) {
1352                 if(UTF_IS_SURROGATE_FIRST(sourceChar)) {
1353 getTrail:
1354                     /*look ahead to find the trail surrogate*/
1355                     if(source < sourceLimit) {
1356                         /* test the following code unit */
1357                         UChar trail=(UChar) *source;
1358                         if(UTF_IS_SECOND_SURROGATE(trail)) {
1359                             source++;
1360                             sourceChar=UTF16_GET_PAIR_VALUE(sourceChar, trail);
1361                             args->converter->fromUChar32=0x00;
1362                             /* convert this supplementary code point */
1363                             /* exit this condition tree */
1364                         } else {
1365                             /* this is an unmatched lead code unit (1st surrogate) */
1366                             /* callback(illegal) */
1367                             *err=U_ILLEGAL_CHAR_FOUND;
1368                             args->converter->fromUChar32=sourceChar;
1369                             break;
1370                         }
1371                     } else {
1372                         /* no more input */
1373                         args->converter->fromUChar32=sourceChar; /* keep the lead surrogate for the next call */
1374                         break;
1375                     }
1376                 } else {
1377                     /* this is an unmatched trail code unit (2nd surrogate) */
1378                     /* callback(illegal) */
1379                     *err=U_ILLEGAL_CHAR_FOUND;
1380                     args->converter->fromUChar32=sourceChar;
1381                     break;
1382                 }
1383             }
1384
1385             /* do not convert SO/SI/ESC */
1386             if(IS_2022_CONTROL(sourceChar)) {
1387                 /* callback(illegal) */
1388                 *err=U_ILLEGAL_CHAR_FOUND;
1389                 args->converter->fromUChar32=sourceChar;
1390                 break;
1391             }
1392
1393             /* do the conversion */
1394
1395             if(choiceCount == 0) { /* (re)build the ordered candidate list */
1396                 uint16_t csm;
1397
1398                 /*
1399                  * The csm variable keeps track of which charsets are allowed
1400                  * and not used yet while building the choices[].
1401                  */
1402                 csm = jpCharsetMasks[converterData->version];
1403                 choiceCount = 0;
1404
1405                 /* JIS7/8: try single-byte half-width Katakana before JISX208 */
1406                 if(converterData->version == 3 || converterData->version == 4) {
1407                     choices[choiceCount++] = cs = (int8_t)HWKANA_7BIT;
1408                     csm &= ~CSM(cs);
1409                 }
1410
1411                 /* try the current G0 charset */
1412                 choices[choiceCount++] = cs = pFromU2022State->cs[0];
1413                 csm &= ~CSM(cs);
1414
1415                 /* try the current G2 charset */
1416                 if((cs = pFromU2022State->cs[2]) != 0) {
1417                     choices[choiceCount++] = cs;
1418                     csm &= ~CSM(cs);
1419                 }
1420
1421                 /* try all the other possible charsets */
1422                 for(i = 0; i < LENGTHOF(jpCharsetPref); ++i) {
1423                     cs = (int8_t)jpCharsetPref[i];
1424                     if(CSM(cs) & csm) {
1425                         choices[choiceCount++] = cs;
1426                         csm &= ~CSM(cs);
1427                     }
1428                 }
1429             }
1430
1431             cs = g = 0; /* default to G0; the cases below set g when output goes through G1 or G2 */
1432             len = 0;
1433
1434             for(i = 0; i < choiceCount && len == 0; ++i) { /* stop at the first charset that yields a mapping */
1435                 cs = choices[i];
1436                 switch(cs) {
1437                 case ASCII:
1438                     if(sourceChar <= 0x7f) {
1439                         targetValue = (uint32_t)sourceChar;
1440                         len = 1;
1441                     }
1442                     break;
1443                 case ISO8859_1:
1444                     if(0x80 <= sourceChar && sourceChar <= 0xff) {
1445                         targetValue = (uint32_t)sourceChar - 0x80;
1446                         len = 1;
1447                         g = 2;
1448                     }
1449                     break;
1450                 case HWKANA_7BIT:
1451                     if((uint32_t)(0xff9f-sourceChar)<=(0xff9f-0xff61)) { /* unsigned range test: U+FF61 <= sourceChar <= U+FF9F */
1452                         targetValue = (uint32_t)(sourceChar - (0xff61 - 0x21)); /* U+FF61 maps to 0x21 */
1453                         len = 1;
1454
1455                         if(converterData->version==3) {
1456                             /* JIS7: use G1 (SO) */
1457                             pFromU2022State->cs[1] = cs; /* do not output an escape sequence */
1458                             g = 1;
1459                         } else if(converterData->version==4) {
1460                             /* JIS8: use 8-bit bytes with any single-byte charset, see escape sequence output below */
1461                             int8_t cs0;
1462
1463                             targetValue += 0x80;
1464
1465                             cs0 = pFromU2022State->cs[0];
1466                             if(IS_JP_DBCS(cs0)) {
1467                                 /* switch from a DBCS charset to JISX201 */
1468                                 cs = (int8_t)JISX201;
1469                             } else {
1470                                 /* stay in the current G0 charset */
1471                                 cs = cs0;
1472                             }
1473                         }
1474                     }
1475                     break;
1476                 case JISX201:
1477                     /* G0 SBCS */
1478                     MBCS_SINGLE_FROM_UCHAR32(
1479                         converterData->myConverterArray[cs],
1480                         sourceChar, &targetValue,
1481                         useFallback);
1482                     if(targetValue <= 0x7f) { /* accept only 7-bit results */
1483                         len = 1;
1484                     }
1485                     break;
1486                 case ISO8859_7:
1487                     /* G0 SBCS forced to 7-bit output */
1488                     MBCS_SINGLE_FROM_UCHAR32(
1489                         converterData->myConverterArray[cs],
1490                         sourceChar, &targetValue,
1491                         useFallback);
1492                     if(0x80 <= targetValue && targetValue <= 0xff) {
1493                         targetValue -= 0x80;
1494                         len = 1;
1495                         g = 2;
1496                     }
1497                     break;
1498                 default:
1499                     /* G0 DBCS */
1500                     MBCS_FROM_UCHAR32_ISO2022(
1501                         converterData->myConverterArray[cs],
1502                         sourceChar, &targetValue,
1503                         useFallback, &len, MBCS_OUTPUT_2);
1504                     if(len != 2) { /* anything but a two-byte result counts as "no mapping" here */
1505                         len = 0;
1506                     }
1507                     break;
1508                 }
1509             }
1510
1511             if(len > 0) {
1512                 outLen = 0; /* count output bytes */
1513
1514                 /* write SI if necessary (only for JIS7) */
1515                 if(pFromU2022State->g == 1 && g == 0) {
1516                     buffer[outLen++] = UCNV_SI;
1517                     pFromU2022State->g = 0;
1518                 }
1519
1520                 /* write the designation sequence if necessary */
1521                 if(cs != pFromU2022State->cs[g]) {
1522                     int32_t escLen = escSeqCharsLen[cs];
1523                     uprv_memcpy(buffer + outLen, escSeqChars[cs], escLen);
1524                     outLen += escLen;
1525                     pFromU2022State->cs[g] = cs;
1526
1527                     /* invalidate the choices[] */
1528                     choiceCount = 0;
1529                 }
1530
1531                 /* write the shift sequence if necessary */
1532                 if(g != pFromU2022State->g) {
1533                     switch(g) {
1534                     /* case 0 handled before writing escapes */
1535                     case 1:
1536                         buffer[outLen++] = UCNV_SO;
1537                         pFromU2022State->g = 1;
1538                         break;
1539                     default: /* case 2 */
1540                         buffer[outLen++] = 0x1b; /* ESC */
1541                         buffer[outLen++] = 0x4e; /* 'N': ESC N is a single shift, so pFromU2022State->g is left unchanged */
1542                         break;
1543                     /* no case 3: no SS3 in ISO-2022-JP-x */
1544                     }
1545                 }
1546
1547                 /* write the output bytes */
1548                 if(len == 1) {
1549                     buffer[outLen++] = (char)targetValue;
1550                 } else /* len == 2 */ {
1551                     buffer[outLen++] = (char)(targetValue >> 8);
1552                     buffer[outLen++] = (char)targetValue;
1553                 }
1554             } else {
1555                 /*
1556                  * if we cannot find the character after checking all codepages
1557                  * then this is an error
1558                  */
1559                 *err = U_INVALID_CHAR_FOUND;
1560                 args->converter->fromUChar32=sourceChar;
1561                 break;
1562             }
1563
1564             if(sourceChar == CR || sourceChar == LF) {
1565                 /* reset the G2 state at the end of a line (conversion got us into ASCII or JISX201 already) */
1566                 pFromU2022State->cs[2] = 0;
1567                 choiceCount = 0;
1568             }
1569
1570             /* output outLen>0 bytes in buffer[] */
1571             if(outLen == 1) { /* target < targetLimit was checked before this character, so one byte always fits */
1572                 *target++ = buffer[0];
1573                 if(offsets) {
1574                     *offsets++ = (int32_t)(source - args->source - 1); /* -1: known to be ASCII */
1575                 }
1576             } else if(outLen == 2 && (target + 2) <= targetLimit) { /* both bytes fit directly */
1577                 *target++ = buffer[0];
1578                 *target++ = buffer[1];
1579                 if(offsets) {
1580                     int32_t sourceIndex = (int32_t)(source - args->source - U16_LENGTH(sourceChar));
1581                     *offsets++ = sourceIndex;
1582                     *offsets++ = sourceIndex;
1583                 }
1584             } else {
1585                 fromUWriteUInt8( /* general case; helper is defined elsewhere -- presumably it handles bytes that do not fit in the target (TODO confirm) */
1586                     args->converter,
1587                     buffer, outLen,
1588                     &target, (const char *)targetLimit,
1589                     &offsets, (int32_t)(source - args->source - U16_LENGTH(sourceChar)),
1590                     err);
1591                 if(U_FAILURE(*err)) {
1592                     break;
1593                 }
1594             }
1595         } /* end if(target < targetLimit) */
1596         else{
1597             *err =U_BUFFER_OVERFLOW_ERROR;
1598             break;
1599         }
1600
1601     }/* end while(source < sourceLimit) */
1602
1603     /*
1604      * the end of the input stream and detection of truncated input
1605      * are handled by the framework, but for ISO-2022-JP conversion
1606      * we need to be in ASCII mode at the very end
1607      *
1608      * conditions:
1609      *   successful
1610      *   in SO mode or not in ASCII mode
1611      *   end of input and no truncated input
1612      */
1613     if( U_SUCCESS(*err) &&
1614         (pFromU2022State->g!=0 || pFromU2022State->cs[0]!=ASCII) &&
1615         args->flush && source>=sourceLimit && args->converter->fromUChar32==0
1616     ) {
1617         int32_t sourceIndex;
1618
1619         outLen = 0;
1620
1621         if(pFromU2022State->g != 0) {
1622             buffer[outLen++] = UCNV_SI;
1623             pFromU2022State->g = 0;
1624         }
1625
1626         if(pFromU2022State->cs[0] != ASCII) {
1627             int32_t escLen = escSeqCharsLen[ASCII];
1628             uprv_memcpy(buffer + outLen, escSeqChars[ASCII], escLen);
1629             outLen += escLen;
1630             pFromU2022State->cs[0] = (int8_t)ASCII;
1631         }
1632
1633         /* get the source index of the last input character */
1634         /*
1635          * TODO this would be simpler and more reliable if we used a pair
1636          * of sourceIndex/prevSourceIndex like in ucnvmbcs.c
1637          * so that we could simply use the prevSourceIndex here;
1638          * this code gives an incorrect result for the rare case of an unmatched
1639          * trail surrogate that is alone in the last buffer of the text stream
1640          */
1641         sourceIndex=(int32_t)(source-args->source);
1642         if(sourceIndex>0) {
1643             --sourceIndex;
1644             if( U16_IS_TRAIL(args->source[sourceIndex]) &&
1645                 (sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1]))
1646             ) {
1647                 --sourceIndex; /* last unit is a trail surrogate: step back once more to the lead's position */
1648             }
1649         } else {
1650             sourceIndex=-1; /* nothing was consumed in this call; -1 is passed as the offset for the reset bytes */
1651         }
1652
1653         fromUWriteUInt8(
1654             args->converter,
1655             buffer, outLen,
1656             &target, (const char *)targetLimit,
1657             &offsets, sourceIndex,
1658             err);
1659     }
1660
1661     /*save the state and return */
1662     args->source = source;
1663     args->target = (char*)target;
1664 }
1665
1666 /*************** to unicode *******************/
1667
/*
 * Converts ISO-2022-JP family bytes from args->source into UTF-16 in
 * args->target and, when args->offsets is not NULL, stores for each output
 * code unit the index of the source byte that started its character.
 *
 * SI and SO are honored only when myData->version==3 (JIS7); ESC hands
 * control to changeState_2022(); CR and LF reset the G0/G2 state before
 * being converted themselves. Every other byte is converted according to the
 * charset currently selected in myData->toU2022State.
 *
 * A partial escape sequence (myData->key != 0) or a pending lead byte
 * (args->converter->toULength == 1) left over from a previous call is
 * resumed before the main loop starts.
 *
 * On return args->source and args->target are advanced. *err is set by
 * changeState_2022() / toUnicodeCallback(), to U_PARSE_ERROR for an empty
 * segment in plain ISO-2022-JP (version 0), or to U_BUFFER_OVERFLOW_ERROR
 * when the target is full.
 */
1668 static void
1669 UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
1670                                                UErrorCode* err){
1671     char tempBuf[3]; /* holds the two bytes of a double-byte character for the MBCS lookup */
1672     const char *mySource = (char *) args->source;
1673     UChar *myTarget = args->target;
1674     const char *mySourceLimit = args->sourceLimit;
1675     uint32_t targetUniChar = 0x0000;
1676     uint32_t mySourceChar = 0x0000;
1677     UConverterDataISO2022* myData;
1678     ISO2022State *pToU2022State; /* designated charsets (cs[]) and active G set (g, prevG) for input */
1679     StateEnum cs;
1680
1681     myData=(UConverterDataISO2022*)(args->converter->extraInfo);
1682     pToU2022State = &myData->toU2022State;
1683
1684     if(myData->key != 0) {
1685         /* continue with a partial escape sequence */
1686         goto escape;
1687     } else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) {
1688         /* continue with a partial double-byte character */
1689         mySourceChar = args->converter->toUBytes[0];
1690         args->converter->toULength = 0;
1691         cs = (StateEnum)pToU2022State->cs[pToU2022State->g];
1692         goto getTrailByte;
1693     }
1694
1695     while(mySource < mySourceLimit){
1696
1697         targetUniChar =missingCharMarker; /* stays at the marker unless a branch below finds a mapping */
1698
1699         if(myTarget < args->targetLimit){
1700
1701             mySourceChar= (unsigned char) *mySource++;
1702
1703             switch(mySourceChar) {
1704             case UCNV_SI:
1705                 if(myData->version==3) {
1706                     pToU2022State->g=0;
1707                     continue;
1708                 } else {
1709                     /* only JIS7 uses SI/SO, not ISO-2022-JP-x */
1710                     myData->isEmptySegment = FALSE;     /* reset this, we have a different error */
1711                     break; /* targetUniChar is still missingCharMarker, so the callback branch below runs */
1712                 }
1713
1714             case UCNV_SO:
1715                 if(myData->version==3) {
1716                     /* JIS7: switch to G1 half-width Katakana */
1717                     pToU2022State->cs[1] = (int8_t)HWKANA_7BIT;
1718                     pToU2022State->g=1;
1719                     continue;
1720                 } else {
1721                     /* only JIS7 uses SI/SO, not ISO-2022-JP-x */
1722                     myData->isEmptySegment = FALSE;     /* reset this, we have a different error */
1723                     break; /* targetUniChar is still missingCharMarker, so the callback branch below runs */
1724                 }
1725
1726             case ESC_2022:
1727                 mySource--; /* back up so that changeState_2022 starts at the ESC byte itself */
1728 escape:
1729                 {
1730                     const char * mySourceBefore = mySource;
1731                     int8_t toULengthBefore = args->converter->toULength;
1732
1733                     changeState_2022(args->converter,&(mySource),
1734                         mySourceLimit, ISO_2022_JP,err);
1735
1736                     /* If in ISO-2022-JP only and we successfully completed an escape sequence, but previous segment was empty, create an error */
1737                     if ( myData->version == 0 && myData->key == 0 && U_SUCCESS(*err) && myData->isEmptySegment ) {
1738                         *err = U_PARSE_ERROR;   /* temporary err to flag empty segment, will be reset to U_ILLEGAL_ESCAPE_SEQUENCE in _toUnicodeWithCallback */
1739                         args->converter->toULength = toULengthBefore + (mySource - mySourceBefore);
1740                     }
1741
1742                 }
1743                 /* invalid or illegal escape sequence */
1744                 if(U_FAILURE(*err)){
1745                     args->target = myTarget;
1746                     args->source = mySource;
1747                     myData->isEmptySegment = FALSE;     /* Reset to avoid future spurious errors */
1748                     return;
1749                 }
1750                 /* If we successfully completed an escape sequence, we begin a new segment, empty so far */
1751                 if (myData->key == 0) {
1752                     myData->isEmptySegment = TRUE;
1753                 }
1754                 continue;
1755
1756             /* ISO-2022-JP does not use single-byte (C1) SS2 and SS3 */
1757
1758             case CR:
1759                 /*falls through*/
1760             case LF:
1761                 /* automatically reset to single-byte mode */
1762                 if((StateEnum)pToU2022State->cs[0] != ASCII && (StateEnum)pToU2022State->cs[0] != JISX201) {
1763                     pToU2022State->cs[0] = (int8_t)ASCII;
1764                 }
1765                 pToU2022State->cs[2] = 0;
1766                 pToU2022State->g = 0;
1767                 /* falls through */
1768             default:
1769                 /* convert one or two bytes */
1770                 myData->isEmptySegment = FALSE;
1771                 cs = (StateEnum)pToU2022State->cs[pToU2022State->g];
1772                 if( (uint8_t)(mySourceChar - 0xa1) <= (0xdf - 0xa1) && myData->version==4 && /* unsigned range test: 0xa1 <= byte <= 0xdf */
1773                     !IS_JP_DBCS(cs)
1774                 ) {
1775                     /* 8-bit halfwidth katakana in any single-byte mode for JIS8 */
1776                     targetUniChar = mySourceChar + (0xff61 - 0xa1); /* 0xa1 maps to U+FF61 */
1777
1778                     /* return from a single-shift state to the previous one */
1779                     if(pToU2022State->g >= 2) {
1780                         pToU2022State->g=pToU2022State->prevG;
1781                     }
1782                 } else switch(cs) {
1783                 case ASCII:
1784                     if(mySourceChar <= 0x7f) {
1785                         targetUniChar = mySourceChar;
1786                     }
1787                     break;
1788                 case ISO8859_1:
1789                     if(mySourceChar <= 0x7f) {
1790                         targetUniChar = mySourceChar + 0x80;
1791                     }
1792                     /* return from a single-shift state to the previous one */
1793                     pToU2022State->g=pToU2022State->prevG;
1794                     break;
1795                 case ISO8859_7:
1796                     if(mySourceChar <= 0x7f) {
1797                         /* convert mySourceChar+0x80 to use a normal 8-bit table */
1798                         targetUniChar =
1799                             _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(
1800                                 myData->myConverterArray[cs],
1801                                 mySourceChar + 0x80);
1802                     }
1803                     /* return from a single-shift state to the previous one */
1804                     pToU2022State->g=pToU2022State->prevG;
1805                     break;
1806                 case JISX201:
1807                     if(mySourceChar <= 0x7f) {
1808                         targetUniChar =
1809                             _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(
1810                                 myData->myConverterArray[cs],
1811                                 mySourceChar);
1812                     }
1813                     break;
1814                 case HWKANA_7BIT:
1815                     if((uint8_t)(mySourceChar - 0x21) <= (0x5f - 0x21)) { /* unsigned range test: 0x21 <= byte <= 0x5f */
1816                         /* 7-bit halfwidth Katakana */
1817                         targetUniChar = mySourceChar + (0xff61 - 0x21);
1818                     }
1819                     break;
1820                 default:
1821                     /* G0 DBCS */
1822                     if(mySource < mySourceLimit) {
1823                         char trailByte;
1824 getTrailByte:
1825                         tempBuf[0] = (char) (mySourceChar);
1826                         tempBuf[1] = trailByte = *mySource++;
1827                         mySourceChar = (mySourceChar << 8) | (uint8_t)(trailByte); /* now > 0xff, which marks a two-byte character below */
1828                         targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->myConverterArray[cs], tempBuf, 2, FALSE);
1829                     } else {
                         /* the lead byte is the last byte available: stash it and resume with it on the next call */
1830                         args->converter->toUBytes[0] = (uint8_t)mySourceChar;
1831                         args->converter->toULength = 1;
1832                         goto endloop;
1833                     }
1834                 }
1835                 break;
1836             }
1837             if(targetUniChar < (missingCharMarker-1/*0xfffe*/)){ /* a single UTF-16 code unit */
1838                 if(args->offsets){
1839                     args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2)); /* index of the character's first source byte */
1840                 }
1841                 *(myTarget++)=(UChar)targetUniChar;
1842             }
1843             else if(targetUniChar > missingCharMarker){
1844                 /* disassemble the surrogate pair and write to output*/
1845                 targetUniChar-=0x0010000;
1846                 *myTarget = (UChar)(0xd800+(UChar)(targetUniChar>>10));
1847                 if(args->offsets){
1848                     args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
1849                 }
1850                 ++myTarget;
1851                 if(myTarget< args->targetLimit){
1852                     *myTarget = (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));
1853                     if(args->offsets){
1854                         args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
1855                     }
1856                     ++myTarget;
1857                 }else{
                     /* no room left for the trail surrogate: queue it in the converter's UCharErrorBuffer */
1858                     args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]=
1859                                     (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));
1860                 }
1861
1862             }
1863             else{
1864                 /* Call the callback function*/
1865                 toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err);
1866                 break;
1867             }
1868         }
1869         else{
1870             *err =U_BUFFER_OVERFLOW_ERROR;
1871             break;
1872         }
1873     }
1874 endloop:
1875     args->target = myTarget;
1876     args->source = mySource;
1877 }
1878
1879
1880 /***************************************************************
1881 *   Rules for ISO-2022-KR encoding
1882 *   i) The KSC5601 designator sequence should appear only once in a file,
1883 *      at the beginning of a line before any KSC5601 characters. This usually
1884 *      means that it appears by itself on the first line of the file
1885 *  ii) There are only 2 shifting sequences SO to shift into double byte mode
1886 *      and SI to shift into single byte mode
1887 */
/*
 * ISO-2022-KR fromUnicode path that delegates the whole conversion to
 * ucnv_MBCSFromUnicodeWithOffsets(), using the converter stored in
 * myConverterData->currentConverter. It is called by
 * UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC when
 * converterData->version==1.
 *
 * args->converter is pointed at the inner converter for the duration of the
 * call and restored before returning. The pending code point (fromUChar32)
 * is copied into the inner converter before the call and copied back
 * afterwards. On U_BUFFER_OVERFLOW_ERROR the inner converter's
 * charErrorBuffer contents and length are copied to the outer converter and
 * the inner length is reset to 0.
 */
1888 static void
1889 UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterFromUnicodeArgs* args, UErrorCode* err){
1890
1891     UConverter* saveConv = args->converter; /* the outer (ISO-2022-KR) converter */
1892     UConverterDataISO2022 *myConverterData=(UConverterDataISO2022*)saveConv->extraInfo;
1893     args->converter=myConverterData->currentConverter; /* let the MBCS code operate on the inner converter */
1894
1895     myConverterData->currentConverter->fromUChar32 = saveConv->fromUChar32; /* hand over any pending code point */
1896     ucnv_MBCSFromUnicodeWithOffsets(args,err);
1897     saveConv->fromUChar32 = myConverterData->currentConverter->fromUChar32; /* and take back whatever is pending now */
1898
1899     if(*err == U_BUFFER_OVERFLOW_ERROR) {
1900         if(myConverterData->currentConverter->charErrorBufferLength > 0) {
1901             uprv_memcpy(
1902                 saveConv->charErrorBuffer,
1903                 myConverterData->currentConverter->charErrorBuffer,
1904                 myConverterData->currentConverter->charErrorBufferLength);
1905         }
1906         saveConv->charErrorBufferLength = myConverterData->currentConverter->charErrorBufferLength;
1907         myConverterData->currentConverter->charErrorBufferLength = 0; /* the bytes now belong to the outer converter */
1908     }
1909     args->converter=saveConv; /* restore the caller's view of args */
1910 }
1911
1912 static void
1913 UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err){
1914
1915     const UChar *source = args->source;
1916     const UChar *sourceLimit = args->sourceLimit;
1917     unsigned char *target = (unsigned char *) args->target;
1918     unsigned char *targetLimit = (unsigned char *) args->targetLimit;
1919     int32_t* offsets = args->offsets;
1920     uint32_t targetByteUnit = 0x0000;
1921     UChar32 sourceChar = 0x0000;
1922     UBool isTargetByteDBCS;
1923     UBool oldIsTargetByteDBCS;
1924     UConverterDataISO2022 *converterData;
1925     UConverterSharedData* sharedData;
1926     UBool useFallback;
1927     int32_t length =0;
1928
1929     converterData=(UConverterDataISO2022*)args->converter->extraInfo;
1930     /* if the version is 1 then the user is requesting
1931      * conversion with ibm-25546 pass the arguments to
1932      * MBCS converter and return
1933      */
1934     if(converterData->version==1){
1935         UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(args,err);
1936         return;
1937     }
1938
1939     /* initialize data */
1940     sharedData = converterData->currentConverter->sharedData;
1941     useFallback = args->converter->useFallback;
1942     isTargetByteDBCS=(UBool)args->converter->fromUnicodeStatus;
1943     oldIsTargetByteDBCS = isTargetByteDBCS;
1944
1945     isTargetByteDBCS   = (UBool) args->converter->fromUnicodeStatus;
1946     if((sourceChar = args->converter->fromUChar32)!=0 && target <targetLimit) {
1947         goto getTrail;
1948     }
1949     while(source < sourceLimit){
1950
1951         targetByteUnit = missingCharMarker;
1952
1953         if(target < (unsigned char*) args->targetLimit){
1954             sourceChar = *source++;
1955
1956             /* do not convert SO/SI/ESC */
1957             if(IS_2022_CONTROL(sourceChar)) {
1958                 /* callback(illegal) */
1959                 *err=U_ILLEGAL_CHAR_FOUND;
1960                 args->converter->fromUChar32=sourceChar;
1961                 break;
1962             }
1963
1964            /* length= ucnv_MBCSFromUChar32(converterData->currentConverter->sharedData,
1965                 sourceChar,&targetByteUnit,args->converter->useFallback);*/
1966             MBCS_FROM_UCHAR32_ISO2022(sharedData,sourceChar,&targetByteUnit,useFallback,&length,MBCS_OUTPUT_2);
1967             /* only DBCS or SBCS characters are expected*/
1968             /* DB characters with high bit set to 1 are expected */
1969             if(length > 2 || length==0 ||(((targetByteUnit & 0x8080) != 0x8080)&& length==2)){
1970                 targetByteUnit=missingCharMarker;
1971             }
1972             if (targetByteUnit != missingCharMarker){
1973
1974                 oldIsTargetByteDBCS = isTargetByteDBCS;
1975                 isTargetByteDBCS = (UBool)(targetByteUnit>0x00FF);
1976                   /* append the shift sequence */
1977                 if (oldIsTargetByteDBCS != isTargetByteDBCS ){
1978
1979                     if (isTargetByteDBCS)
1980                         *target++ = UCNV_SO;
1981                     else
1982                         *target++ = UCNV_SI;
1983                     if(offsets)
1984                         *(offsets++) = (int32_t)(source - args->source-1);
1985                 }
1986                 /* write the targetUniChar  to target */
1987                 if(targetByteUnit <= 0x00FF){
1988                     if( target < targetLimit){
1989                         *(target++) = (unsigned char) targetByteUnit;
1990                         if(offsets){
1991                             *(offsets++) = (int32_t)(source - args->source-1);
1992                         }
1993
1994                     }else{
1995                         args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit);
1996                         *err = U_BUFFER_OVERFLOW_ERROR;
1997                     }
1998                 }else{
1999                     if(target < targetLimit){
2000                         *(target++) =(unsigned char) ((targetByteUnit>>8) -0x80);
2001                         if(offsets){
2002                             *(offsets++) = (int32_t)(source - args->source-1);
2003                         }
2004                         if(target < targetLimit){
2005                             *(target++) =(unsigned char) (targetByteUnit -0x80);
2006                             if(offsets){
2007                                 *(offsets++) = (int32_t)(source - args->source-1);
2008                             }
2009                         }else{
2010                             args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit -0x80);
2011                             *err = U_BUFFER_OVERFLOW_ERROR;
2012                         }
2013                     }else{
2014                         args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) ((targetByteUnit>>8) -0x80);
2015                         args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit-0x80);
2016                         *err = U_BUFFER_OVERFLOW_ERROR;
2017                     }
2018                 }
2019
2020             }
2021             else{
2022                 /* oops.. the code point is unassingned
2023                  * set the error and reason
2024                  */
2025
2026                 /*check if the char is a First surrogate*/
2027                 if(UTF_IS_SURROGATE(sourceChar)) {
2028                     if(UTF_IS_SURROGATE_FIRST(sourceChar)) {
2029 getTrail:
2030                         /*look ahead to find the trail surrogate*/
2031                         if(source <  sourceLimit) {
2032                             /* test the following code unit */
2033                             UChar trail=(UChar) *source;
2034                             if(UTF_IS_SECOND_SURROGATE(trail)) {
2035                                 source++;
2036                                 sourceChar=UTF16_GET_PAIR_VALUE(sourceChar, trail);
2037                                 *err = U_INVALID_CHAR_FOUND;
2038                                 /* convert this surrogate code point */
2039                                 /* exit this condition tree */
2040                             } else {
2041                                 /* this is an unmatched lead code unit (1st surrogate) */
2042                                 /* callback(illegal) */
2043                                 *err=U_ILLEGAL_CHAR_FOUND;
2044                             }
2045                         } else {
2046                             /* no more input */
2047                             *err = U_ZERO_ERROR;
2048                         }
2049                     } else {
2050                         /* this is an unmatched trail code unit (2nd surrogate) */
2051                         /* callback(illegal) */
2052                         *err=U_ILLEGAL_CHAR_FOUND;
2053                     }
2054                 } else {
2055                     /* callback(unassigned) for a BMP code point */
2056                     *err = U_INVALID_CHAR_FOUND;
2057                 }
2058
2059                 args->converter->fromUChar32=sourceChar;
2060                 break;
2061             }
2062         } /* end if(myTargetIndex<myTargetLength) */
2063         else{
2064             *err =U_BUFFER_OVERFLOW_ERROR;
2065             break;
2066         }
2067
2068     }/* end while(mySourceIndex<mySourceLength) */
2069
2070     /*
2071      * the end of the input stream and detection of truncated input
2072      * are handled by the framework, but for ISO-2022-KR conversion
2073      * we need to be in ASCII mode at the very end
2074      *
2075      * conditions:
2076      *   successful
2077      *   not in ASCII mode
2078      *   end of input and no truncated input
2079      */
2080     if( U_SUCCESS(*err) &&
2081         isTargetByteDBCS &&
2082         args->flush && source>=sourceLimit && args->converter->fromUChar32==0
2083     ) {
2084         int32_t sourceIndex;
2085
2086         /* we are switching to ASCII */
2087         isTargetByteDBCS=FALSE;
2088
2089         /* get the source index of the last input character */
2090         /*
2091          * TODO this would be simpler and more reliable if we used a pair
2092          * of sourceIndex/prevSourceIndex like in ucnvmbcs.c
2093          * so that we could simply use the prevSourceIndex here;
2094          * this code gives an incorrect result for the rare case of an unmatched
2095          * trail surrogate that is alone in the last buffer of the text stream
2096          */
2097         sourceIndex=(int32_t)(source-args->source);
2098         if(sourceIndex>0) {
2099             --sourceIndex;
2100             if( U16_IS_TRAIL(args->source[sourceIndex]) &&
2101                 (sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1]))
2102             ) {
2103                 --sourceIndex;
2104             }
2105         } else {
2106             sourceIndex=-1;
2107         }
2108
2109         fromUWriteUInt8(
2110             args->converter,
2111             SHIFT_IN_STR, 1,
2112             &target, (const char *)targetLimit,
2113             &offsets, sourceIndex,
2114             err);
2115     }
2116
2117     /*save the state and return */
2118     args->source = source;
2119     args->target = (char*)target;
2120     args->converter->fromUnicodeStatus = (uint32_t)isTargetByteDBCS;
2121 }
2122
2123 /************************ To Unicode ***************************************/
2124
2125 static void
2126 UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterToUnicodeArgs *args,
2127                                                             UErrorCode* err){
         /*
          * ISO-2022-KR to Unicode for converter version 1; the generic entry point
          * UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC() delegates here when
          * myData->version==1.
          *
          * The input is processed in alternating steps:
          *   1. the bytes up to the limit returned by getEndOfBuffer_2022() are
          *      converted by ucnv_MBCSToUnicodeWithOffsets() on
          *      myData->currentConverter, using a private copy of the arguments;
          *   2. whatever follows that run is handed to changeState_2022().
          *
          * args  in/out: source, target and offsets are advanced past what was
          *       processed; partial-input bytes and overflow output are copied
          *       back into args->converter, so they do not stay in the subconverter.
          * err   in/out error code, passed on to the subconverter and to
          *       changeState_2022(); a failure ends the conversion.
          */
2128     char const* sourceStart;
2129     UConverterDataISO2022* myData=(UConverterDataISO2022*)(args->converter->extraInfo);
2130
2131     UConverterToUnicodeArgs subArgs;
2132     int32_t minArgsSize;
2133
2134     /* set up the subconverter arguments */
         /* copy no more bytes than the caller's struct (args->size) or this build's struct holds */
2135     if(args->size<sizeof(UConverterToUnicodeArgs)) {
2136         minArgsSize = args->size;
2137     } else {
2138         minArgsSize = (int32_t)sizeof(UConverterToUnicodeArgs);
2139     }
2140
2141     uprv_memcpy(&subArgs, args, minArgsSize);
2142     subArgs.size = (uint16_t)minArgsSize;
         /* the copy keeps the caller's field values; only the converter is replaced */
2143     subArgs.converter = myData->currentConverter;
2144
2145     /* remember the original start of the input for offsets */
2146     sourceStart = args->source;
2147
2148     if(myData->key != 0) {
2149         /* continue with a partial escape sequence */
             /* this jumps into the loop body and skips the subconverter step on the first pass */
2150         goto escape;
2151     }
2152
2153     while(U_SUCCESS(*err) && args->source < args->sourceLimit) {
2154         /*Find the end of the buffer e.g : Next Escape Seq | end of Buffer*/
2155         subArgs.source = args->source;
2156         subArgs.sourceLimit = getEndOfBuffer_2022(&(args->source), args->sourceLimit, args->flush);
             /* an empty run (limit == start) skips straight to the escape handling below */
2157         if(subArgs.source != subArgs.sourceLimit) {
2158             /*
2159              * get the current partial byte sequence
2160              *
2161              * it needs to be moved between the public and the subconverter
2162              * so that the conversion framework, which only sees the public
2163              * converter, can handle truncated and illegal input etc.
2164              */
2165             if(args->converter->toULength > 0) {
2166                 uprv_memcpy(subArgs.converter->toUBytes, args->converter->toUBytes, args->converter->toULength);
2167             }
2168             subArgs.converter->toULength = args->converter->toULength;
2169
2170             /*
2171              * Convert up to the end of the input, or to before the next escape character.
2172              * Does not handle conversion extensions because the preToU[] state etc.
2173              * is not copied.
2174              */
2175             ucnv_MBCSToUnicodeWithOffsets(&subArgs, err);
2176
                 /*
                  * args->source and args->target have not been advanced yet, so they
                  * still mark where this run began; delta is the distance from the
                  * original start of the input to the start of this run.
                  * NOTE(review): this assumes getEndOfBuffer_2022() did not move
                  * args->source and that the subconverter wrote offsets relative to
                  * subArgs.source; negative offsets are left unchanged - confirm.
                  */
2177             if(args->offsets != NULL && sourceStart != args->source) {
2178                 /* update offsets to base them on the actual start of the input */
2179                 int32_t *offsets = args->offsets;
2180                 UChar *target = args->target;
2181                 int32_t delta = (int32_t)(args->source - sourceStart);
2182                 while(target < subArgs.target) {
2183                     if(*offsets >= 0) {
2184                         *offsets += delta;
2185                     }
2186                     ++offsets;
2187                     ++target;
2188                 }
2189             }
                 /* publish the subconverter's progress to the caller's arguments */
2190             args->source = subArgs.source;
2191             args->target = subArgs.target;
2192             args->offsets = subArgs.offsets;
2193
2194             /* copy input/error/overflow buffers */
2195             if(subArgs.converter->toULength > 0) {
2196                 uprv_memcpy(args->converter->toUBytes, subArgs.converter->toUBytes, subArgs.converter->toULength);
2197             }
2198             args->converter->toULength = subArgs.converter->toULength;
2199
2200             if(*err == U_BUFFER_OVERFLOW_ERROR) {
                     /* move (not just copy) the pending overflow output to the public converter */
2201                 if(subArgs.converter->UCharErrorBufferLength > 0) {
2202                     uprv_memcpy(args->converter->UCharErrorBuffer, subArgs.converter->UCharErrorBuffer,
2203                                 subArgs.converter->UCharErrorBufferLength);
2204                 }
2205                 args->converter->UCharErrorBufferLength=subArgs.converter->UCharErrorBufferLength;
2206                 subArgs.converter->UCharErrorBufferLength = 0;
2207             }
2208         }
2209
             /* stop on any error, or when the whole input has been consumed */
2210         if (U_FAILURE(*err) || (args->source == args->sourceLimit)) {
2211             return;
2212         }
2213
2214 escape:
             /* input remains that the subconverter was not given: let changeState_2022() process it */
2215         changeState_2022(args->converter,
2216                &(args->source),
2217                args->sourceLimit,
2218                ISO_2022_KR,
2219                err);
2220     }
2221 }
2222
2223 static void
2224 UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
2225                                                             UErrorCode* err){
         /*
          * ISO-2022-KR to Unicode.
          *
          * Version 1 converters are handled entirely by
          * UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(). Otherwise the input
          * is read byte by byte:
          *   - UCNV_SI / UCNV_SO set myData->toU2022State.g to 0 / 1;
          *   - ESC_2022 is passed to changeState_2022();
          *   - any other byte is looked up in myData->currentConverter's shared
          *     data, alone when g==0, or together with the following byte when g==1.
          *
          * args  in/out: source and target are advanced; offsets, if not NULL,
          *       receive one source index per UChar written (args->offsets itself
          *       is not advanced by this function).
          * err   in/out: set to U_BUFFER_OVERFLOW_ERROR when the target is full, to
          *       U_PARSE_ERROR when SI arrives while the segment opened by SO is
          *       still empty, or to whatever changeState_2022() /
          *       toUnicodeCallback() report.
          */
2226     char tempBuf[2];
2227     const char *mySource = ( char *) args->source;
2228     UChar *myTarget = args->target;
2229     const char *mySourceLimit = args->sourceLimit;
2230     UChar32 targetUniChar = 0x0000;
2231     UChar mySourceChar = 0x0000;
2232     UConverterDataISO2022* myData;
2233     UConverterSharedData* sharedData ;
2234     UBool useFallback;
2235
2236     myData=(UConverterDataISO2022*)(args->converter->extraInfo);
2237     if(myData->version==1){
2238         UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(args,err);
2239         return;
2240     }
2241
2242     /* initialize state */
2243     sharedData = myData->currentConverter->sharedData;
2244     useFallback = args->converter->useFallback;
2245
         /* resume state left by a previous call; both gotos jump into the loop body below */
2246     if(myData->key != 0) {
2247         /* continue with a partial escape sequence */
2248         goto escape;
2249     } else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) {
2250         /* continue with a partial double-byte character */
2251         mySourceChar = args->converter->toUBytes[0];
2252         args->converter->toULength = 0;
2253         goto getTrailByte;
2254     }
2255
2256     while(mySource< mySourceLimit){
2257
2258         if(myTarget < args->targetLimit){
2259
2260             mySourceChar= (unsigned char) *mySource++;
2261
2262             if(mySourceChar==UCNV_SI){
2263                 myData->toU2022State.g = 0;
2264                 if (myData->isEmptySegment) {
2265                     myData->isEmptySegment = FALSE;     /* we are handling it, reset to avoid future spurious errors */
2266                     *err = U_PARSE_ERROR;       /* temporary err to flag empty segment, will be reset to U_ILLEGAL_ESCAPE_SEQUENCE in _toUnicodeWithCallback */
                         /* report the SI byte itself as the offending input */
2267                     args->converter->toUBytes[0] = mySourceChar;
2268                     args->converter->toULength = 1;
2269                     args->target = myTarget;
2270                     args->source = mySource;
2271                     return;
2272                 }
2273                 /*consume the source */
2274                 continue;
2275             }else if(mySourceChar==UCNV_SO){
2276                 myData->toU2022State.g = 1;
2277                 myData->isEmptySegment = TRUE;  /* Begin a new segment, empty so far */
2278                 /*consume the source */
2279                 continue;
2280             }else if(mySourceChar==ESC_2022){
                     /* back up so that changeState_2022() starts at the ESC byte */
2281                 mySource--;
2282 escape:
2283                 myData->isEmptySegment = FALSE; /* Any invalid ESC sequences will be detected separately, so just reset this */
2284                 changeState_2022(args->converter,&(mySource),
2285                                 mySourceLimit, ISO_2022_KR, err);
2286                 if(U_FAILURE(*err)){
2287                     args->target = myTarget;
2288                     args->source = mySource;
2289                     return;
2290                 }
2291                 continue;
2292             }
2293
2294             myData->isEmptySegment = FALSE;     /* Any invalid char errors will be detected separately, so just reset this */
2295             if(myData->toU2022State.g == 1) {
                     /* after SO: this byte is the lead byte of a two-byte character */
2296                 if(mySource < mySourceLimit) {
2297                     char trailByte;
2298 getTrailByte:
2299                     trailByte = *mySource++;
                         /*
                          * NOTE(review): 0x80 is added to both bytes before the lookup;
                          * presumably the subconverter's table is keyed by the bytes
                          * with the high bit set - confirm.
                          */
2300                     tempBuf[0] = (char)(mySourceChar + 0x80);
2301                     tempBuf[1] = (char)(trailByte + 0x80);
                         /* from here on mySourceChar holds lead<<8 | trail, for the offsets and the callback */
2302                     mySourceChar = (mySourceChar << 8) | (uint8_t)(trailByte);
                         /* look up only if neither input byte had its high bit set */
2303                     if((mySourceChar & 0x8080) == 0) {
2304                         targetUniChar = ucnv_MBCSSimpleGetNextUChar(sharedData, tempBuf, 2, useFallback);
2305                     } else {
2306                         /* illegal bytes > 0x7f */
2307                         targetUniChar = missingCharMarker;
2308                     }
2309                 } else {
                         /* the lead byte is the last byte of this buffer: save it for the next call */
2310                     args->converter->toUBytes[0] = (uint8_t)mySourceChar;
2311                     args->converter->toULength = 1;
2312                     break;
2313                 }
2314             }
2315             else{
                     /* g==0: single-byte lookup of the byte just read */
2316                 targetUniChar = ucnv_MBCSSimpleGetNextUChar(sharedData, mySource - 1, 1, useFallback);
2317             }
                 /* results of 0xfffe and above are not written; they go to the callback instead */
2318             if(targetUniChar < 0xfffe){
2319                 if(args->offsets) {
                         /*
                          * index of the first byte of this character: mySource is already
                          * past it; the width (1 or 2) is inferred from whether the
                          * combined value in mySourceChar exceeds 0xff
                          */
2320                     args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
2321                 }
2322                 *(myTarget++)=(UChar)targetUniChar;
2323             }
2324             else {
2325                 /* Call the callback function*/
2326                 toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err);
2327                 break;
2328             }
2329         }
2330         else{
2331             *err =U_BUFFER_OVERFLOW_ERROR;
2332             break;
2333         }
2334     }
         /* save the positions reached so the caller can continue from here */
2335     args->target = myTarget;
2336     args->source = mySource;
2337 }
2338
2339 /*************************** END ISO2022-KR *********************************/
2340
2341 /*************************** ISO-2022-CN *********************************
2342 *
2343 * Rules for ISO-2022-CN Encoding:
2344 * i)   The designator sequence must appear once on a line before any instance
2345 *      of character set it designates.
2346 * ii)  If two lines contain characters from the same character set, both lines
2347 *      must include the designator sequence.
2348 * iii) Once the designator sequence is known, a shifting sequence has to be found
2349 *      to invoke the designated character set
2350 * iv)  All lines start in ASCII and end in ASCII.
2351 * v)   Four shifting sequences are employed for this purpose:
2352 *
2353 *      Sequence    ASCII Eq    Charsets
2354 *      ----------  -------    ---------
2355 *      SI           <SI>        US-ASCII
2356 *      SO           <SO>        CNS-11643-1992 Plane 1, GB2312, ISO-IR-165
2357 *      SS2          <ESC>N      CNS-11643-1992 Plane 2
2358 *      SS3          <ESC>O      CNS-11643-1992 Planes 3-7
2359 *
2360 * vi)
2361 *      SOdesignator  : ESC "$" ")" finalchar_for_SO
2362 *      SS2designator : ESC "$" "*" finalchar_for_SS2
2363 *      SS3designator : ESC "$" "+" finalchar_for_SS3
2364 *
2365 *      ESC $ ) A       Indicates the bytes following SO are Chinese
2366 *       characters as defined in GB 2312-80, until
2367 *       another SOdesignation appears
2368 *
2369 *
2370 *      ESC $ ) E       Indicates the bytes following SO are as defined
2371 *       in ISO-IR-165 (for details, see section 2.1),
2372 *       until another SOdesignation appears
2373 *
2374 *      ESC $ ) G       Indicates the bytes following SO are as defined
2375 *       in CNS 11643-plane-1, until another
2376 *       SOdesignation appears
2377 *
2378 *      ESC $ * H       Indicates the two bytes immediately following
2379 *       SS2 is a Chinese character as defined in CNS
2380 *       11643-plane-2, until another SS2designation
2381 *       appears
2382 *       (Meaning <ESC>N must precede every 2 byte
2383 *        sequence.)
2384 *
2385 *      ESC $ + I       Indicates the immediate two bytes following SS3
2386 *       is a Chinese character as defined in CNS
2387 *       11643-plane-3, until another SS3designation
2388 *       appears
2389 *       (Meaning <ESC>O must precede every 2 byte
2390 *        sequence.)
2391 *
2392 *      ESC $ + J       Indicates the immediate two bytes following SS3
2393 *       is a Chinese character as defined in CNS
2394 *       11643-plane-4, until another SS3designation
2395 *       appears
2396 *       (In English: <ESC>O must precede every 2 byte
2397 *        sequence.)
2398 *
2399 *      ESC $ + K       Indicates the immediate two bytes following SS3
2400 *       is a Chinese character as defined in CNS
2401 *       11643-plane-5, until another SS3designation
2402 *       appears
2403 *
2404 *      ESC $ + L       Indicates the immediate two bytes following SS3
2405 *       is a Chinese character as defined in CNS
2406 *       11643-plane-6, until another SS3designation
2407 *       appears
2408 *
2409 *      ESC $ + M       Indicates the immediate two bytes following SS3
2410 *       is a Chinese character as defined in CNS
2411 *       11643-plane-7, until another SS3designation
2412 *       appears
2413 *
2414 *       As in ISO-2022-CN, each line starts in ASCII, and ends in ASCII, and
2415 *       has its own designation information before any Chinese characters
2416 *       appear
2417 *
2418 */
2419
2420 /* The following are defined this way to make the strings truly readonly */
     /*
      * Each string is a 4-byte designator sequence:
      * ESC (0x1B), '$' (0x24), one of ')' (0x29), '*' (0x2A), '+' (0x2B),
      * and a final byte that selects the character set.
      */
2421 static const char GB_2312_80_STR[] = "\x1B\x24\x29\x41";                /* ESC $ ) A */
2422 static const char ISO_IR_165_STR[] = "\x1B\x24\x29\x45";                /* ESC $ ) E */
2423 static const char CNS_11643_1992_Plane_1_STR[] = "\x1B\x24\x29\x47";    /* ESC $ ) G */
2424 static const char CNS_11643_1992_Plane_2_STR[] = "\x1B\x24\x2A\x48";    /* ESC $ * H */
2425 static const char CNS_11643_1992_Plane_3_STR[] = "\x1B\x24\x2B\x49";    /* ESC $ + I */
2426 static const char CNS_11643_1992_Plane_4_STR[] = "\x1B\x24\x2B\x4A";    /* ESC $ + J */
2427 static const char CNS_11643_1992_Plane_5_STR[] = "\x1B\x24\x2B\x4B";    /* ESC $ + K */
2428 static const char CNS_11643_1992_Plane_6_STR[] = "\x1B\x24\x2B\x4C";    /* ESC $ + L */
2429 static const char CNS_11643_1992_Plane_7_STR[] = "\x1B\x24\x2B\x4D";    /* ESC $ + M */
2430
2431 /********************** ISO2022-CN Data **************************/
     /*
      * Sequences written by UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC()
      * when the charset designated for a G-set changes. That function indexes
      * the table with cs when cs < CNS_11643 and with
      * CNS_11643 + (cs - CNS_11643_1) for the CNS planes, and always copies
      * 4 bytes; it only does so for cs > 0, so entry 0 is not read there.
      * NOTE(review): the entry order presumably mirrors the charset enum that
      * is declared elsewhere in this file - confirm before reordering.
      */
2432 static const char* const escSeqCharsCN[10] ={
2433         SHIFT_IN_STR,           /* ASCII */
2434         GB_2312_80_STR,
2435         ISO_IR_165_STR,
2436         CNS_11643_1992_Plane_1_STR,
2437         CNS_11643_1992_Plane_2_STR,
2438         CNS_11643_1992_Plane_3_STR,
2439         CNS_11643_1992_Plane_4_STR,
2440         CNS_11643_1992_Plane_5_STR,
2441         CNS_11643_1992_Plane_6_STR,
2442         CNS_11643_1992_Plane_7_STR
2443 };
2444
2445 static void
2446 UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err){
         /*
          * Unicode to ISO-2022-CN (converterData->version==0) or ISO-2022-CN-EXT
          * (any other version).
          *
          * For each code point the function assembles up to 8 bytes in buffer[]:
          * an optional 4-byte designator from escSeqCharsCN[], an optional shift
          * (SO, ESC N or ESC O; SI before ASCII), and the 1 or 2 character bytes.
          * The designated charsets and the active shift state are kept in
          * converterData->fromU2022State across calls; a pending lead surrogate or
          * the code point that caused an error is kept in
          * args->converter->fromUChar32.
          *
          * args  in/out: source and target are advanced; offsets, if not NULL,
          *       are filled in alongside the output bytes.
          * err   in/out: set to U_ILLEGAL_CHAR_FOUND for unpaired surrogates and
          *       for SO/SI/ESC in the input, to U_INVALID_CHAR_FOUND when none of
          *       the tried charsets has the character, and to
          *       U_BUFFER_OVERFLOW_ERROR when the target is full.
          */
2447
2448     UConverterDataISO2022 *converterData;
2449     ISO2022State *pFromU2022State;
2450     uint8_t *target = (uint8_t *) args->target;
2451     const uint8_t *targetLimit = (const uint8_t *) args->targetLimit;
2452     const UChar* source = args->source;
2453     const UChar* sourceLimit = args->sourceLimit;
2454     int32_t* offsets = args->offsets;
2455     UChar32 sourceChar;
2456     char buffer[8];         /* worst case: 4 (designator) + 2 (ESC N or ESC O) + 2 (character bytes) */
2457     int32_t len;
2458     int8_t choices[3];      /* charsets to try, in order; rebuilt whenever choiceCount is 0 */
2459     int32_t choiceCount;
2460     uint32_t targetValue = 0;
2461     UBool useFallback;
2462
2463     /* set up the state */
2464     converterData     = (UConverterDataISO2022*)args->converter->extraInfo;
2465     pFromU2022State   = &converterData->fromU2022State;
2466     useFallback       = args->converter->useFallback;
2467
2468     choiceCount = 0;
2469
2470     /* check if the last codepoint of previous buffer was a lead surrogate*/
         /* this jumps into the surrogate handling inside the loop below */
2471     if((sourceChar = args->converter->fromUChar32)!=0 && target< targetLimit) {
2472         goto getTrail;
2473     }
2474
2475     while( source < sourceLimit){
2476         if(target < targetLimit){
2477
2478             sourceChar  = *(source++);
2479             /*check if the char is a First surrogate*/
2480              if(UTF_IS_SURROGATE(sourceChar)) {
2481                 if(UTF_IS_SURROGATE_FIRST(sourceChar)) {
2482 getTrail:
2483                     /*look ahead to find the trail surrogate*/
2484                     if(source < sourceLimit) {
2485                         /* test the following code unit */
2486                         UChar trail=(UChar) *source;
2487                         if(UTF_IS_SECOND_SURROGATE(trail)) {
2488                             source++;
2489                             sourceChar=UTF16_GET_PAIR_VALUE(sourceChar, trail);
2490                             args->converter->fromUChar32=0x00;
2491                             /* convert this supplementary code point */
2492                             /* exit this condition tree */
2493                         } else {
2494                             /* this is an unmatched lead code unit (1st surrogate) */
2495                             /* callback(illegal) */
2496                             *err=U_ILLEGAL_CHAR_FOUND;
2497                             args->converter->fromUChar32=sourceChar;
2498                             break;
2499                         }
2500                     } else {
2501                         /* no more input */
                             /* keep the lead surrogate; the next call resumes at getTrail */
2502                         args->converter->fromUChar32=sourceChar;
2503                         break;
2504                     }
2505                 } else {
2506                     /* this is an unmatched trail code unit (2nd surrogate) */
2507                     /* callback(illegal) */
2508                     *err=U_ILLEGAL_CHAR_FOUND;
2509                     args->converter->fromUChar32=sourceChar;
2510                     break;
2511                 }
2512             }
2513
2514             /* do the conversion */
2515             if(sourceChar <= 0x007f ){
2516                 /* do not convert SO/SI/ESC */
2517                 if(IS_2022_CONTROL(sourceChar)) {
2518                     /* callback(illegal) */
2519                     *err=U_ILLEGAL_CHAR_FOUND;
2520                     args->converter->fromUChar32=sourceChar;
2521                     break;
2522                 }
2523
2524                 /* US-ASCII */
2525                 if(pFromU2022State->g == 0) {
2526                     buffer[0] = (char)sourceChar;
2527                     len = 1;
2528                 } else {
                         /* currently shifted out: write SI before the ASCII byte */
2529                     buffer[0] = UCNV_SI;
2530                     buffer[1] = (char)sourceChar;
2531                     len = 2;
2532                     pFromU2022State->g = 0;
                         /* force choices[] to be rebuilt for the next non-ASCII character */
2533                     choiceCount = 0;
2534                 }
2535                 if(sourceChar == CR || sourceChar == LF) {
2536                     /* reset the state at the end of a line */
                         /* this clears all designations, so they are written again when next needed */
2537                     uprv_memset(pFromU2022State, 0, sizeof(ISO2022State));
2538                     choiceCount = 0;
2539                 }
2540             }
2541             else{
2542                 /* convert U+0080..U+10ffff */
2543                 UConverterSharedData *cnv;
2544                 int32_t i;
2545                 int8_t cs, g;
2546
2547                 if(choiceCount == 0) {
2548                     /* try the current SO/G1 converter first */
2549                     choices[0] = pFromU2022State->cs[1];
2550
2551                     /* default to GB2312_1 if none is designated yet */
2552                     if(choices[0] == 0) {
2553                         choices[0] = GB2312_1;
2554                     }
2555
2556                     if(converterData->version == 0) {
2557                         /* ISO-2022-CN */
2558
2559                         /* try the other SO/G1 converter; a CNS_11643_1 lookup may result in any plane */
2560                         if(choices[0] == GB2312_1) {
2561                             choices[1] = (int8_t)CNS_11643_1;
2562                         } else {
2563                             choices[1] = (int8_t)GB2312_1;
2564                         }
2565
2566                         choiceCount = 2;
2567                     } else {
2568                         /* ISO-2022-CN-EXT */
2569
2570                         /* try one of the other converters */
2571                         switch(choices[0]) {
2572                         case GB2312_1:
2573                             choices[1] = (int8_t)CNS_11643_1;
2574                             choices[2] = (int8_t)ISO_IR_165;
2575                             break;
2576                         case ISO_IR_165:
2577                             choices[1] = (int8_t)GB2312_1;
2578                             choices[2] = (int8_t)CNS_11643_1;
2579                             break;
2580                         default: /* CNS_11643_x */
2581                             choices[1] = (int8_t)GB2312_1;
2582                             choices[2] = (int8_t)ISO_IR_165;
2583                             break;
2584                         }
2585
2586                         choiceCount = 3;
2587                     }
2588                 }
2589
2590                 cs = g = 0;
2591                 len = 0;
2592
                     /* try the charsets in order; stop at the first one that yields output (len != 0) */
2593                 for(i = 0; i < choiceCount && len == 0; ++i) {
2594                     cs = choices[i];
2595                     if(cs > 0) {
2596                         if(cs > CNS_11643_0) {
                                 /*
                                  * any CNS 11643 plane: one 3-byte lookup in the combined
                                  * CNS_11643 converter; the top byte of the result, minus
                                  * 0x80, is added to CNS_11643_0 to get the actual charset
                                  */
2597                             cnv = converterData->myConverterArray[CNS_11643];
2598                             MBCS_FROM_UCHAR32_ISO2022(cnv,sourceChar,&targetValue,useFallback,&len,MBCS_OUTPUT_3);
2599                             if(len==3) {
2600                                 cs = (int8_t)(CNS_11643_0 + (targetValue >> 16) - 0x80);
                                     /* only the two low bytes are written to the output */
2601                                 len = 2;
2602                                 if(cs == CNS_11643_1) {
2603                                     g = 1;
2604                                 } else if(cs == CNS_11643_2) {
2605                                     g = 2;
2606                                 } else /* plane 3..7 */ if(converterData->version == 1) {
2607                                     g = 3;
2608                                 } else {
2609                                     /* ISO-2022-CN (without -EXT) does not support plane 3..7 */
                                         /* len==0 counts as "not found", so the loop moves on to the next choice */
2610                                     len = 0;
2611                                 }
2612                             }
2613                         } else {
2614                             /* GB2312_1 or ISO-IR-165 */
2615                             cnv = converterData->myConverterArray[cs];
2616                             MBCS_FROM_UCHAR32_ISO2022(cnv,sourceChar,&targetValue,useFallback,&len,MBCS_OUTPUT_2);
2617                             g = 1; /* used if len == 2 */
2618                         }
2619                     }
2620                 }
2621
2622                 if(len > 0) {
2623                     len = 0; /* count output bytes; it must have been len == 2 */
2624
2625                     /* write the designation sequence if necessary */
2626                     if(cs != pFromU2022State->cs[g]) {
2627                         if(cs < CNS_11643) {
2628                             uprv_memcpy(buffer, escSeqCharsCN[cs], 4);
2629                         } else {
2630                             uprv_memcpy(buffer, escSeqCharsCN[CNS_11643 + (cs - CNS_11643_1)], 4);
2631                         }
2632                         len = 4;
2633                         pFromU2022State->cs[g] = cs;
2634                         if(g == 1) {
2635                             /* changing the SO/G1 charset invalidates the choices[] */
2636                             choiceCount = 0;
2637                         }
2638                     }
2639
2640                     /* write the shift sequence if necessary */
                         /*
                          * pFromU2022State->g is only ever set to 0 or 1 in this function,
                          * so for g==2 and g==3 this test is always true and the ESC N /
                          * ESC O pair is written before every such character
                          */
2641                     if(g != pFromU2022State->g) {
2642                         switch(g) {
2643                         case 1:
2644                             buffer[len++] = UCNV_SO;
2645
2646                             /* set the new state only if it is the locking shift SO/G1, not for SS2 or SS3 */
2647                             pFromU2022State->g = 1;
2648                             break;
2649                         case 2:
                                 /* ESC N (SS2) */
2650                             buffer[len++] = 0x1b;
2651                             buffer[len++] = 0x4e;
2652                             break;
2653                         default: /* case 3 */
                                 /* ESC O (SS3) */
2654                             buffer[len++] = 0x1b;
2655                             buffer[len++] = 0x4f;
2656                             break;
2657                         }
2658                     }
2659
2660                     /* write the two output bytes */
2661                     buffer[len++] = (char)(targetValue >> 8);
2662                     buffer[len++] = (char)targetValue;
2663                 } else {
2664                     /* if we cannot find the character after checking all codepages
2665                      * then this is an error
2666                      */
2667                     *err = U_INVALID_CHAR_FOUND;
2668                     args->converter->fromUChar32=sourceChar;
2669                     break;
2670                 }
2671             }
2672
2673             /* output len>0 bytes in buffer[] */
                 /*
                  * the first two branches write directly when the bytes are known to
                  * fit (target < targetLimit was checked at the top of the loop);
                  * everything else goes through fromUWriteUInt8().
                  * NOTE(review): fromUWriteUInt8() is presumably what reports
                  * U_BUFFER_OVERFLOW_ERROR when the bytes do not fit - confirm.
                  */
2674             if(len == 1) {
2675                 *target++ = buffer[0];
2676                 if(offsets) {
2677                     *offsets++ = (int32_t)(source - args->source - 1); /* -1: known to be ASCII */
2678                 }
2679             } else if(len == 2 && (target + 2) <= targetLimit) {
2680                 *target++ = buffer[0];
2681                 *target++ = buffer[1];
2682                 if(offsets) {
                         /* both output bytes map back to the first code unit of the source character */
2683                     int32_t sourceIndex = (int32_t)(source - args->source - U16_LENGTH(sourceChar));
2684                     *offsets++ = sourceIndex;
2685                     *offsets++ = sourceIndex;
2686                 }
2687             } else {
2688                 fromUWriteUInt8(
2689                     args->converter,
2690                     buffer, len,
2691                     &target, (const char *)targetLimit,
2692                     &offsets, (int32_t)(source - args->source - U16_LENGTH(sourceChar)),
2693                     err);
2694                 if(U_FAILURE(*err)) {
2695                     break;
2696                 }
2697             }
2698         } /* end if(myTargetIndex<myTargetLength) */
2699         else{
2700             *err =U_BUFFER_OVERFLOW_ERROR;
2701             break;
2702         }
2703
2704     }/* end while(mySourceIndex<mySourceLength) */
2705
2706     /*
2707      * the end of the input stream and detection of truncated input
2708      * are handled by the framework, but for ISO-2022-CN conversion
2709      * we need to be in ASCII mode at the very end
2710      *
2711      * conditions:
2712      *   successful
2713      *   not in ASCII mode
2714      *   end of input and no truncated input
2715      */
2716     if( U_SUCCESS(*err) &&
2717         pFromU2022State->g!=0 &&
2718         args->flush && source>=sourceLimit && args->converter->fromUChar32==0
2719     ) {
2720         int32_t sourceIndex;
2721
2722         /* we are switching to ASCII */
2723         pFromU2022State->g=0;
2724
2725         /* get the source index of the last input character */
2726         /*
2727          * TODO this would be simpler and more reliable if we used a pair
2728          * of sourceIndex/prevSourceIndex like in ucnvmbcs.c
2729          * so that we could simply use the prevSourceIndex here;
2730          * this code gives an incorrect result for the rare case of an unmatched
2731          * trail surrogate that is alone in the last buffer of the text stream
2732          */
2733         sourceIndex=(int32_t)(source-args->source);
2734         if(sourceIndex>0) {
2735             --sourceIndex;
                 /*
                  * the last unit is a trail surrogate preceded by a lead surrogate
                  * (or is at index 0, the case described in the TODO above):
                  * step back once more
                  */
2736             if( U16_IS_TRAIL(args->source[sourceIndex]) &&
2737                 (sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1]))
2738             ) {
2739                 --sourceIndex;
2740             }
2741         } else {
                 /* nothing was consumed from this buffer: use -1 as the source index */
2742             sourceIndex=-1;
2743         }
2744
             /* write the closing SI */
2745         fromUWriteUInt8(
2746             args->converter,
2747             SHIFT_IN_STR, 1,
2748             &target, (const char *)targetLimit,
2749             &offsets, sourceIndex,
2750             err);
2751     }
2752
2753     /*save the state and return */
2754     args->source = source;
2755     args->target = (char*)target;
2756 }
2757
2758
2759 static void
2760 UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
2761                                                UErrorCode* err){
         /*
          * Converts ISO-2022-CN bytes from args->source into UChars at args->target
          * and, when args->offsets is non-NULL, records one source offset per UChar
          * written.
          *
          * args - in/out: args->source and args->target are advanced to the
          *        positions reached on return. args->offsets itself is not
          *        advanced by this function.
          * err  - in/out: set here to U_BUFFER_OVERFLOW_ERROR when the target is
          *        full while input remains, and to U_PARSE_ERROR as a temporary
          *        marker for an empty SO segment (see the existing notes below);
          *        it may also be set by changeState_2022() or toUnicodeCallback().
          *
          * State carried between calls lives in the converter: myData->key
          * (partial escape sequence), toUBytes[0] with toULength==1 (pending lead
          * byte of a double-byte character), myData->isEmptySegment, and
          * myData->toU2022State.
          */
2762     char tempBuf[3];
2763     const char *mySource = (char *) args->source;
2764     UChar *myTarget = args->target;
2765     const char *mySourceLimit = args->sourceLimit;
2766     uint32_t targetUniChar = 0x0000;
2767     uint32_t mySourceChar = 0x0000;
2768     UConverterDataISO2022* myData;
2769     ISO2022State *pToU2022State;
2770
2771     myData=(UConverterDataISO2022*)(args->converter->extraInfo);
2772     pToU2022State = &myData->toU2022State;
2773
         /* Resume whatever the previous call left unfinished; both gotos jump into the switch below. */
2774     if(myData->key != 0) {
2775         /* continue with a partial escape sequence */
2776         goto escape;
2777     } else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) {
2778         /* continue with a partial double-byte character */
2779         mySourceChar = args->converter->toUBytes[0];
2780         args->converter->toULength = 0;
2781         goto getTrailByte;
2782     }
2783
2784     while(mySource < mySourceLimit){
2785
             /* stays at missingCharMarker unless one of the branches below finds a mapping */
2786         targetUniChar =missingCharMarker;
2787
2788         if(myTarget < args->targetLimit){
2789
2790             mySourceChar= (unsigned char) *mySource++;
2791
2792             switch(mySourceChar){
2793             case UCNV_SI:
                     /* SI selects g=0; an SI that directly follows SO (segment still empty) is reported as an error */
2794                 pToU2022State->g=0;
2795                 if (myData->isEmptySegment) {
2796                     myData->isEmptySegment = FALSE;     /* we are handling it, reset to avoid future spurious errors */
2797                     *err = U_PARSE_ERROR;       /* temporary err to flag empty segment, will be reset to U_ILLEGAL_ESCAPE_SEQUENCE in _toUnicodeWithCallback */
2798                     args->converter->toUBytes[0] = mySourceChar;
2799                     args->converter->toULength = 1;
2800                     args->target = myTarget;
2801                     args->source = mySource;
2802                     return;
2803                 }
2804                 continue;
2805
2806             case UCNV_SO:
                     /* SO selects g=1 only when cs[1] is non-zero */
2807                 if(pToU2022State->cs[1] != 0) {
2808                     pToU2022State->g=1;
2809                     myData->isEmptySegment = TRUE;      /* Begin a new segment, empty so far */
2810                     continue;
2811                 } else {
2812                     /* illegal to have SO before a matching designator */
2813                     myData->isEmptySegment = FALSE;     /* Handling a different error, reset this to avoid future spurious errs */
                         /* break leaves targetUniChar at missingCharMarker, so the code after the switch calls toUnicodeCallback() */
2814                     break;
2815                 }
2816
2817             case ESC_2022:
                     /* step back so that changeState_2022() is handed the position of the ESC byte */
2818                 mySource--;
                 /* also the re-entry point when myData->key != 0 at function entry */
2819 escape:
2820                 {
2821                     const char * mySourceBefore = mySource;
2822                     int8_t toULengthBefore = args->converter->toULength;
2823
2824                     changeState_2022(args->converter,&(mySource),
2825                         mySourceLimit, ISO_2022_CN,err);
2826
2827                     /* After SO there must be at least one character before a designator (designator error handled separately) */
2828                     if ( myData->key == 0 && U_SUCCESS(*err) && myData->isEmptySegment ) {
2829                         *err = U_PARSE_ERROR;   /* temporary err to flag empty segment, will be reset to U_ILLEGAL_ESCAPE_SEQUENCE in _toUnicodeWithCallback */
                             /* previous toULength plus the number of bytes this changeState_2022() call consumed */
2830                         args->converter->toULength = toULengthBefore + (mySource - mySourceBefore);
2831                     }
2832                 }
2833
2834                 /* invalid or illegal escape sequence */
2835                 if(U_FAILURE(*err)){
2836                     args->target = myTarget;
2837                     args->source = mySource;
2838                     myData->isEmptySegment = FALSE;     /* Reset to avoid future spurious errors */
2839                     return;
2840                 }
2841                 continue;
2842
2843             /* ISO-2022-CN does not use single-byte (C1) SS2 and SS3 */
2844
2845             case CR:
2846                 /*falls through*/
2847             case LF:
                     /* zero the whole toUnicode ISO2022State (this includes cs[], g and prevG), then convert the byte itself */
2848                 uprv_memset(pToU2022State, 0, sizeof(ISO2022State));
2849                 /* falls through */
2850             default:
2851                 /* convert one or two bytes */
2852                 myData->isEmptySegment = FALSE;
                     /* a non-zero g means a second (trail) byte is consumed for this character */
2853                 if(pToU2022State->g != 0) {
2854                     if(mySource < mySourceLimit) {
2855                         UConverterSharedData *cnv;
2856                         StateEnum tempState;
2857                         int32_t tempBufLen;
2858                         char trailByte;
                         /* entered by goto from the top of the function when a lead byte was pending */
2859 getTrailByte:
                             /* the trail byte is consumed without any range check in this function */
2860                         trailByte = *mySource++;
2861                         tempState = (StateEnum)pToU2022State->cs[pToU2022State->g];
2862                         if(tempState > CNS_11643_0) {
                                 /*
                                  * States above CNS_11643_0 all use the CNS_11643 converter;
                                  * a byte 0x80+(tempState-CNS_11643_0) is prepended to the
                                  * two input bytes to form a 3-byte sequence.
                                  */
2863                             cnv = myData->myConverterArray[CNS_11643];
2864                             tempBuf[0] = (char) (0x80+(tempState-CNS_11643_0));
2865                             tempBuf[1] = (char) (mySourceChar);
2866                             tempBuf[2] = trailByte;
2867                             tempBufLen = 3;
2868
2869                         }else{
2870                             cnv = myData->myConverterArray[tempState];
2871                             tempBuf[0] = (char) (mySourceChar);
2872                             tempBuf[1] = trailByte;
2873                             tempBufLen = 2;
2874                         }
                             /* keep both bytes in mySourceChar: used by the offset arithmetic and passed to toUnicodeCallback() below */
2875                         mySourceChar = (mySourceChar << 8) | (uint8_t)(trailByte);
2876                         if(pToU2022State->g>=2) {
2877                             /* return from a single-shift state to the previous one */
2878                             pToU2022State->g=pToU2022State->prevG;
2879                         }
2880                         targetUniChar = ucnv_MBCSSimpleGetNextUChar(cnv, tempBuf, tempBufLen, FALSE);
2881                     } else {
                             /* lead byte is the last byte of this buffer: stash it and stop; a later call resumes at getTrailByte */
2882                         args->converter->toUBytes[0] = (uint8_t)mySourceChar;
2883                         args->converter->toULength = 1;
2884                         goto endloop;
2885                     }
2886                 }
2887                 else{
                         /* g==0: bytes up to 0x7f map to themselves; higher values keep missingCharMarker */
2888                     if(mySourceChar <= 0x7f) {
2889                         targetUniChar = (UChar) mySourceChar;
2890                     }
2891                 }
2892                 break;
2893             }
                 /*
                  * Three-way dispatch on the result: below missingCharMarker-1 it is
                  * written as a single UChar, above missingCharMarker it is written as
                  * a surrogate pair, and the two remaining values go to the callback.
                  * The offset expression subtracts 1 or 2 depending on whether
                  * mySourceChar holds one byte or a combined lead/trail pair.
                  * NOTE(review): when the lead byte came from a previous buffer the
                  * expression evaluates to -1 - confirm that callers expect this.
                  */
2894             if(targetUniChar < (missingCharMarker-1/*0xfffe*/)){
2895                 if(args->offsets){
2896                     args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
2897                 }
2898                 *(myTarget++)=(UChar)targetUniChar;
2899             }
2900             else if(targetUniChar > missingCharMarker){
2901                 /* disassemble the surrogate pair and write to output*/
2902                 targetUniChar-=0x0010000;
2903                 *myTarget = (UChar)(0xd800+(UChar)(targetUniChar>>10));
2904                 if(args->offsets){
2905                     args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
2906                 }
2907                 ++myTarget;
2908                 if(myTarget< args->targetLimit){
2909                     *myTarget = (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));
2910                     if(args->offsets){
2911                         args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
2912                     }
2913                     ++myTarget;
2914                 }else{
                         /* no room for the trail surrogate: park it in UCharErrorBuffer; no error code is set at this point */
2915                     args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]=
2916                                     (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));
2917                 }
2918
2919             }
2920             else{
2921                 /* Call the callback function*/
2922                 toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err);
2923                 break;
2924             }
2925         }
2926         else{
                 /* input remains but the target is full */
2927             *err =U_BUFFER_OVERFLOW_ERROR;
2928             break;
2929         }
2930     }
2931 endloop:
         /* publish the positions reached back to the caller */
2932     args->target = myTarget;
2933     args->source = mySource;
2934 }
2935
2936 static void
2937 _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err) {
         /*
          * Writes the converter's substitution character, preceded by whatever
          * shift or escape bytes the current fromUnicode state requires. The
          * behaviour is selected by the first letter of the converter data's
          * locale ('j', 'c' or 'k').
          *
          * args        - fromUnicode arguments; args->converter is the ISO-2022
          *               converter (temporarily swapped for the sub-converter in
          *               the 'k' version != 0 path and restored afterwards).
          * offsetIndex - passed on to ucnv_cbFromUWriteBytes().
          *               NOTE(review): the 'k' version != 0 path passes 0 to
          *               ucnv_cbFromUWriteSub() instead - confirm this is intended.
          * err         - in/out error code, set by the write helpers.
          *
          * The longest sequence assembled in buffer[] here is 5 bytes
          * (SI + 3-byte escape + 1 subchar byte, in the 'j' case).
          */
2938     UConverter *cnv = args->converter;
2939     UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) cnv->extraInfo;
2940     ISO2022State *pFromU2022State=&myConverterData->fromU2022State;
2941     char *p, *subchar;
2942     char buffer[8];
2943     int32_t length;
2944
2945     subchar=(char *)cnv->subChars;
2946     length=cnv->subCharLen; /* assume length==1 for most variants */
2947
2948     p = buffer;
2949     switch(myConverterData->locale[0]){
2950     case 'j':
2951         {
2952             int8_t cs;
2953
2954             if(pFromU2022State->g == 1) {
2955                 /* JIS7: switch from G1 to G0 */
2956                 pFromU2022State->g = 0;
2957                 *p++ = UCNV_SI;
2958             }
2959
2960             cs = pFromU2022State->cs[0];
2961             if(cs != ASCII && cs != JISX201) {
2962                 /* not in ASCII or JIS X 0201: switch to ASCII */
2963                 pFromU2022State->cs[0] = (int8_t)ASCII;
                     /* the three bytes ESC ( B */
2964                 *p++ = '\x1b';
2965                 *p++ = '\x28';
2966                 *p++ = '\x42';
2967             }
2968
                 /* only the first subchar byte is written, whatever length is */
2969             *p++ = subchar[0];
2970             break;
2971         }
2972     case 'c':
2973         if(pFromU2022State->g != 0) {
2974             /* not in ASCII mode: switch to ASCII */
2975             pFromU2022State->g = 0;
2976             *p++ = UCNV_SI;
2977         }
             /* only the first subchar byte is written, whatever length is */
2978         *p++ = subchar[0];
2979         break;
2980     case 'k':
2981         if(myConverterData->version == 0) {
                 /* version 0: fromUnicodeStatus is used as the SBCS(0)/DBCS(1) mode flag */
2982             if(length == 1) {
2983                 if((UBool)args->converter->fromUnicodeStatus) {
2984                     /* in DBCS mode: switch to SBCS */
2985                     args->converter->fromUnicodeStatus = 0;
2986                     *p++ = UCNV_SI;
2987                 }
2988                 *p++ = subchar[0];
2989             } else /* length == 2*/ {
2990                 if(!(UBool)args->converter->fromUnicodeStatus) {
2991                     /* in SBCS mode: switch to DBCS */
2992                     args->converter->fromUnicodeStatus = 1;
2993                     *p++ = UCNV_SO;
2994                 }
2995                 *p++ = subchar[0];
2996                 *p++ = subchar[1];
2997             }
2998             break;
2999         } else {
                 /*
                  * Other versions: delegate to currentConverter, lending it this
                  * converter's substitution string and fromUChar32 for the
                  * duration of the call. This branch returns directly and never
                  * reaches the ucnv_cbFromUWriteBytes() call at the end.
                  */
3000             /* save the subconverter's substitution string */
3001             uint8_t *currentSubChars = myConverterData->currentConverter->subChars;
3002             int8_t currentSubCharLen = myConverterData->currentConverter->subCharLen;
3003
3004             /* set our substitution string into the subconverter */
3005             myConverterData->currentConverter->subChars = (uint8_t *)subchar;
3006             myConverterData->currentConverter->subCharLen = (int8_t)length;
3007
3008             /* let the subconverter write the subchar, set/retrieve fromUChar32 state */
3009             args->converter = myConverterData->currentConverter;
3010             myConverterData->currentConverter->fromUChar32 = cnv->fromUChar32;
3011             ucnv_cbFromUWriteSub(args, 0, err);
3012             cnv->fromUChar32 = myConverterData->currentConverter->fromUChar32;
3013             args->converter = cnv;
3014
3015             /* restore the subconverter's substitution string */
3016             myConverterData->currentConverter->subChars = currentSubChars;
3017             myConverterData->currentConverter->subCharLen = currentSubCharLen;
3018
                 /* on overflow, move the bytes the sub-converter buffered into this converter's charErrorBuffer */
3019             if(*err == U_BUFFER_OVERFLOW_ERROR) {
3020                 if(myConverterData->currentConverter->charErrorBufferLength > 0) {
3021                     uprv_memcpy(
3022                         cnv->charErrorBuffer,
3023                         myConverterData->currentConverter->charErrorBuffer,
3024                         myConverterData->currentConverter->charErrorBufferLength);
3025                 }
3026                 cnv->charErrorBufferLength = myConverterData->currentConverter->charErrorBufferLength;
3027                 myConverterData->currentConverter->charErrorBufferLength = 0;
3028             }
3029             return;
3030         }
3031     default:
3032         /* not expected */
             /* p == buffer here, so the call below is made with a length of 0 */
3033         break;
3034     }
         /* emit the assembled shift/escape bytes and subchar in one write */
3035     ucnv_cbFromUWriteBytes(args,
3036                            buffer, (int32_t)(p - buffer),
3037                            offsetIndex, err);
3038 }
3039
3040 /*
3041  * Structure for cloning an ISO 2022 converter into a single memory block.
3042  * ucnv_safeClone() of the converter will align the entire cloneStruct,
3043  * and then ucnv_safeClone() of the sub-converter may additionally align
3044  * currentConverter inside the cloneStruct, for which we need the deadSpace
3045  * after currentConverter.
3046  * This is because UAlignedMemory may be larger than the actually
3047  * necessary alignment size for the platform.
3048  * The other cloneStruct fields will not be moved around,
3049  * and are aligned properly with cloneStruct's alignment.
3050  */
3051 struct cloneStruct
3052 {
         /* the cloned ISO-2022 converter; _ISO_2022_SafeClone() returns a pointer to this member */
3053     UConverter cnv;
         /* storage handed to ucnv_safeClone() for the clone of the sub-converter */
3054     UConverter currentConverter;
         /* padding after currentConverter, counted in the size passed to ucnv_safeClone() */
3055     UAlignedMemory deadSpace;
         /* copy of the original's UConverterDataISO2022; cnv.extraInfo is pointed at it */
3056     UConverterDataISO2022 mydata;
3057 };
3058
3059
3060 static UConverter *
3061 _ISO_2022_SafeClone(
3062             const UConverter *cnv,
3063             void *stackBuffer,
3064             int32_t *pBufferSize,
3065             UErrorCode *status)
3066 {
         /*
          * Completes the clone of an ISO-2022 converter inside stackBuffer, which
          * is laid out as a struct cloneStruct.
          *
          * cnv         - the converter being cloned; its extraInfo is copied.
          * stackBuffer - destination memory, interpreted as struct cloneStruct.
          * pBufferSize - in/out: if *pBufferSize is 0 on entry this is a
          *               preflight request; sizeof(struct cloneStruct) is stored
          *               and NULL is returned. Otherwise the value is not
          *               examined here (presumably the ucnv_safeClone() caller
          *               has already checked it - confirm).
          * status      - in/out error code, set by the nested ucnv_safeClone().
          *
          * Returns a pointer to the cnv member of the clone struct, or NULL for a
          * preflight request or when cloning the sub-converter fails.
          */
3067     struct cloneStruct * localClone;
3068     UConverterDataISO2022 *cnvData;
3069     int32_t i, size;
3070
3071     if (*pBufferSize == 0) { /* 'preflighting' request - set needed size into *pBufferSize */
3072         *pBufferSize = (int32_t)sizeof(struct cloneStruct);
3073         return NULL;
3074     }
3075
3076     cnvData = (UConverterDataISO2022 *)cnv->extraInfo;
3077     localClone = (struct cloneStruct *)stackBuffer;
3078
3079     /* ucnv.c/ucnv_safeClone() copied the main UConverter already */
3080
         /* give the clone its own copy of the ISO-2022 state and mark it as embedded in the clone's memory */
3081     uprv_memcpy(&localClone->mydata, cnvData, sizeof(UConverterDataISO2022));
3082     localClone->cnv.extraInfo = &localClone->mydata; /* set pointer to extra data */
3083     localClone->cnv.isExtraLocal = TRUE;
3084
3085     /* share the subconverters */
3086
         /* currentConverter, if any, is cloned into the space reserved for it in the clone struct */
3087     if(cnvData->currentConverter != NULL) {
3088         size = (int32_t)(sizeof(UConverter) + sizeof(UAlignedMemory)); /* include size of padding */
3089         localClone->mydata.currentConverter =
3090             ucnv_safeClone(cnvData->currentConverter,
3091                             &localClone->currentConverter,
3092                             &size, status);
3093         if(U_FAILURE(*status)) {
                 /* no reference counts have been incremented yet at this point */
3094             return NULL;
3095         }
3096     }
3097
         /* the copied data refers to the same myConverterArray entries as the original, so each non-NULL entry gets one more reference */
3098     for(i=0; i<UCNV_2022_MAX_CONVERTERS; ++i) {
3099         if(cnvData->myConverterArray[i] != NULL) {
3100             ucnv_incrementRefCount(cnvData->myConverterArray[i]);
3101         }
3102     }
3103
3104     return &localClone->cnv;
3105 }
3106
3107 static void
3108 _ISO_2022_GetUnicodeSet(const UConverter *cnv,
3109                     const USetAdder *sa,
3110                     UConverterUnicodeSet which,
3111                     UErrorCode *pErrorCode)
3112 {
         /*
          * Adds to the caller's set (through the callbacks in sa) the code points
          * this ISO-2022 converter handles, then removes U+000E, U+000F and U+001B.
          *
          * cnv        - the ISO-2022 converter; its extraInfo supplies locale,
          *              version, currentConverter and myConverterArray.
          * sa         - set plus addRange/remove callbacks used for all updates.
          * which      - forwarded unchanged to the sub-converter queries, except
          *              for the CN version 0 CNS special case, which always
          *              passes UCNV_ROUNDTRIP_SET.
          * pErrorCode - in/out; the function does nothing if it already holds a
          *              failure.
          */
3113     int32_t i;
3114     UConverterDataISO2022* cnvData;
3115
3116     if (U_FAILURE(*pErrorCode)) {
3117         return;
3118     }
3119 #ifdef U_ENABLE_GENERIC_ISO_2022
3120     if (cnv->sharedData == &_ISO2022Data) {
3121         /* We use UTF-8 in this case */
             /* everything except the surrogate range D800..DFFF */
3122         sa->addRange(sa->set, 0, 0xd7FF);
3123         sa->addRange(sa->set, 0xE000, 0x10FFFF);
3124         return;
3125     }
3126 #endif
3127
3128     cnvData = (UConverterDataISO2022*)cnv->extraInfo;
3129
3130     /* seed the caller's set (via sa) with code points that are algorithmically round-tripped */
3131     switch(cnvData->locale[0]){
3132     case 'j':
3133         if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) {
3134             /* include Latin-1 for some variants of JP */
3135             sa->addRange(sa->set, 0, 0xff);
3136         } else {
3137             /* include ASCII for JP */
3138             sa->addRange(sa->set, 0, 0x7f);
3139         }
3140         if(jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT)) {
3141             /* include half-width Katakana for JP */
3142             sa->addRange(sa->set, 0xff61, 0xff9f);
3143         }
3144         break;
3145     case 'c':
3146     case 'z':
3147         /* include ASCII for CN */
3148         sa->addRange(sa->set, 0, 0x7f);
3149         break;
3150     case 'k':
3151         /* there is only one converter for KR, and it is not in the myConverterArray[] */
             /* currentConverter is dereferenced without a NULL check - presumably always set for KR; confirm */
3152         cnvData->currentConverter->sharedData->impl->getUnicodeSet(
3153                 cnvData->currentConverter, sa, which, pErrorCode);
3154         /* the loop over myConverterArray[] will simply not find another converter */
3155         break;
3156     default:
3157         break;
3158     }
3159
3160     /*
3161      * Version-specific for CN:
3162      * CN version 0 does not map CNS planes 3..7 although
3163      * they are all available in the CNS conversion table;
3164      * CN version 1 does map them all.
3165      * The two versions create different Unicode sets.
3166      */
3167     for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) {
3168         if(cnvData->myConverterArray[i]!=NULL) {
3169             if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
3170                 cnvData->version==0 && i==CNS_11643
3171             ) {
3172                 /* special handling for non-EXT ISO-2022-CN: add only code points for CNS planes 1 and 2 */
                     /* note: this call passes UCNV_ROUNDTRIP_SET, not the caller's `which` */
3173                 ucnv_MBCSGetUnicodeSetForBytes(
3174                         cnvData->myConverterArray[i],
3175                         sa, UCNV_ROUNDTRIP_SET,
3176                         0, 0x81, 0x82,
3177                         pErrorCode);
3178             } else {
3179                 ucnv_MBCSGetUnicodeSetForUnicode(cnvData->myConverterArray[i], sa, which, pErrorCode);
3180             }
3181         }
3182     }
3183
3184     /*
3185      * ISO 2022 converters must not convert SO/SI/ESC despite what
3186      * sub-converters do by themselves.
3187      * Remove these characters from the set.
3188      */
3189     sa->remove(sa->set, 0x0e);
3190     sa->remove(sa->set, 0x0f);
3191     sa->remove(sa->set, 0x1b);
3192 }
3193
3194 static const UConverterImpl _ISO2022Impl={
         /*
          * Function table for the generic ISO-2022 converter.
          * The initializer is positional; the slot names are not visible in this
          * file (presumably they come from the UConverterImpl declaration in
          * ucnv_cnv.h - confirm the order there before adding or moving entries).
          * The four conversion slots are filled only when
          * U_ENABLE_GENERIC_ISO_2022 is defined; otherwise they are NULL.
          */
3195     UCNV_ISO_2022,
3196
3197     NULL,
3198     NULL,
3199
         /* open / close / reset handlers, shared by all four tables in this file */
3200     _ISO2022Open,
3201     _ISO2022Close,
3202     _ISO2022Reset,
3203
         /* conversion functions: two toUnicode slots followed by two fromUnicode slots */
3204 #ifdef U_ENABLE_GENERIC_ISO_2022
3205     T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC,
3206     T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC,
3207     ucnv_fromUnicode_UTF8,
3208     ucnv_fromUnicode_UTF8_OFFSETS_LOGIC,
3209 #else
3210     NULL,
3211     NULL,
3212     NULL,
3213     NULL,
3214 #endif
3215     NULL,
3216
3217     NULL,
         /* name, substitution-writer, clone and Unicode-set handlers defined in this file */
3218     _ISO2022getName,
3219     _ISO_2022_WriteSub,
3220     _ISO_2022_SafeClone,
3221     _ISO_2022_GetUnicodeSet
3222 };
3223 static const UConverterStaticData _ISO2022StaticData={
         /*
          * Static description of the generic ISO-2022 converter.
          * Positional initializer; field names are not visible in this file
          * (presumably declared with UConverterStaticData in another header -
          * confirm before editing). The per-line notes below are hedged for the
          * same reason.
          */
3224     sizeof(UConverterStaticData),
3225     "ISO_2022",
         /* presumably the codepage number; the JP/KR/CN tables use 0 here - confirm */
3226     2022,
3227     UCNV_IBM,
3228     UCNV_ISO_2022,
         /* presumably the minimum bytes per character, paired with the maximum on the next line - confirm */
3229     1,
3230     3, /* max 3 bytes per UChar from UTF-8 (4 bytes from surrogate _pair_) */
         /* presumably the default substitution byte(s) and their length - confirm */
3231     { 0x1a, 0, 0, 0 },
3232     1,
3233     FALSE,
3234     FALSE,
3235     0,
3236     0,
3237     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
3238 };
3239 const UConverterSharedData _ISO2022Data={
         /*
          * Shared-data record for the generic ISO-2022 converter; it links the
          * static description and the function table defined just above.
          * Unlike the JP/KR/CN records in this file, this one is not declared
          * static. Positional initializer; field names are not visible here.
          */
3240     sizeof(UConverterSharedData),
         /* all bits set - presumably a reference count that marks the record as never released; confirm */
3241     ~((uint32_t) 0),
3242     NULL,
3243     NULL,
3244     &_ISO2022StaticData,
3245     FALSE,
3246     &_ISO2022Impl,
3247     0
3248 };
3249
3250 /*************JP****************/
3251 static const UConverterImpl _ISO2022JPImpl={
         /*
          * Function table for the ISO-2022-JP converter.
          * The initializer is positional; the slot names are not visible in this
          * file (presumably they come from the UConverterImpl declaration in
          * ucnv_cnv.h - confirm the order there before adding or moving entries).
          */
3252     UCNV_ISO_2022,
3253
3254     NULL,
3255     NULL,
3256
         /* open / close / reset handlers, shared by all four tables in this file */
3257     _ISO2022Open,
3258     _ISO2022Close,
3259     _ISO2022Reset,
3260
         /*
          * The same function fills both toUnicode slots and the same function
          * fills both fromUnicode slots (presumably the plain and with-offsets
          * entry points - confirm).
          */
3261     UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC,
3262     UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC,
3263     UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC,
3264     UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC,
3265     NULL,
3266
3267     NULL,
         /* name, substitution-writer, clone and Unicode-set handlers defined in this file */
3268     _ISO2022getName,
3269     _ISO_2022_WriteSub,
3270     _ISO_2022_SafeClone,
3271     _ISO_2022_GetUnicodeSet
3272 };
3273 static const UConverterStaticData _ISO2022JPStaticData={
         /*
          * Static description of the ISO-2022-JP converter.
          * Positional initializer; field names are not visible in this file
          * (presumably declared with UConverterStaticData in another header -
          * confirm before editing). The per-line notes below are hedged for the
          * same reason.
          */
3274     sizeof(UConverterStaticData),
3275     "ISO_2022_JP",
3276     0,
3277     UCNV_IBM,
3278     UCNV_ISO_2022,
         /* presumably the minimum bytes per character, paired with the maximum on the next line - confirm */
3279     1,
3280     6, /* max 6 bytes per UChar: 4-byte escape sequence + DBCS */
         /* presumably the default substitution byte(s) and their length - confirm */
3281     { 0x1a, 0, 0, 0 },
3282     1,
3283     FALSE,
3284     FALSE,
3285     0,
3286     0,
3287     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
3288 };
3289 static const UConverterSharedData _ISO2022JPData={
         /*
          * Shared-data record for the ISO-2022-JP converter; it links the JP
          * static description and the JP function table defined just above.
          * Positional initializer; field names are not visible in this file.
          */
3290     sizeof(UConverterSharedData),
         /* all bits set - presumably a reference count that marks the record as never released; confirm */
3291     ~((uint32_t) 0),
3292     NULL,
3293     NULL,
3294     &_ISO2022JPStaticData,
3295     FALSE,
3296     &_ISO2022JPImpl,
3297     0
3298 };
3299
3300 /************* KR ***************/
3301 static const UConverterImpl _ISO2022KRImpl={
         /*
          * Function table for the ISO-2022-KR converter.
          * The initializer is positional; the slot names are not visible in this
          * file (presumably they come from the UConverterImpl declaration in
          * ucnv_cnv.h - confirm the order there before adding or moving entries).
          */
3302     UCNV_ISO_2022,
3303
3304     NULL,
3305     NULL,
3306
         /* open / close / reset handlers, shared by all four tables in this file */
3307     _ISO2022Open,
3308     _ISO2022Close,
3309     _ISO2022Reset,
3310
         /*
          * The same function fills both toUnicode slots and the same function
          * fills both fromUnicode slots (presumably the plain and with-offsets
          * entry points - confirm).
          */
3311     UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC,
3312     UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC,
3313     UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC,
3314     UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC,
3315     NULL,
3316
3317     NULL,
         /* name, substitution-writer, clone and Unicode-set handlers defined in this file */
3318     _ISO2022getName,
3319     _ISO_2022_WriteSub,
3320     _ISO_2022_SafeClone,
3321     _ISO_2022_GetUnicodeSet
3322 };
3323 static const UConverterStaticData _ISO2022KRStaticData={
         /*
          * Static description of the ISO-2022-KR converter.
          * Positional initializer; field names are not visible in this file
          * (presumably declared with UConverterStaticData in another header -
          * confirm before editing). The per-line notes below are hedged for the
          * same reason.
          */
3324     sizeof(UConverterStaticData),
3325     "ISO_2022_KR",
3326     0,
3327     UCNV_IBM,
3328     UCNV_ISO_2022,
         /* presumably the minimum bytes per character, paired with the maximum on the next line - confirm */
3329     1,
3330     3, /* max 3 bytes per UChar: SO+DBCS */
         /* presumably the default substitution byte(s) and their length - confirm */
3331     { 0x1a, 0, 0, 0 },
3332     1,
3333     FALSE,
3334     FALSE,
3335     0,
3336     0,
3337     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
3338 };
3339 static const UConverterSharedData _ISO2022KRData={
         /*
          * Shared-data record for the ISO-2022-KR converter; it links the KR
          * static description and the KR function table defined just above.
          * Positional initializer; field names are not visible in this file.
          */
3340     sizeof(UConverterSharedData),
         /* all bits set - presumably a reference count that marks the record as never released; confirm */
3341     ~((uint32_t) 0),
3342     NULL,
3343     NULL,
3344     &_ISO2022KRStaticData,
3345     FALSE,
3346     &_ISO2022KRImpl,
3347     0
3348 };
3349
3350 /*************** CN ***************/
3351 static const UConverterImpl _ISO2022CNImpl={
         /*
          * Function table for the ISO-2022-CN converter.
          * The initializer is positional; the slot names are not visible in this
          * file (presumably they come from the UConverterImpl declaration in
          * ucnv_cnv.h - confirm the order there before adding or moving entries).
          */
3352
3353     UCNV_ISO_2022,
3354
3355     NULL,
3356     NULL,
3357
         /* open / close / reset handlers, shared by all four tables in this file */
3358     _ISO2022Open,
3359     _ISO2022Close,
3360     _ISO2022Reset,
3361
         /*
          * The same function fills both toUnicode slots and the same function
          * fills both fromUnicode slots (presumably the plain and with-offsets
          * entry points - confirm).
          */
3362     UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC,
3363     UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC,
3364     UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC,
3365     UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC,
3366     NULL,
3367
3368     NULL,
         /* name, substitution-writer, clone and Unicode-set handlers defined in this file */
3369     _ISO2022getName,
3370     _ISO_2022_WriteSub,
3371     _ISO_2022_SafeClone,
3372     _ISO_2022_GetUnicodeSet
3373 };
3374 static const UConverterStaticData _ISO2022CNStaticData={
         /*
          * Static description of the ISO-2022-CN converter.
          * Positional initializer; field names are not visible in this file
          * (presumably declared with UConverterStaticData in another header -
          * confirm before editing). The per-line notes below are hedged for the
          * same reason.
          */
3375     sizeof(UConverterStaticData),
3376     "ISO_2022_CN",
3377     0,
3378     UCNV_IBM,
3379     UCNV_ISO_2022,
         /* presumably the minimum bytes per character, paired with the maximum on the next line - confirm */
3380     1,
3381     8, /* max 8 bytes per UChar: 4-byte CNS designator + 2 bytes for SS2/SS3 + DBCS */
         /* presumably the default substitution byte(s) and their length - confirm */
3382     { 0x1a, 0, 0, 0 },
3383     1,
3384     FALSE,
3385     FALSE,
3386     0,
3387     0,
3388     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
3389 };
3390 static const UConverterSharedData _ISO2022CNData={
         /*
          * Shared-data record for the ISO-2022-CN converter; it links the CN
          * static description and the CN function table defined just above.
          * Positional initializer; field names are not visible in this file.
          */
3391     sizeof(UConverterSharedData),
         /* all bits set - presumably a reference count that marks the record as never released; confirm */
3392     ~((uint32_t) 0),
3393     NULL,
3394     NULL,
3395     &_ISO2022CNStaticData,
3396     FALSE,
3397     &_ISO2022CNImpl,
3398     0
3399 };
3400
3401
3402
3403 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */