icuSources/test/cintltst/nucnvtst.c

   1 /********************************************************************
   2  * COPYRIGHT:
   3  * Copyright (c) 1997-2006, International Business Machines Corporation and
   4  * others. All Rights Reserved.
   5  ********************************************************************/
   6 /*******************************************************************************
   7 *
   8 * File CCONVTST.C
   9 *
  10 * Modification History:
  11 *        Name                     Description
  12 *    Steven R. Loomis     7/8/1999      Adding input buffer test
  13 ********************************************************************************
  14 */
  15 #include <stdio.h>
  16 #include "cstring.h"
  17 #include "unicode/uloc.h"
  18 #include "unicode/ucnv.h"
  19 #include "unicode/ucnv_err.h"
  20 #include "cintltst.h"
  21 #include "unicode/utypes.h"
  22 #include "unicode/ustring.h"
  23 #include "unicode/ucol.h"
  24 #include "cmemory.h"
  25
  26 static void TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message);
  27 static void TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message);
  28 #if !UCONFIG_NO_COLLATION
  29 static void TestJitterbug981(void);
  30 #endif
  31 static void TestJitterbug1293(void);
  32 static void TestNewConvertWithBufferSizes(int32_t osize, int32_t isize) ;
  33 static void TestConverterTypesAndStarters(void);
  34 static void TestAmbiguous(void);
  35 static void TestSignatureDetection(void);
  36 static void TestUTF7(void);
  37 static void TestIMAP(void);
  38 static void TestUTF8(void);
  39 static void TestCESU8(void);
  40 static void TestUTF16(void);
  41 static void TestUTF16BE(void);
  42 static void TestUTF16LE(void);
  43 static void TestUTF32(void);
  44 static void TestUTF32BE(void);
  45 static void TestUTF32LE(void);
  46 static void TestLATIN1(void);
  47
  48 #if !UCONFIG_NO_LEGACY_CONVERSION
  49 static void TestSBCS(void);
  50 static void TestDBCS(void);
  51 static void TestMBCS(void);
  52
  53 #ifdef U_ENABLE_GENERIC_ISO_2022
  54 static void TestISO_2022(void);
  55 #endif
  56
  57 static void TestISO_2022_JP(void);
  58 static void TestISO_2022_JP_1(void);
  59 static void TestISO_2022_JP_2(void);
  60 static void TestISO_2022_KR(void);
  61 static void TestISO_2022_KR_1(void);
  62 static void TestISO_2022_CN(void);
  63 static void TestISO_2022_CN_EXT(void);
  64 static void TestJIS(void);
  65 static void TestHZ(void);
  66 #endif
  67
  68 static void TestSCSU(void);
  69
  70 #if !UCONFIG_NO_LEGACY_CONVERSION
  71 static void TestEBCDIC_STATEFUL(void);
  72 static void TestGB18030(void);
  73 static void TestLMBCS(void);
  74 static void TestJitterbug255(void);
  75 static void TestEBCDICUS4XML(void);
  76 static void TestJitterbug915(void);
  77 static void TestISCII(void);
  78
  79 static void TestCoverageMBCS(void);
  80 static void TestJitterbug2346(void);
  81 static void TestJitterbug2411(void);
  82 #endif
  83
  84 static void TestRoundTrippingAllUTF(void);
  85 static void TestConv(const uint16_t in[],
  86                      int len,
  87                      const char* conv,
  88                      const char* lang,
  89                      char byteArr[],
  90                      int byteArrLen);
  91 void addTestNewConvert(TestNode** root);
  92
  93 /* open a converter, using test data if it begins with '@' */
  94 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err);
  95
  96
  97 #define NEW_MAX_BUFFER 999
  98
  99 static int32_t  gInBufferSize = NEW_MAX_BUFFER;
 100 static int32_t  gOutBufferSize = NEW_MAX_BUFFER;
 101 static char     gNuConvTestName[1024];
 102
 103 #define nct_min(x,y)  ((x<y) ? x : y)
 104
 105 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err)
 106 {
 107   if(cnv && cnv[0] == '@') {
 108     return ucnv_openPackage(loadTestData(err), cnv+1, err);
 109   } else {
 110     return ucnv_open(cnv, err);
 111   }
 112 }
 113
 114 static void printSeq(const unsigned char* a, int len)
 115 {
 116     int i=0;
 117     log_verbose("{");
 118     while (i<len)
 119         log_verbose("0x%02x ", a[i++]);
 120     log_verbose("}\n");
 121 }
 122
 123 static void printUSeq(const UChar* a, int len)
 124 {
 125     int i=0;
 126     log_verbose("{U+");
 127     while (i<len) log_verbose("0x%04x ", a[i++]);
 128     log_verbose("}\n");
 129 }
 130
 131 static void printSeqErr(const unsigned char* a, int len)
 132 {
 133     int i=0;
 134     fprintf(stderr, "{");
 135     while (i<len)
 136         fprintf(stderr, "0x%02x ", a[i++]);
 137     fprintf(stderr, "}\n");
 138 }
 139
 140 static void printUSeqErr(const UChar* a, int len)
 141 {
 142     int i=0;
 143     fprintf(stderr, "{U+");
 144     while (i<len)
 145         fprintf(stderr, "0x%04x ", a[i++]);
 146     fprintf(stderr,"}\n");
 147 }
 148
 149 static void
 150 TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message)
 151 {
 152      const char* s0;
 153      const char* s=(char*)source;
 154      const int32_t *r=results;
 155      UErrorCode errorCode=U_ZERO_ERROR;
 156      UChar32 c;
 157
 158      while(s<limit) {
 159         s0=s;
 160         c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
 161         if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
 162             break; /* no more significant input */
 163         } else if(U_FAILURE(errorCode)) {
 164             log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
 165             break;
 166         } else if(
 167             /* test the expected number of input bytes only if >=0 */
 168             (*r>=0 && (int32_t)(s-s0)!=*r) ||
 169             c!=*(r+1)
 170         ) {
 171             log_err("%s ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
 172                 message, c, (s-s0), *(r+1), *r);
 173             break;
 174         }
 175         r+=2;
 176     }
 177 }
 178
 179 static void
 180 TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message)
 181 {
 182      const char* s=(char*)source;
 183      UErrorCode errorCode=U_ZERO_ERROR;
 184      uint32_t c;
 185      c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
 186      if(errorCode != expected){
 187         log_err("FAIL: Expected:%s when %s-----Got:%s\n", myErrorName(expected), message, myErrorName(errorCode));
 188      }
 189      if(c != 0xFFFD && c != 0xffff){
 190         log_err("FAIL: Expected return value of 0xfffd or 0xffff when %s-----Got 0x%lx\n", message, c);
 191      }
 192
 193 }
 194
 195 static void TestInBufSizes(void)
 196 {
 197   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,1);
 198 #if 1
 199   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,2);
 200   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,3);
 201   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,4);
 202   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,5);
 203   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,6);
 204   TestNewConvertWithBufferSizes(1,1);
 205   TestNewConvertWithBufferSizes(2,3);
 206   TestNewConvertWithBufferSizes(3,2);
 207 #endif
 208 }
 209
 210 static void TestOutBufSizes(void)
 211 {
 212 #if 1
 213   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,NEW_MAX_BUFFER);
 214   TestNewConvertWithBufferSizes(1,NEW_MAX_BUFFER);
 215   TestNewConvertWithBufferSizes(2,NEW_MAX_BUFFER);
 216   TestNewConvertWithBufferSizes(3,NEW_MAX_BUFFER);
 217   TestNewConvertWithBufferSizes(4,NEW_MAX_BUFFER);
 218   TestNewConvertWithBufferSizes(5,NEW_MAX_BUFFER);
 219
 220 #endif
 221 }
 222
 223
 224 void addTestNewConvert(TestNode** root)
 225 {
 226    addTest(root, &TestInBufSizes, "tsconv/nucnvtst/TestInBufSizes");
 227    addTest(root, &TestOutBufSizes, "tsconv/nucnvtst/TestOutBufSizes");
 228    addTest(root, &TestConverterTypesAndStarters, "tsconv/nucnvtst/TestConverterTypesAndStarters");
 229    addTest(root, &TestAmbiguous, "tsconv/nucnvtst/TestAmbiguous");
 230    addTest(root, &TestSignatureDetection, "tsconv/nucnvtst/TestSignatureDetection");
 231    addTest(root, &TestUTF7, "tsconv/nucnvtst/TestUTF7");
 232    addTest(root, &TestIMAP, "tsconv/nucnvtst/TestIMAP");
 233    addTest(root, &TestUTF8, "tsconv/nucnvtst/TestUTF8");
 234
 235    /* test ucnv_getNextUChar() for charsets that encode single surrogates with complete byte sequences */
 236    addTest(root, &TestCESU8, "tsconv/nucnvtst/TestCESU8");
 237    addTest(root, &TestUTF16, "tsconv/nucnvtst/TestUTF16");
 238    addTest(root, &TestUTF16BE, "tsconv/nucnvtst/TestUTF16BE");
 239    addTest(root, &TestUTF16LE, "tsconv/nucnvtst/TestUTF16LE");
 240    addTest(root, &TestUTF32, "tsconv/nucnvtst/TestUTF32");
 241    addTest(root, &TestUTF32BE, "tsconv/nucnvtst/TestUTF32BE");
 242    addTest(root, &TestUTF32LE, "tsconv/nucnvtst/TestUTF32LE");
 243
 244 #if !UCONFIG_NO_LEGACY_CONVERSION
 245    addTest(root, &TestLMBCS, "tsconv/nucnvtst/TestLMBCS");
 246 #endif
 247
 248    addTest(root, &TestLATIN1, "tsconv/nucnvtst/TestLATIN1");
 249
 250 #if !UCONFIG_NO_LEGACY_CONVERSION
 251    addTest(root, &TestSBCS, "tsconv/nucnvtst/TestSBCS");
 252    addTest(root, &TestDBCS, "tsconv/nucnvtst/TestDBCS");
 253    addTest(root, &TestMBCS, "tsconv/nucnvtst/TestMBCS");
 254
 255 #ifdef U_ENABLE_GENERIC_ISO_2022
 256    addTest(root, &TestISO_2022, "tsconv/nucnvtst/TestISO_2022");
 257 #endif
 258
 259    addTest(root, &TestISO_2022_JP, "tsconv/nucnvtst/TestISO_2022_JP");
 260    addTest(root, &TestJIS, "tsconv/nucnvtst/TestJIS");
 261    addTest(root, &TestISO_2022_JP_1, "tsconv/nucnvtst/TestISO_2022_JP_1");
 262    addTest(root, &TestISO_2022_JP_2, "tsconv/nucnvtst/TestISO_2022_JP_2");
 263    addTest(root, &TestISO_2022_KR, "tsconv/nucnvtst/TestISO_2022_KR");
 264    addTest(root, &TestISO_2022_KR_1, "tsconv/nucnvtst/TestISO_2022_KR_1");
 265    addTest(root, &TestISO_2022_CN, "tsconv/nucnvtst/TestISO_2022_CN");
 266    addTest(root, &TestISO_2022_CN_EXT, "tsconv/nucnvtst/TestISO_2022_CN_EXT");
 267    addTest(root, &TestJitterbug915, "tsconv/nucnvtst/TestJitterbug915");
 268    addTest(root, &TestHZ, "tsconv/nucnvtst/TestHZ");
 269 #endif
 270
 271    addTest(root, &TestSCSU, "tsconv/nucnvtst/TestSCSU");
 272
 273 #if !UCONFIG_NO_LEGACY_CONVERSION
 274    addTest(root, &TestEBCDIC_STATEFUL, "tsconv/nucnvtst/TestEBCDIC_STATEFUL");
 275    addTest(root, &TestGB18030, "tsconv/nucnvtst/TestGB18030");
 276    addTest(root, &TestJitterbug255, "tsconv/nucnvtst/TestJitterbug255");
 277    addTest(root, &TestEBCDICUS4XML, "tsconv/nucnvtst/TestEBCDICUS4XML");
 278    addTest(root, &TestISCII, "tsconv/nucnvtst/TestISCII");
 279
 280 #if !UCONFIG_NO_COLLATION
 281    addTest(root, &TestJitterbug981, "tsconv/nucnvtst/TestJitterbug981");
 282 #endif
 283
 284    addTest(root, &TestJitterbug1293, "tsconv/nucnvtst/TestJitterbug1293");
 285 #endif
 286
 287
 288 #if !UCONFIG_NO_LEGACY_CONVERSION
 289    addTest(root, &TestCoverageMBCS, "tsconv/nucnvtst/TestCoverageMBCS");
 290 #endif
 291
 292    addTest(root, &TestRoundTrippingAllUTF, "tsconv/nucnvtst/TestRoundTrippingAllUTF");
 293
 294 #if !UCONFIG_NO_LEGACY_CONVERSION
 295    addTest(root, &TestJitterbug2346, "tsconv/nucnvtst/TestJitterbug2346");
 296    addTest(root, &TestJitterbug2411, "tsconv/nucnvtst/TestJitterbug2411");
 297 #endif
 298
 299 }
 300
 301
 302 /* Note that this test already makes use of statics, so it's not really
 303    multithread safe.
 304    This convenience function lets us make the error messages actually useful.
 305 */
 306
 307 static void setNuConvTestName(const char *codepage, const char *direction)
 308 {
 309     sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
 310         codepage,
 311         direction,
 312         (int)gInBufferSize,
 313         (int)gOutBufferSize);
 314 }
 315
 316 typedef enum
 317 {
 318   TC_OK       = 0,  /* test was OK */
 319   TC_MISMATCH = 1,  /* Match failed - err was printed */
 320   TC_FAIL     = 2   /* Test failed, don't print an err because it was already printed. */
 321 } ETestConvertResult;
 322
 323 /* Note: This function uses global variables and it will not do offset
 324 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
 325 static ETestConvertResult testConvertFromU( const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
 326                 const char *codepage, const int32_t *expectOffsets , UBool useFallback)
 327 {
 328     UErrorCode status = U_ZERO_ERROR;
 329     UConverter *conv = 0;
 330     char    junkout[NEW_MAX_BUFFER]; /* FIX */
 331     int32_t    junokout[NEW_MAX_BUFFER]; /* FIX */
 332     char *p;
 333     const UChar *src;
 334     char *end;
 335     char *targ;
 336     int32_t *offs;
 337     int i;
 338     int32_t   realBufferSize;
 339     char *realBufferEnd;
 340     const UChar *realSourceEnd;
 341     const UChar *sourceLimit;
 342     UBool checkOffsets = TRUE;
 343     UBool doFlush;
 344
 345     for(i=0;i<NEW_MAX_BUFFER;i++)
 346         junkout[i] = (char)0xF0;
 347     for(i=0;i<NEW_MAX_BUFFER;i++)
 348         junokout[i] = 0xFF;
 349
 350     setNuConvTestName(codepage, "FROM");
 351
 352     log_verbose("\n=========  %s\n", gNuConvTestName);
 353
 354     conv = my_ucnv_open(codepage, &status);
 355
 356     if(U_FAILURE(status))
 357     {
 358         log_data_err("Couldn't open converter %s\n",codepage);
 359         return TC_FAIL;
 360     }
 361     if(useFallback){
 362         ucnv_setFallback(conv,useFallback);
 363     }
 364
 365     log_verbose("Converter opened..\n");
 366
 367     src = source;
 368     targ = junkout;
 369     offs = junokout;
 370
 371     realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
 372     realBufferEnd = junkout + realBufferSize;
 373     realSourceEnd = source + sourceLen;
 374
 375     if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER )
 376         checkOffsets = FALSE;
 377
 378     do
 379     {
 380       end = nct_min(targ + gOutBufferSize, realBufferEnd);
 381       sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
 382
 383       doFlush = (UBool)(sourceLimit == realSourceEnd);
 384
 385       if(targ == realBufferEnd) {
 386         log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
 387         return TC_FAIL;
 388       }
 389       log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx  TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE");
 390
 391
 392       status = U_ZERO_ERROR;
 393
 394       ucnv_fromUnicode (conv,
 395                         &targ,
 396                         end,
 397                         &src,
 398                         sourceLimit,
 399                         checkOffsets ? offs : NULL,
 400                         doFlush, /* flush if we're at the end of the input data */
 401                         &status);
 402     } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourceLimit < realSourceEnd) );
 403
 404     if(U_FAILURE(status)) {
 405       log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
 406       return TC_FAIL;
 407     }
 408
 409     log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
 410                 sourceLen, targ-junkout);
 411
 412     if(VERBOSITY)
 413     {
 414       char junk[9999];
 415       char offset_str[9999];
 416       char *ptr;
 417
 418       junk[0] = 0;
 419       offset_str[0] = 0;
 420       for(ptr = junkout;ptr<targ;ptr++) {
 421         sprintf(junk + strlen(junk), "0x%02x, ", (int)(0xFF & *ptr));
 422         sprintf(offset_str + strlen(offset_str), "0x%02x, ", (int)(0xFF & junokout[ptr-junkout]));
 423       }
 424
 425       log_verbose(junk);
 426       printSeq((const uint8_t *)expect, expectLen);
 427       if ( checkOffsets ) {
 428         log_verbose("\nOffsets:");
 429         log_verbose(offset_str);
 430       }
 431       log_verbose("\n");
 432     }
 433     ucnv_close(conv);
 434
 435     if(expectLen != targ-junkout) {
 436       log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
 437       log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
 438       printf("\nGot:");
 439       printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
 440       printf("\nExpected:");
 441       printSeqErr((const unsigned char*)expect, expectLen);
 442       return TC_MISMATCH;
 443     }
 444
 445     if (checkOffsets && (expectOffsets != 0) ) {
 446       log_verbose("comparing %d offsets..\n", targ-junkout);
 447       if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
 448         log_err("did not get the expected offsets. %s\n", gNuConvTestName);
 449         printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
 450         log_err("\n");
 451         log_err("Got  :     ");
 452         for(p=junkout;p<targ;p++) {
 453           log_err("%d,", junokout[p-junkout]);
 454         }
 455         log_err("\n");
 456         log_err("Expected:  ");
 457         for(i=0; i<(targ-junkout); i++) {
 458           log_err("%d,", expectOffsets[i]);
 459         }
 460         log_err("\n");
 461       }
 462     }
 463
 464     log_verbose("comparing..\n");
 465     if(!memcmp(junkout, expect, expectLen)) {
 466       log_verbose("Matches!\n");
 467       return TC_OK;
 468     } else {
 469       log_err("String does not match u->%s\n", gNuConvTestName);
 470       printUSeqErr(source, sourceLen);
 471       printf("\nGot:");
 472       printSeqErr((const unsigned char *)junkout, expectLen);
 473       printf("\nExpected:");
 474       printSeqErr((const unsigned char *)expect, expectLen);
 475
 476       return TC_MISMATCH;
 477     }
 478 }
 479
 480 /* Note: This function uses global variables and it will not do offset
 481 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
 482 static ETestConvertResult testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
 483                                           const char *codepage, const int32_t *expectOffsets, UBool useFallback)
 484 {
 485     UErrorCode status = U_ZERO_ERROR;
 486     UConverter *conv = 0;
 487     UChar    junkout[NEW_MAX_BUFFER]; /* FIX */
 488     int32_t    junokout[NEW_MAX_BUFFER]; /* FIX */
 489     const char *src;
 490     const char *realSourceEnd;
 491     const char *srcLimit;
 492     UChar *p;
 493     UChar *targ;
 494     UChar *end;
 495     int32_t *offs;
 496     int i;
 497     UBool   checkOffsets = TRUE;
 498
 499     int32_t   realBufferSize;
 500     UChar *realBufferEnd;
 501
 502
 503     for(i=0;i<NEW_MAX_BUFFER;i++)
 504         junkout[i] = 0xFFFE;
 505
 506     for(i=0;i<NEW_MAX_BUFFER;i++)
 507         junokout[i] = -1;
 508
 509     setNuConvTestName(codepage, "TO");
 510
 511     log_verbose("\n=========  %s\n", gNuConvTestName);
 512
 513     conv = my_ucnv_open(codepage, &status);
 514
 515     if(U_FAILURE(status))
 516     {
 517         log_data_err("Couldn't open converter %s\n",gNuConvTestName);
 518         return TC_FAIL;
 519     }
 520     if(useFallback){
 521         ucnv_setFallback(conv,useFallback);
 522     }
 523     log_verbose("Converter opened..\n");
 524
 525     src = (const char *)source;
 526     targ = junkout;
 527     offs = junokout;
 528
 529     realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
 530     realBufferEnd = junkout + realBufferSize;
 531     realSourceEnd = src + sourcelen;
 532
 533     if ( gOutBufferSize != realBufferSize ||  gInBufferSize != NEW_MAX_BUFFER )
 534         checkOffsets = FALSE;
 535
 536     do
 537     {
 538         end = nct_min( targ + gOutBufferSize, realBufferEnd);
 539         srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
 540
 541         if(targ == realBufferEnd)
 542         {
 543             log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjet=%08lx %s",targ,gNuConvTestName);
 544             return TC_FAIL;
 545         }
 546         log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
 547
 548         /* oldTarg = targ; */
 549
 550         status = U_ZERO_ERROR;
 551
 552         ucnv_toUnicode (conv,
 553                 &targ,
 554                 end,
 555                 &src,
 556                 srcLimit,
 557                 checkOffsets ? offs : NULL,
 558                 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of hte source data */
 559                 &status);
 560
 561         /*        offs += (targ-oldTarg); */
 562
 563       } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
 564
 565     if(U_FAILURE(status))
 566     {
 567         log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
 568         return TC_FAIL;
 569     }
 570
 571     log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
 572         sourcelen, targ-junkout);
 573     if(VERBOSITY)
 574     {
 575         char junk[9999];
 576         char offset_str[9999];
 577         UChar *ptr;
 578
 579         junk[0] = 0;
 580         offset_str[0] = 0;
 581
 582         for(ptr = junkout;ptr<targ;ptr++)
 583         {
 584             sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr);
 585             sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[ptr-junkout]);
 586         }
 587
 588         log_verbose(junk);
 589         printUSeq(expect, expectlen);
 590         if ( checkOffsets )
 591           {
 592             log_verbose("\nOffsets:");
 593             log_verbose(offset_str);
 594           }
 595         log_verbose("\n");
 596     }
 597     ucnv_close(conv);
 598
 599     log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
 600
 601     if (checkOffsets && (expectOffsets != 0))
 602     {
 603         if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){
 604             log_err("did not get the expected offsets. %s\n",gNuConvTestName);
 605             log_err("Got:      ");
 606             for(p=junkout;p<targ;p++) {
 607                 log_err("%d,", junokout[p-junkout]);
 608             }
 609             log_err("\n");
 610             log_err("Expected: ");
 611             for(i=0; i<(targ-junkout); i++) {
 612                 log_err("%d,", expectOffsets[i]);
 613             }
 614             log_err("\n");
 615             log_err("output:   ");
 616             for(i=0; i<(targ-junkout); i++) {
 617                 log_err("%X,", junkout[i]);
 618             }
 619             log_err("\n");
 620             log_err("input:    ");
 621             for(i=0; i<(src-(const char *)source); i++) {
 622                 log_err("%X,", (unsigned char)source[i]);
 623             }
 624             log_err("\n");
 625         }
 626     }
 627
 628     if(!memcmp(junkout, expect, expectlen*2))
 629     {
 630         log_verbose("Matches!\n");
 631         return TC_OK;
 632     }
 633     else
 634     {
 635         log_err("String does not match. %s\n", gNuConvTestName);
 636         log_verbose("String does not match. %s\n", gNuConvTestName);
 637         printf("\nGot:");
 638         printUSeqErr(junkout, expectlen);
 639         printf("\nExpected:");
 640         printUSeqErr(expect, expectlen);
 641         return TC_MISMATCH;
 642     }
 643 }
 644
 645
 646 static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize )
 647 {
 648 /** test chars #1 */
 649     /*  1 2 3  1Han 2Han 3Han .  */
 650     static const UChar   sampleText[] =
 651      { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E };
 652
 653
 654     static const uint8_t expectedUTF8[] =
 655      { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
 656     static const int32_t toUTF8Offs[] =
 657      { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07};
 658     static const int32_t fmUTF8Offs[] =
 659      { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d };
 660
 661 #ifdef U_ENABLE_GENERIC_ISO_2022
 662     /* Same as UTF8, but with ^[%B preceeding */
 663     static const const uint8_t expectedISO2022[] =
 664      { 0x1b, 0x25, 0x42, 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
 665     static const int32_t toISO2022Offs[]     =
 666      { -1, -1, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x04,
 667        0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07 }; /* right? */
 668     static const int32_t fmISO2022Offs[] =
 669      { 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x000a, 0x000d, 0x0010 }; /* is this right? */
 670 #endif
 671
 672     /*  1 2 3 0, <SO> h1 h2 h3 <SI> . EBCDIC_STATEFUL */
 673     static const uint8_t expectedIBM930[] =
 674      { 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0x4B };
 675     static const int32_t toIBM930Offs[] =
 676      { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, };
 677     static const int32_t fmIBM930Offs[] =
 678      { 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c};
 679
 680     /* 1 2 3 0 h1 h2 h3 . MBCS*/
 681     static const uint8_t expectedIBM943[] =
 682      {  0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e };
 683     static const int32_t toIBM943Offs    [] =
 684      {  0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07 };
 685     static const int32_t fmIBM943Offs[] =
 686      { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a};
 687
 688     /* 1 2 3 0 h1 h2 h3 . DBCS*/
 689     static const uint8_t expectedIBM9027[] =
 690      {  0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48, 0x4c, 0x55, 0xfe, 0xfe};
 691     static const int32_t toIBM9027Offs    [] =
 692      {  0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07};
 693
 694      /* 1 2 3 0 <?> <?> <?> . SBCS*/
 695     static const uint8_t expectedIBM920[] =
 696      {  0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e };
 697     static const int32_t toIBM920Offs    [] =
 698      {  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 };
 699
 700     /* 1 2 3 0 <?> <?> <?> . SBCS*/
 701     static const uint8_t expectedISO88593[] =
 702      { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E };
 703     static const int32_t toISO88593Offs[]     =
 704      { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07};
 705
 706     /* 1 2 3 0 <?> <?> <?> . <?> LATIN_1*/
 707     static const uint8_t expectedLATIN1[] =
 708      { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E };
 709     static const int32_t toLATIN1Offs[]     =
 710      { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07};
 711
 712
 713     /*  etc */
 714     static const uint8_t expectedUTF16BE[] =
 715      { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x00, 0x2e };
 716     static const int32_t toUTF16BEOffs[]=
 717      { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07};
 718     static const int32_t fmUTF16BEOffs[] =
 719      { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c,  0x000e };
 720
 721     static const uint8_t expectedUTF16LE[] =
 722      { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x4e, 0x2e, 0x00 };
 723     static const int32_t toUTF16LEOffs[]=
 724      { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06,  0x07, 0x07};
 725     static const int32_t fmUTF16LEOffs[] =
 726      { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c,  0x000e };
 727
 728     static const uint8_t expectedUTF32BE[] =
 729      { 0x00, 0x00, 0x00, 0x31,
 730        0x00, 0x00, 0x00, 0x32,
 731        0x00, 0x00, 0x00, 0x33,
 732        0x00, 0x00, 0x00, 0x00,
 733        0x00, 0x00, 0x4e, 0x00,
 734        0x00, 0x00, 0x4e, 0x8c,
 735        0x00, 0x00, 0x4e, 0x09,
 736        0x00, 0x00, 0x00, 0x2e };
 737     static const int32_t toUTF32BEOffs[]=
 738      { 0x00, 0x00, 0x00, 0x00,
 739        0x01, 0x01, 0x01, 0x01,
 740        0x02, 0x02, 0x02, 0x02,
 741        0x03, 0x03, 0x03, 0x03,
 742        0x04, 0x04, 0x04, 0x04,
 743        0x05, 0x05, 0x05, 0x05,
 744        0x06, 0x06, 0x06, 0x06,
 745        0x07, 0x07, 0x07, 0x07,
 746        0x08, 0x08, 0x08, 0x08 };
 747     static const int32_t fmUTF32BEOffs[] =
 748      { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018,  0x001c };
 749
 750     static const uint8_t expectedUTF32LE[] =
 751      { 0x31, 0x00, 0x00, 0x00,
 752        0x32, 0x00, 0x00, 0x00,
 753        0x33, 0x00, 0x00, 0x00,
 754        0x00, 0x00, 0x00, 0x00,
 755        0x00, 0x4e, 0x00, 0x00,
 756        0x8c, 0x4e, 0x00, 0x00,
 757        0x09, 0x4e, 0x00, 0x00,
 758        0x2e, 0x00, 0x00, 0x00 };
 759     static const int32_t toUTF32LEOffs[]=
 760      { 0x00, 0x00, 0x00, 0x00,
 761        0x01, 0x01, 0x01, 0x01,
 762        0x02, 0x02, 0x02, 0x02,
 763        0x03, 0x03, 0x03, 0x03,
 764        0x04, 0x04, 0x04, 0x04,
 765        0x05, 0x05, 0x05, 0x05,
 766        0x06, 0x06, 0x06, 0x06,
 767        0x07, 0x07, 0x07, 0x07,
 768        0x08, 0x08, 0x08, 0x08 };
 769     static const int32_t fmUTF32LEOffs[] =
 770      { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018,  0x001c };
 771
 772
 773
 774
 775 /** Test chars #2 **/
 776
 777     /* Sahha [health],  slashed h's */
 778     static const UChar malteseUChars[] = { 0x0053, 0x0061, 0x0127, 0x0127, 0x0061 };
 779     static const uint8_t expectedMaltese913[] = { 0x53, 0x61, 0xB1, 0xB1, 0x61 };
 780
 781     /* LMBCS */
 782     static const UChar LMBCSUChars[]     = { 0x0027, 0x010A, 0x0000, 0x0127, 0x2666, 0x0220 };
 783     static const uint8_t expectedLMBCS[] = { 0x27, 0x06, 0x04, 0x00, 0x01, 0x73, 0x01, 0x04, 0x14, 0x02, 0x20 };
 784     static const int32_t toLMBCSOffs[]   = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x04 , 0x05, 0x05, 0x05 };
 785     static const int32_t fmLMBCSOffs[]   = { 0x0000, 0x0001, 0x0003, 0x0004, 0x0006, 0x0008};
 786     /*********************************** START OF CODE finally *************/
 787
 788     gInBufferSize = insize;
 789     gOutBufferSize = outsize;
 790
 791     log_verbose("\n\n\nTesting conversions with InputBufferSize = %d, OutputBufferSize = %d\n", gInBufferSize, gOutBufferSize);
 792
 793
 794     /*UTF-8*/
 795     testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
 796         expectedUTF8, sizeof(expectedUTF8), "UTF8", toUTF8Offs,FALSE );
 797
 798     log_verbose("Test surrogate behaviour for UTF8\n");
 799     {
 800         static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01 };
 801         static const uint8_t expectedUTF8test2[]= { 0xe2, 0x82, 0xac,
 802                            0xf0, 0x90, 0x90, 0x81,
 803                            0xef, 0xbf, 0xbd
 804         };
 805         static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3 };
 806         testConvertFromU(testinput, sizeof(testinput)/sizeof(testinput[0]),
 807                          expectedUTF8test2, sizeof(expectedUTF8test2), "UTF8", offsets,FALSE );
 808
 809
 810     }
 811
 812 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
 813     /*ISO-2022*/
 814     testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
 815         expectedISO2022, sizeof(expectedISO2022), "ISO_2022", toISO2022Offs,FALSE );
 816 #endif
 817
 818     /*UTF16 LE*/
 819     testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
 820         expectedUTF16LE, sizeof(expectedUTF16LE), "utf-16le", toUTF16LEOffs,FALSE );
 821     /*UTF16 BE*/
 822     testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
 823         expectedUTF16BE, sizeof(expectedUTF16BE), "utf-16be", toUTF16BEOffs,FALSE );
 824     /*UTF32 LE*/
 825     testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
 826         expectedUTF32LE, sizeof(expectedUTF32LE), "utf-32le", toUTF32LEOffs,FALSE );
 827     /*UTF32 BE*/
 828     testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
 829         expectedUTF32BE, sizeof(expectedUTF32BE), "utf-32be", toUTF32BEOffs,FALSE );
 830
 831     /*LATIN_1*/
 832     testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
 833         expectedLATIN1, sizeof(expectedLATIN1), "LATIN_1", toLATIN1Offs,FALSE );
 834
 835 #if !UCONFIG_NO_LEGACY_CONVERSION
 836     /*EBCDIC_STATEFUL*/
 837     testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
 838         expectedIBM930, sizeof(expectedIBM930), "ibm-930", toIBM930Offs,FALSE );
 839
 840     testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
 841         expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE );
 842
 843     /*MBCS*/
 844
 845     testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
 846         expectedIBM943, sizeof(expectedIBM943), "ibm-943", toIBM943Offs,FALSE );
 847     /*DBCS*/
 848     testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
 849         expectedIBM9027, sizeof(expectedIBM9027), "@ibm9027", toIBM9027Offs,FALSE );
 850     /*SBCS*/
 851     testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
 852         expectedIBM920, sizeof(expectedIBM920), "ibm-920", toIBM920Offs,FALSE );
 853     /*SBCS*/
 854     testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
 855         expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE );
 856 #endif
 857
 858
 859 /****/
 860
 861     /*UTF-8*/
 862     testConvertToU(expectedUTF8, sizeof(expectedUTF8),
 863         sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf8", fmUTF8Offs,FALSE);
 864 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
 865     /*ISO-2022*/
 866     testConvertToU(expectedISO2022, sizeof(expectedISO2022),
 867         sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ISO_2022", fmISO2022Offs,FALSE);
 868 #endif
 869
 870     /*UTF16 LE*/
 871     testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
 872         sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE);
 873     /*UTF16 BE*/
 874     testConvertToU(expectedUTF16BE, sizeof(expectedUTF16BE),
 875         sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16be", fmUTF16BEOffs,FALSE);
 876     /*UTF32 LE*/
 877     testConvertToU(expectedUTF32LE, sizeof(expectedUTF32LE),
 878         sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32le", fmUTF32LEOffs,FALSE);
 879     /*UTF32 BE*/
 880     testConvertToU(expectedUTF32BE, sizeof(expectedUTF32BE),
 881         sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32be", fmUTF32BEOffs,FALSE);
 882
 883 #if !UCONFIG_NO_LEGACY_CONVERSION
 884     /*EBCDIC_STATEFUL*/
 885     testConvertToU(expectedIBM930, sizeof(expectedIBM930),
 886         sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ibm-930", fmIBM930Offs,FALSE);
 887     /*MBCS*/
 888     testConvertToU(expectedIBM943, sizeof(expectedIBM943),
 889         sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ibm-943", fmIBM943Offs,FALSE);
 890 #endif
 891
 892     /* Try it again to make sure it still works */
 893     testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
 894         sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE);
 895
 896 #if !UCONFIG_NO_LEGACY_CONVERSION
 897     testConvertToU(expectedMaltese913, sizeof(expectedMaltese913),
 898         malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]), "latin3", NULL,FALSE);
 899
 900     testConvertFromU(malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]),
 901         expectedMaltese913, sizeof(expectedMaltese913), "iso-8859-3", NULL,FALSE );
 902
 903     /*LMBCS*/
 904     testConvertFromU(LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]),
 905         expectedLMBCS, sizeof(expectedLMBCS), "LMBCS-1", toLMBCSOffs,FALSE );
 906     testConvertToU(expectedLMBCS, sizeof(expectedLMBCS),
 907         LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]), "LMBCS-1", fmLMBCSOffs,FALSE);
 908 #endif
 909
 910     /* UTF-7 examples are mostly from http://www.imc.org/rfc2152 */
 911     {
 912         /* encode directly set D and set O */
 913         static const uint8_t utf7[] = {
 914             /*
 915                 Hi Mom -+Jjo--!
 916                 A+ImIDkQ.
 917                 +-
 918                 +ZeVnLIqe
 919             */
 920             0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
 921             0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
 922             0x2b, 0x2d,
 923             0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65
 924         };
 925         static const UChar unicode[] = {
 926             /*
 927                 Hi Mom -<WHITE SMILING FACE>-!
 928                 A<NOT IDENTICAL TO><ALPHA>.
 929                 +
 930                 [Japanese word "nihongo"]
 931             */
 932             0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
 933             0x41, 0x2262, 0x0391, 0x2e,
 934             0x2b,
 935             0x65e5, 0x672c, 0x8a9e
 936         };
 937         static const int32_t toUnicodeOffsets[] = {
 938             0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
 939             15, 17, 19, 23,
 940             24,
 941             27, 29, 32
 942         };
 943         static const int32_t fromUnicodeOffsets[] = {
 944             0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
 945             11, 12, 12, 12, 13, 13, 13, 13, 14,
 946             15, 15,
 947             16, 16, 16, 17, 17, 17, 18, 18, 18
 948         };
 949
 950         /* same but escaping set O (the exclamation mark) */
 951         static const uint8_t utf7Restricted[] = {
 952             /*
 953                 Hi Mom -+Jjo--+ACE-
 954                 A+ImIDkQ.
 955                 +-
 956                 +ZeVnLIqe
 957             */
 958             0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x2b, 0x41, 0x43, 0x45, 0x2d,
 959             0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
 960             0x2b, 0x2d,
 961             0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65
 962         };
 963         static const int32_t toUnicodeOffsetsR[] = {
 964             0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 15,
 965             19, 21, 23, 27,
 966             28,
 967             31, 33, 36
 968         };
 969         static const int32_t fromUnicodeOffsetsR[] = {
 970             0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 10, 10, 10, 10,
 971             11, 12, 12, 12, 13, 13, 13, 13, 14,
 972             15, 15,
 973             16, 16, 16, 17, 17, 17, 18, 18, 18
 974         };
 975
 976         testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7, sizeof(utf7), "UTF-7", fromUnicodeOffsets,FALSE);
 977
 978         testConvertToU(utf7, sizeof(utf7), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7", toUnicodeOffsets,FALSE);
 979
 980         testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7Restricted, sizeof(utf7Restricted), "UTF-7,version=1", fromUnicodeOffsetsR,FALSE);
 981
 982         testConvertToU(utf7Restricted, sizeof(utf7Restricted), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7,version=1", toUnicodeOffsetsR,FALSE);
 983     }
 984
 985     /*
 986      * IMAP-mailbox-name examples are mostly from http://www.imc.org/rfc2152,
 987      * modified according to RFC 2060,
 988      * and supplemented with the one example in RFC 2060 itself.
 989      */
 990     {
 991         static const uint8_t imap[] = {
 992             /*  Hi Mom -&Jjo--!
 993                 A&ImIDkQ-.
 994                 &-
 995                 &ZeVnLIqe-
 996                 \
 997                 ~peter
 998                 /mail
 999                 /&ZeVnLIqe-
1000                 /&U,BTFw-
1001             */
1002             0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x26, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
1003             0x41, 0x26, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2d, 0x2e,
1004             0x26, 0x2d,
1005             0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
1006             0x5c,
1007             0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
1008             0x2f, 0x6d, 0x61, 0x69, 0x6c,
1009             0x2f, 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
1010             0x2f, 0x26, 0x55, 0x2c, 0x42, 0x54, 0x46, 0x77, 0x2d
1011         };
1012         static const UChar unicode[] = {
1013             /*  Hi Mom -<WHITE SMILING FACE>-!
1014                 A<NOT IDENTICAL TO><ALPHA>.
1015                 &
1016                 [Japanese word "nihongo"]
1017                 \
1018                 ~peter
1019                 /mail
1020                 /<65e5, 672c, 8a9e>
1021                 /<53f0, 5317>
1022             */
1023             0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
1024             0x41, 0x2262, 0x0391, 0x2e,
1025             0x26,
1026             0x65e5, 0x672c, 0x8a9e,
1027             0x5c,
1028             0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
1029             0x2f, 0x6d, 0x61, 0x69, 0x6c,
1030             0x2f, 0x65e5, 0x672c, 0x8a9e,
1031             0x2f, 0x53f0, 0x5317
1032         };
1033         static const int32_t toUnicodeOffsets[] = {
1034             0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
1035             15, 17, 19, 24,
1036             25,
1037             28, 30, 33,
1038             37,
1039             38, 39, 40, 41, 42, 43,
1040             44, 45, 46, 47, 48,
1041             49, 51, 53, 56,
1042             60, 62, 64
1043         };
1044         static const int32_t fromUnicodeOffsets[] = {
1045             0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
1046             11, 12, 12, 12, 13, 13, 13, 13, 13, 14,
1047             15, 15,
1048             16, 16, 16, 17, 17, 17, 18, 18, 18, 18,
1049             19,
1050             20, 21, 22, 23, 24, 25,
1051             26, 27, 28, 29, 30,
1052             31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34,
1053             35, 36, 36, 36, 37, 37, 37, 37, 37
1054         };
1055
1056         testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, imap, sizeof(imap), "IMAP-mailbox-name", fromUnicodeOffsets,FALSE);
1057
1058         testConvertToU(imap, sizeof(imap), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "IMAP-mailbox-name", toUnicodeOffsets,FALSE);
1059     }
1060
1061     /* Test UTF-8 bad data handling*/
1062     {
1063         static const uint8_t utf8[]={
1064             0x61,
1065             0xf7, 0xbf, 0xbf, 0xbf,         /* > 10FFFF */
1066             0x00,
1067             0x62,
1068             0xfb, 0xbf, 0xbf, 0xbf, 0xbf,   /* > 10FFFF */
1069             0xfb, 0xbf, 0xbf, 0xbf, 0xbf,   /* > 10FFFF */
1070             0xf4, 0x8f, 0xbf, 0xbf,         /* 10FFFF */
1071             0xdf, 0xbf,                     /* 7ff */
1072             0xbf,                           /* truncated tail */
1073             0xf4, 0x90, 0x80, 0x80,         /* 11FFFF */
1074             0x02
1075         };
1076
1077         static const uint16_t utf8Expected[]={
1078             0x0061,
1079             0xfffd,
1080             0x0000,
1081             0x0062,
1082             0xfffd,
1083             0xfffd,
1084             0xdbff, 0xdfff,
1085             0x07ff,
1086             0xfffd,
1087             0xfffd,
1088             0x0002
1089         };
1090
1091         static const int32_t utf8Offsets[]={
1092             0, 1, 5, 6, 7, 12, 17, 17, 21, 23, 24, 28
1093         };
1094         testConvertToU(utf8, sizeof(utf8),
1095                        utf8Expected, sizeof(utf8Expected)/sizeof(utf8Expected[0]), "utf-8", utf8Offsets ,FALSE);
1096
1097     }
1098
1099     /* Test UTF-32BE bad data handling*/
1100     {
1101         static const uint8_t utf32[]={
1102             0x00, 0x00, 0x00, 0x61,
1103             0x00, 0x11, 0x00, 0x00,         /* 0x110000 out of range */
1104             0x00, 0x10, 0xff, 0xff,         /* 0x10FFFF in range */
1105             0x00, 0x00, 0x00, 0x62,
1106             0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
1107             0x7f, 0xff, 0xff, 0xff,         /* 0x7fffffff out of range */
1108             0x00, 0x00, 0x01, 0x62,
1109             0x00, 0x00, 0x02, 0x62
1110         };
1111         static const uint16_t utf32Expected[]={
1112             0x0061,
1113             0xfffd,         /* 0x110000 out of range */
1114             0xDBFF,         /* 0x10FFFF in range */
1115             0xDFFF,
1116             0x0062,
1117             0xfffd,         /* 0xffffffff out of range */
1118             0xfffd,         /* 0x7fffffff out of range */
1119             0x0162,
1120             0x0262
1121         };
1122         static const int32_t utf32Offsets[]={
1123             0, 4, 8, 8, 12, 16, 20, 24, 28
1124         };
1125         static const uint8_t utf32ExpectedBack[]={
1126             0x00, 0x00, 0x00, 0x61,
1127             0x00, 0x00, 0xff, 0xfd,         /* 0x110000 out of range */
1128             0x00, 0x10, 0xff, 0xff,         /* 0x10FFFF in range */
1129             0x00, 0x00, 0x00, 0x62,
1130             0x00, 0x00, 0xff, 0xfd,         /* 0xffffffff out of range */
1131             0x00, 0x00, 0xff, 0xfd,         /* 0x7fffffff out of range */
1132             0x00, 0x00, 0x01, 0x62,
1133             0x00, 0x00, 0x02, 0x62
1134         };
1135         static const int32_t utf32OffsetsBack[]={
1136             0,0,0,0,
1137             1,1,1,1,
1138             2,2,2,2,
1139             4,4,4,4,
1140             5,5,5,5,
1141             6,6,6,6,
1142             7,7,7,7,
1143             8,8,8,8
1144         };
1145
1146         testConvertToU(utf32, sizeof(utf32),
1147                        utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32be", utf32Offsets ,FALSE);
1148         testConvertFromU(utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]),
1149             utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32be", utf32OffsetsBack, FALSE);
1150     }
1151
1152     /* Test UTF-32LE bad data handling*/
1153     {
1154         static const uint8_t utf32[]={
1155             0x61, 0x00, 0x00, 0x00,
1156             0x00, 0x00, 0x11, 0x00,         /* 0x110000 out of range */
1157             0xff, 0xff, 0x10, 0x00,         /* 0x10FFFF in range */
1158             0x62, 0x00, 0x00, 0x00,
1159             0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
1160             0xff, 0xff, 0xff, 0x7f,         /* 0x7fffffff out of range */
1161             0x62, 0x01, 0x00, 0x00,
1162             0x62, 0x02, 0x00, 0x00,
1163         };
1164
1165         static const uint16_t utf32Expected[]={
1166             0x0061,
1167             0xfffd,         /* 0x110000 out of range */
1168             0xDBFF,         /* 0x10FFFF in range */
1169             0xDFFF,
1170             0x0062,
1171             0xfffd,         /* 0xffffffff out of range */
1172             0xfffd,         /* 0x7fffffff out of range */
1173             0x0162,
1174             0x0262
1175         };
1176         static const int32_t utf32Offsets[]={
1177             0, 4, 8, 8, 12, 16, 20, 24, 28
1178         };
1179         static const uint8_t utf32ExpectedBack[]={
1180             0x61, 0x00, 0x00, 0x00,
1181             0xfd, 0xff, 0x00, 0x00,         /* 0x110000 out of range */
1182             0xff, 0xff, 0x10, 0x00,         /* 0x10FFFF in range */
1183             0x62, 0x00, 0x00, 0x00,
1184             0xfd, 0xff, 0x00, 0x00,         /* 0xffffffff out of range */
1185             0xfd, 0xff, 0x00, 0x00,         /* 0x7fffffff out of range */
1186             0x62, 0x01, 0x00, 0x00,
1187             0x62, 0x02, 0x00, 0x00
1188         };
1189         static const int32_t utf32OffsetsBack[]={
1190             0,0,0,0,
1191             1,1,1,1,
1192             2,2,2,2,
1193             4,4,4,4,
1194             5,5,5,5,
1195             6,6,6,6,
1196             7,7,7,7,
1197             8,8,8,8
1198         };
1199         testConvertToU(utf32, sizeof(utf32),
1200             utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32le", utf32Offsets,FALSE );
1201         testConvertFromU(utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]),
1202             utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32le", utf32OffsetsBack, FALSE);
1203     }
1204 }
1205
1206 static void TestCoverageMBCS(){
1207 #if 0
1208     UErrorCode status = U_ZERO_ERROR;
1209     const char *directory = loadTestData(&status);
1210     char* tdpath = NULL;
1211     char* saveDirectory = (char*)malloc(sizeof(char) *(strlen(u_getDataDirectory())+1));
1212     int len = strlen(directory);
1213     char* index=NULL;
1214
1215     tdpath = (char*) malloc(sizeof(char) * (len * 2));
1216     uprv_strcpy(saveDirectory,u_getDataDirectory());
1217     log_verbose("Retrieved data directory %s \n",saveDirectory);
1218     uprv_strcpy(tdpath,directory);
1219     index=strrchr(tdpath,(char)U_FILE_SEP_CHAR);
1220
1221     if((unsigned int)(index-tdpath) != (strlen(tdpath)-1)){
1222             *(index+1)=0;
1223     }
1224     u_setDataDirectory(tdpath);
1225     log_verbose("ICU data directory is set to: %s \n" ,tdpath);
1226 #endif
1227
1228     /*some more test to increase the code coverage in MBCS.  Create an test converter from test1.ucm
1229       which is test file for MBCS conversion with single-byte codepage data.*/
1230     {
1231
1232         /* MBCS with single byte codepage data test1.ucm*/
1233         const UChar unicodeInput[]    = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0x0003};
1234         const uint8_t expectedtest1[] = { 0x00, 0x05, 0xff, 0x07, 0xff,};
1235         int32_t  totest1Offs[]        = { 0, 1, 2, 3, 5, };
1236
1237         /*from Unicode*/
1238         testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]),
1239             expectedtest1, sizeof(expectedtest1), "@test1", totest1Offs,FALSE );
1240     }
1241
1242     /*some more test to increase the code coverage in MBCS.  Create an test converter from test3.ucm
1243       which is test file for MBCS conversion with three-byte codepage data.*/
1244     {
1245
1246         /* MBCS with three byte codepage data test3.ucm*/
1247         const UChar unicodeInput[]    = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1248         const uint8_t expectedtest3[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x0b,  0x07,  0x01, 0x02, 0x0a,  0xff,};
1249         int32_t  totest3Offs[]        = { 0, 1, 2, 3, 3, 3, 4, 6, 6, 6, 8};
1250
1251         const uint8_t test3input[]    = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b,  0x07,  0x01, 0x02, 0x0a, 0x01, 0x02, 0x0c,};
1252         const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1253         int32_t fromtest3Offs[]       = { 0, 1, 2, 3, 6, 6, 7, 7, 10 };
1254
1255         /*from Unicode*/
1256         testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]),
1257             expectedtest3, sizeof(expectedtest3), "@test3", totest3Offs,FALSE );
1258
1259         /*to Unicode*/
1260         testConvertToU(test3input, sizeof(test3input),
1261             expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test3", fromtest3Offs ,FALSE);
1262
1263     }
1264
1265     /*some more test to increase the code coverage in MBCS.  Create an test converter from test4.ucm
1266       which is test file for MBCS conversion with four-byte codepage data.*/
1267     {
1268
1269         /* MBCS with three byte codepage data test4.ucm*/
1270         static const UChar unicodeInput[]    = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1271         static const uint8_t expectedtest4[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x03, 0x0b,  0x07,  0x01, 0x02, 0x03, 0x0a,  0xff,};
1272         static const int32_t totest4Offs[]   = { 0, 1, 2, 3, 3, 3, 3, 4, 6, 6, 6, 6, 8,};
1273
1274         static const uint8_t test4input[]    = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x03, 0x0b,  0x07,  0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0c,};
1275         static const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1276         static const int32_t fromtest4Offs[] = { 0, 1, 2, 3, 7, 7, 8, 8, 12,};
1277
1278         /*from Unicode*/
1279         testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]),
1280             expectedtest4, sizeof(expectedtest4), "@test4", totest4Offs,FALSE );
1281
1282         /*to Unicode*/
1283         testConvertToU(test4input, sizeof(test4input),
1284             expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test4", fromtest4Offs,FALSE );
1285
1286     }
1287 #if 0
1288     free(tdpath);
1289     /* restore the original data directory */
1290     log_verbose("Setting the data directory to %s \n", saveDirectory);
1291     u_setDataDirectory(saveDirectory);
1292     free(saveDirectory);
1293 #endif
1294
1295 }
1296
1297 static void TestConverterType(const char *convName, UConverterType convType) {
1298     UConverter* myConverter;
1299     UErrorCode err = U_ZERO_ERROR;
1300
1301     myConverter = my_ucnv_open(convName, &err);
1302
1303     if (U_FAILURE(err)) {
1304         log_data_err("Failed to create an %s converter\n", convName);
1305         return;
1306     }
1307     else
1308     {
1309         if (ucnv_getType(myConverter)!=convType) {
1310             log_err("ucnv_getType Failed for %s. Got enum value 0x%X\n",
1311                 convName, convType);
1312         }
1313         else {
1314             log_verbose("ucnv_getType %s ok\n", convName);
1315         }
1316     }
1317     ucnv_close(myConverter);
1318 }
1319
1320 static void TestConverterTypesAndStarters()
1321 {
1322 #if !UCONFIG_NO_LEGACY_CONVERSION
1323     UConverter* myConverter;
1324     UErrorCode err = U_ZERO_ERROR;
1325     UBool mystarters[256];
1326
1327 /*    const UBool expectedKSCstarters[256] = {
1328         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1329         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1330         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1331         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1332         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1333         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1334         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1335         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1336         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1337         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1338         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1339         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1340         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1341         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1342         FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1343         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1344         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1345         TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE,
1346         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1347         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1348         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1349         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1350         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1351         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1352         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1353         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE};*/
1354
1355
1356     log_verbose("Testing KSC, ibm-930, ibm-878  for starters and their conversion types.");
1357
1358     myConverter = ucnv_open("ksc", &err);
1359     if (U_FAILURE(err)) {
1360       log_data_err("Failed to create an ibm-ksc converter\n");
1361       return;
1362     }
1363     else
1364     {
1365         if (ucnv_getType(myConverter)!=UCNV_MBCS)
1366             log_err("ucnv_getType Failed for ibm-949\n");
1367         else
1368             log_verbose("ucnv_getType ibm-949 ok\n");
1369
1370         if(myConverter!=NULL)
1371             ucnv_getStarters(myConverter, mystarters, &err);
1372
1373         /*if (memcmp(expectedKSCstarters, mystarters, sizeof(expectedKSCstarters)))
1374           log_err("Failed ucnv_getStarters for ksc\n");
1375           else
1376           log_verbose("ucnv_getStarters ok\n");*/
1377
1378     }
1379     ucnv_close(myConverter);
1380
1381     TestConverterType("ibm-930", UCNV_EBCDIC_STATEFUL);
1382     TestConverterType("ibm-878", UCNV_SBCS);
1383 #endif
1384
1385     TestConverterType("iso-8859-1", UCNV_LATIN_1);
1386
1387     TestConverterType("ibm-1208", UCNV_UTF8);
1388
1389     TestConverterType("utf-8", UCNV_UTF8);
1390     TestConverterType("UTF-16BE", UCNV_UTF16_BigEndian);
1391     TestConverterType("UTF-16LE", UCNV_UTF16_LittleEndian);
1392     TestConverterType("UTF-32BE", UCNV_UTF32_BigEndian);
1393     TestConverterType("UTF-32LE", UCNV_UTF32_LittleEndian);
1394
1395 #if !UCONFIG_NO_LEGACY_CONVERSION
1396
1397 #if defined(U_ENABLE_GENERIC_ISO_2022)
1398     TestConverterType("iso-2022", UCNV_ISO_2022);
1399 #endif
1400
1401     TestConverterType("hz", UCNV_HZ);
1402 #endif
1403
1404     TestConverterType("scsu", UCNV_SCSU);
1405
1406 #if !UCONFIG_NO_LEGACY_CONVERSION
1407     TestConverterType("x-iscii-de", UCNV_ISCII);
1408 #endif
1409
1410     TestConverterType("ascii", UCNV_US_ASCII);
1411     TestConverterType("utf-7", UCNV_UTF7);
1412     TestConverterType("IMAP-mailbox-name", UCNV_IMAP_MAILBOX);
1413     TestConverterType("bocu-1", UCNV_BOCU1);
1414 }
1415
1416 static void
1417 TestAmbiguousConverter(UConverter *cnv) {
1418     static const char inBytes[2]={ 0x61, 0x5c };
1419     UChar outUnicode[20]={ 0, 0, 0, 0 };
1420
1421     const char *s;
1422     UChar *u;
1423     UErrorCode errorCode;
1424     UBool isAmbiguous;
1425
1426     /* try to convert an 'a' and a US-ASCII backslash */
1427     errorCode=U_ZERO_ERROR;
1428     s=inBytes;
1429     u=outUnicode;
1430     ucnv_toUnicode(cnv, &u, u+20, &s, s+2, NULL, TRUE, &errorCode);
1431     if(U_FAILURE(errorCode)) {
1432         /* we do not care about general failures in this test; the input may just not be mappable */
1433         return;
1434     }
1435
1436     if(outUnicode[0]!=0x61 || outUnicode[1]==0xfffd) {
1437         /* not an ASCII-family encoding, or 0x5c is unassigned/illegal: this test is not applicable */
1438         return;
1439     }
1440
1441     isAmbiguous=ucnv_isAmbiguous(cnv);
1442
1443     /* check that outUnicode[1]!=0x5c is exactly the same as ucnv_isAmbiguous() */
1444     if((outUnicode[1]!=0x5c)!=isAmbiguous) {
1445         log_err("error: converter \"%s\" needs a backslash fix: %d but ucnv_isAmbiguous()==%d\n",
1446             ucnv_getName(cnv, &errorCode), outUnicode[1]!=0x5c, isAmbiguous);
1447         return;
1448     }
1449
1450     if(outUnicode[1]!=0x5c) {
1451         /* needs fixup, fix it */
1452         ucnv_fixFileSeparator(cnv, outUnicode, (int32_t)(u-outUnicode));
1453         if(outUnicode[1]!=0x5c) {
1454             /* the fix failed */
1455             log_err("error: ucnv_fixFileSeparator(%s) failed\n", ucnv_getName(cnv, &errorCode));
1456             return;
1457         }
1458     }
1459 }
1460
1461 static void TestAmbiguous()
1462 {
1463     UErrorCode status = U_ZERO_ERROR;
1464     UConverter *ascii_cnv = 0, *sjis_cnv = 0, *cnv;
1465     static const char target[] = {
1466         /* "\\usr\\local\\share\\data\\icutest.txt" */
1467         0x5c, 0x75, 0x73, 0x72,
1468         0x5c, 0x6c, 0x6f, 0x63, 0x61, 0x6c,
1469         0x5c, 0x73, 0x68, 0x61, 0x72, 0x65,
1470         0x5c, 0x64, 0x61, 0x74, 0x61,
1471         0x5c, 0x69, 0x63, 0x75, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, 0x78, 0x74,
1472         0
1473     };
1474     UChar asciiResult[200], sjisResult[200];
1475     int32_t asciiLength = 0, sjisLength = 0, i;
1476     const char *name;
1477
1478     /* enumerate all converters */
1479     status=U_ZERO_ERROR;
1480     for(i=0; (name=ucnv_getAvailableName(i))!=NULL; ++i) {
1481         cnv=ucnv_open(name, &status);
1482         if(U_SUCCESS(status)) {
1483             TestAmbiguousConverter(cnv);
1484             ucnv_close(cnv);
1485         } else {
1486             log_err("error: unable to open available converter \"%s\"\n", name);
1487             status=U_ZERO_ERROR;
1488         }
1489     }
1490
1491 #if !UCONFIG_NO_LEGACY_CONVERSION
1492     sjis_cnv = ucnv_open("ibm-943", &status);
1493     if (U_FAILURE(status))
1494     {
1495         log_data_err("Failed to create a SJIS converter\n");
1496         return;
1497     }
1498     ascii_cnv = ucnv_open("LATIN-1", &status);
1499     if (U_FAILURE(status))
1500     {
1501         log_data_err("Failed to create a LATIN-1 converter\n");
1502         ucnv_close(sjis_cnv);
1503         return;
1504     }
1505     /* convert target from SJIS to Unicode */
1506     sjisLength = ucnv_toUChars(sjis_cnv, sjisResult, sizeof(sjisResult)/U_SIZEOF_UCHAR, target, (int32_t)strlen(target), &status);
1507     if (U_FAILURE(status))
1508     {
1509         log_err("Failed to convert the SJIS string.\n");
1510         ucnv_close(sjis_cnv);
1511         ucnv_close(ascii_cnv);
1512         return;
1513     }
1514     /* convert target from Latin-1 to Unicode */
1515     asciiLength = ucnv_toUChars(ascii_cnv, asciiResult, sizeof(asciiResult)/U_SIZEOF_UCHAR, target, (int32_t)strlen(target), &status);
1516     if (U_FAILURE(status))
1517     {
1518         log_err("Failed to convert the Latin-1 string.\n");
1519         free(sjisResult);
1520         ucnv_close(sjis_cnv);
1521         ucnv_close(ascii_cnv);
1522         return;
1523     }
1524     if (!ucnv_isAmbiguous(sjis_cnv))
1525     {
1526         log_err("SJIS converter should contain ambiguous character mappings.\n");
1527         free(sjisResult);
1528         free(asciiResult);
1529         ucnv_close(sjis_cnv);
1530         ucnv_close(ascii_cnv);
1531         return;
1532     }
1533     if (u_strcmp(sjisResult, asciiResult) == 0)
1534     {
1535         log_err("File separators for SJIS don't need to be fixed.\n");
1536     }
1537     ucnv_fixFileSeparator(sjis_cnv, sjisResult, sjisLength);
1538     if (u_strcmp(sjisResult, asciiResult) != 0)
1539     {
1540         log_err("Fixing file separator for SJIS failed.\n");
1541     }
1542     ucnv_close(sjis_cnv);
1543     ucnv_close(ascii_cnv);
1544 #endif
1545 }
1546
1547 static void
1548 TestSignatureDetection(){
1549     /* with null terminated strings */
1550     {
1551         static const char* data[] = {
1552                 "\xFE\xFF\x00\x00",     /* UTF-16BE */
1553                 "\xFF\xFE\x00\x00",     /* UTF-16LE */
1554                 "\xEF\xBB\xBF\x00",     /* UTF-8    */
1555                 "\x0E\xFE\xFF\x00",     /* SCSU     */
1556
1557                 "\xFE\xFF",             /* UTF-16BE */
1558                 "\xFF\xFE",             /* UTF-16LE */
1559                 "\xEF\xBB\xBF",         /* UTF-8    */
1560                 "\x0E\xFE\xFF",         /* SCSU     */
1561
1562                 "\xFE\xFF\x41\x42",     /* UTF-16BE */
1563                 "\xFF\xFE\x41\x41",     /* UTF-16LE */
1564                 "\xEF\xBB\xBF\x41",     /* UTF-8    */
1565                 "\x0E\xFE\xFF\x41",     /* SCSU     */
1566
1567                 "\x2B\x2F\x76\x38\x2D", /* UTF-7    */
1568                 "\x2B\x2F\x76\x38\x41", /* UTF-7    */
1569                 "\x2B\x2F\x76\x39\x41", /* UTF-7    */
1570                 "\x2B\x2F\x76\x2B\x41", /* UTF-7    */
1571                 "\x2B\x2F\x76\x2F\x41",  /* UTF-7    */
1572
1573                 "\xDD\x73\x66\x73"      /* UTF-EBCDIC */
1574         };
1575         static const char* expected[] = {
1576                 "UTF-16BE",
1577                 "UTF-16LE",
1578                 "UTF-8",
1579                 "SCSU",
1580
1581                 "UTF-16BE",
1582                 "UTF-16LE",
1583                 "UTF-8",
1584                 "SCSU",
1585
1586                 "UTF-16BE",
1587                 "UTF-16LE",
1588                 "UTF-8",
1589                 "SCSU",
1590
1591                 "UTF-7",
1592                 "UTF-7",
1593                 "UTF-7",
1594                 "UTF-7",
1595                 "UTF-7",
1596                 "UTF-EBCDIC"
1597         };
1598         static const int32_t expectedLength[] ={
1599             2,
1600             2,
1601             3,
1602             3,
1603
1604             2,
1605             2,
1606             3,
1607             3,
1608
1609             2,
1610             2,
1611             3,
1612             3,
1613
1614             5,
1615             4,
1616             4,
1617             4,
1618             4,
1619             4
1620         };
1621         int i=0;
1622         UErrorCode err;
1623         int32_t signatureLength = -1;
1624         const char* source = NULL;
1625         const char* enc = NULL;
1626         for( ; i<sizeof(data)/sizeof(char*); i++){
1627             err = U_ZERO_ERROR;
1628             source = data[i];
1629             enc = ucnv_detectUnicodeSignature(source, -1 , &signatureLength, &err);
1630             if(U_FAILURE(err)){
1631                 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err));
1632                 continue;
1633             }
1634             if(enc == NULL || strcmp(enc,expected[i]) !=0){
1635                 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc);
1636                 continue;
1637             }
1638             if(signatureLength != expectedLength[i]){
1639                 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]);
1640             }
1641         }
1642     }
1643     {
1644         static const char* data[] = {
1645                 "\xFE\xFF\x00",         /* UTF-16BE */
1646                 "\xFF\xFE\x00",         /* UTF-16LE */
1647                 "\xEF\xBB\xBF\x00",     /* UTF-8    */
1648                 "\x0E\xFE\xFF\x00",     /* SCSU     */
1649                 "\x00\x00\xFE\xFF",     /* UTF-32BE */
1650                 "\xFF\xFE\x00\x00",     /* UTF-32LE */
1651                 "\xFE\xFF",             /* UTF-16BE */
1652                 "\xFF\xFE",             /* UTF-16LE */
1653                 "\xEF\xBB\xBF",         /* UTF-8    */
1654                 "\x0E\xFE\xFF",         /* SCSU     */
1655                 "\x00\x00\xFE\xFF",     /* UTF-32BE */
1656                 "\xFF\xFE\x00\x00",     /* UTF-32LE */
1657                 "\xFE\xFF\x41\x42",     /* UTF-16BE */
1658                 "\xFF\xFE\x41\x41",     /* UTF-16LE */
1659                 "\xEF\xBB\xBF\x41",     /* UTF-8    */
1660                 "\x0E\xFE\xFF\x41",     /* SCSU     */
1661                 "\x00\x00\xFE\xFF\x41", /* UTF-32BE */
1662                 "\xFF\xFE\x00\x00\x42", /* UTF-32LE */
1663                 "\xFB\xEE\x28",         /* BOCU-1   */
1664                 "\xFF\x41\x42"          /* NULL     */
1665         };
1666         static const int len[] = {
1667             3,
1668             3,
1669             4,
1670             4,
1671             4,
1672             4,
1673             2,
1674             2,
1675             3,
1676             3,
1677             4,
1678             4,
1679             4,
1680             4,
1681             4,
1682             4,
1683             5,
1684             5,
1685             3,
1686             3
1687         };
1688
1689         static const char* expected[] = {
1690                 "UTF-16BE",
1691                 "UTF-16LE",
1692                 "UTF-8",
1693                 "SCSU",
1694                 "UTF-32BE",
1695                 "UTF-32LE",
1696                 "UTF-16BE",
1697                 "UTF-16LE",
1698                 "UTF-8",
1699                 "SCSU",
1700                 "UTF-32BE",
1701                 "UTF-32LE",
1702                 "UTF-16BE",
1703                 "UTF-16LE",
1704                 "UTF-8",
1705                 "SCSU",
1706                 "UTF-32BE",
1707                 "UTF-32LE",
1708                 "BOCU-1",
1709                 NULL
1710         };
1711         static const int32_t expectedLength[] ={
1712             2,
1713             2,
1714             3,
1715             3,
1716             4,
1717             4,
1718             2,
1719             2,
1720             3,
1721             3,
1722             4,
1723             4,
1724             2,
1725             2,
1726             3,
1727             3,
1728             4,
1729             4,
1730             3,
1731             0
1732         };
1733         int i=0;
1734         UErrorCode err;
1735         int32_t signatureLength = -1;
1736         int32_t sourceLength=-1;
1737         const char* source = NULL;
1738         const char* enc = NULL;
1739         for( ; i<sizeof(data)/sizeof(char*); i++){
1740             err = U_ZERO_ERROR;
1741             source = data[i];
1742             sourceLength = len[i];
1743             enc = ucnv_detectUnicodeSignature(source, sourceLength , &signatureLength, &err);
1744             if(U_FAILURE(err)){
1745                 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err));
1746                 continue;
1747             }
1748             if(enc == NULL || strcmp(enc,expected[i]) !=0){
1749                 if(expected[i] !=NULL){
1750                  log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc);
1751                  continue;
1752                 }
1753             }
1754             if(signatureLength != expectedLength[i]){
1755                 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]);
1756             }
1757         }
1758     }
1759 }
1760
1761 void
1762 static TestUTF7() {
1763     /* test input */
1764     static const uint8_t in[]={
1765         /* H - +Jjo- - ! +- +2AHcAQ */
1766         0x48,
1767         0x2d,
1768         0x2b, 0x4a, 0x6a, 0x6f,
1769         0x2d, 0x2d,
1770         0x21,
1771         0x2b, 0x2d,
1772         0x2b, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51
1773     };
1774
1775     /* expected test results */
1776     static const int32_t results[]={
1777         /* number of bytes read, code point */
1778         1, 0x48,
1779         1, 0x2d,
1780         4, 0x263a, /* <WHITE SMILING FACE> */
1781         2, 0x2d,
1782         1, 0x21,
1783         2, 0x2b,
1784         7, 0x10401
1785     };
1786
1787     const char *cnvName;
1788     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
1789     UErrorCode errorCode=U_ZERO_ERROR;
1790     UConverter *cnv=ucnv_open("UTF-7", &errorCode);
1791     if(U_FAILURE(errorCode)) {
1792         log_err("Unable to open a UTF-7 converter: %s\n", u_errorName(errorCode)); /* sholdn't be a data err */
1793         return;
1794     }
1795     TestNextUChar(cnv, source, limit, results, "UTF-7");
1796     /* Test the condition when source >= sourceLimit */
1797     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1798     cnvName = ucnv_getName(cnv, &errorCode);
1799     if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "UTF-7") != 0) {
1800         log_err("UTF-7 converter is called %s: %s\n", cnvName, u_errorName(errorCode));
1801     }
1802     ucnv_close(cnv);
1803 }
1804
1805 void
1806 static TestIMAP() {
1807     /* test input */
1808     static const uint8_t in[]={
1809         /* H - &Jjo- - ! &- &2AHcAQ- \ */
1810         0x48,
1811         0x2d,
1812         0x26, 0x4a, 0x6a, 0x6f,
1813         0x2d, 0x2d,
1814         0x21,
1815         0x26, 0x2d,
1816         0x26, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51, 0x2d
1817     };
1818
1819     /* expected test results */
1820     static const int32_t results[]={
1821         /* number of bytes read, code point */
1822         1, 0x48,
1823         1, 0x2d,
1824         4, 0x263a, /* <WHITE SMILING FACE> */
1825         2, 0x2d,
1826         1, 0x21,
1827         2, 0x26,
1828         7, 0x10401
1829     };
1830
1831     const char *cnvName;
1832     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
1833     UErrorCode errorCode=U_ZERO_ERROR;
1834     UConverter *cnv=ucnv_open("IMAP-mailbox-name", &errorCode);
1835     if(U_FAILURE(errorCode)) {
1836         log_err("Unable to open a IMAP-mailbox-name converter: %s\n", u_errorName(errorCode)); /* sholdn't be a data err */
1837         return;
1838     }
1839     TestNextUChar(cnv, source, limit, results, "IMAP-mailbox-name");
1840     /* Test the condition when source >= sourceLimit */
1841     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1842     cnvName = ucnv_getName(cnv, &errorCode);
1843     if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "IMAP-mailbox-name") != 0) {
1844         log_err("IMAP-mailbox-name converter is called %s: %s\n", cnvName, u_errorName(errorCode));
1845     }
1846     ucnv_close(cnv);
1847 }
1848
1849 void
1850 static TestUTF8() {
1851     /* test input */
1852     static const uint8_t in[]={
1853         0x61,
1854         0xc2, 0x80,
1855         0xe0, 0xa0, 0x80,
1856         0xf0, 0x90, 0x80, 0x80,
1857         0xf4, 0x84, 0x8c, 0xa1,
1858         0xf0, 0x90, 0x90, 0x81
1859     };
1860
1861     /* expected test results */
1862     static const int32_t results[]={
1863         /* number of bytes read, code point */
1864         1, 0x61,
1865         2, 0x80,
1866         3, 0x800,
1867         4, 0x10000,
1868         4, 0x104321,
1869         4, 0x10401
1870     };
1871
1872     /* error test input */
1873     static const uint8_t in2[]={
1874         0x61,
1875         0xc0, 0x80,                     /* illegal non-shortest form */
1876         0xe0, 0x80, 0x80,               /* illegal non-shortest form */
1877         0xf0, 0x80, 0x80, 0x80,         /* illegal non-shortest form */
1878         0xc0, 0xc0,                     /* illegal trail byte */
1879         0xf4, 0x90, 0x80, 0x80,         /* 0x110000 out of range */
1880         0xf8, 0x80, 0x80, 0x80, 0x80,   /* too long */
1881         0xfe,                           /* illegal byte altogether */
1882         0x62
1883     };
1884
1885     /* expected error test results */
1886     static const int32_t results2[]={
1887         /* number of bytes read, code point */
1888         1, 0x61,
1889         22, 0x62
1890     };
1891
1892     UConverterToUCallback cb;
1893     const void *p;
1894
1895     const char *source=(const char *)in,*limit=(const char *)in+sizeof(in);
1896     UErrorCode errorCode=U_ZERO_ERROR;
1897     UConverter *cnv=ucnv_open("UTF-8", &errorCode);
1898     if(U_FAILURE(errorCode)) {
1899         log_err("Unable to open a UTF-8 converter: %s\n", u_errorName(errorCode));
1900         return;
1901     }
1902     TestNextUChar(cnv, source, limit, results, "UTF-8");
1903     /* Test the condition when source >= sourceLimit */
1904     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1905
1906     /* test error behavior with a skip callback */
1907     ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
1908     source=(const char *)in2;
1909     limit=(const char *)(in2+sizeof(in2));
1910     TestNextUChar(cnv, source, limit, results2, "UTF-8");
1911
1912     ucnv_close(cnv);
1913 }
1914
1915 void
1916 static TestCESU8() {
1917     /* test input */
1918     static const uint8_t in[]={
1919         0x61,
1920         0xc2, 0x80,
1921         0xe0, 0xa0, 0x80,
1922         0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80,
1923         0xed, 0xb0, 0x81, 0xed, 0xa0, 0x82,
1924         0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf,
1925         0xef, 0xbf, 0xbc
1926     };
1927
1928     /* expected test results */
1929     static const int32_t results[]={
1930         /* number of bytes read, code point */
1931         1, 0x61,
1932         2, 0x80,
1933         3, 0x800,
1934         6, 0x10000,
1935         3, 0xdc01,
1936         -1,0xd802,  /* may read 3 or 6 bytes */
1937         -1,0x10ffff,/* may read 0 or 3 bytes */
1938         3, 0xfffc
1939     };
1940
1941     /* error test input */
1942     static const uint8_t in2[]={
1943         0x61,
1944         0xc0, 0x80,                     /* illegal non-shortest form */
1945         0xe0, 0x80, 0x80,               /* illegal non-shortest form */
1946         0xf0, 0x80, 0x80, 0x80,         /* illegal non-shortest form */
1947         0xc0, 0xc0,                     /* illegal trail byte */
1948         0xf0, 0x90, 0x80, 0x80,         /* illegal 4-byte supplementary code point */
1949         0xf4, 0x84, 0x8c, 0xa1,         /* illegal 4-byte supplementary code point */
1950         0xf0, 0x90, 0x90, 0x81,         /* illegal 4-byte supplementary code point */
1951         0xf4, 0x90, 0x80, 0x80,         /* 0x110000 out of range */
1952         0xf8, 0x80, 0x80, 0x80, 0x80,   /* too long */
1953         0xfe,                           /* illegal byte altogether */
1954         0x62
1955     };
1956
1957     /* expected error test results */
1958     static const int32_t results2[]={
1959         /* number of bytes read, code point */
1960         1, 0x61,
1961         34, 0x62
1962     };
1963
1964     UConverterToUCallback cb;
1965     const void *p;
1966
1967     const char *source=(const char *)in,*limit=(const char *)in+sizeof(in);
1968     UErrorCode errorCode=U_ZERO_ERROR;
1969     UConverter *cnv=ucnv_open("CESU-8", &errorCode);
1970     if(U_FAILURE(errorCode)) {
1971         log_err("Unable to open a CESU-8 converter: %s\n", u_errorName(errorCode));
1972         return;
1973     }
1974     TestNextUChar(cnv, source, limit, results, "CESU-8");
1975     /* Test the condition when source >= sourceLimit */
1976     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1977
1978     /* test error behavior with a skip callback */
1979     ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
1980     source=(const char *)in2;
1981     limit=(const char *)(in2+sizeof(in2));
1982     TestNextUChar(cnv, source, limit, results2, "CESU-8");
1983
1984     ucnv_close(cnv);
1985 }
1986
1987 void
1988 static TestUTF16() {
1989     /* test input */
1990     static const uint8_t in1[]={
1991         0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff
1992     };
1993     static const uint8_t in2[]={
1994         0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff
1995     };
1996     static const uint8_t in3[]={
1997         0xfe, 0xfe, 0x4e, 0x00, 0xfe, 0xff, 0xd8, 0x40, 0xdc, 0x01
1998     };
1999
2000     /* expected test results */
2001     static const int32_t results1[]={
2002         /* number of bytes read, code point */
2003         4, 0x4e00,
2004         2, 0xfeff
2005     };
2006     static const int32_t results2[]={
2007         /* number of bytes read, code point */
2008         4, 0x004e,
2009         2, 0xfffe
2010     };
2011     static const int32_t results3[]={
2012         /* number of bytes read, code point */
2013         2, 0xfefe,
2014         2, 0x4e00,
2015         2, 0xfeff,
2016         4, 0x20001
2017     };
2018
2019     const char *source, *limit;
2020
2021     UErrorCode errorCode=U_ZERO_ERROR;
2022     UConverter *cnv=ucnv_open("UTF-16", &errorCode);
2023     if(U_FAILURE(errorCode)) {
2024         log_err("Unable to open a UTF-16 converter: %s\n", u_errorName(errorCode));
2025         return;
2026     }
2027
2028     source=(const char *)in1, limit=(const char *)in1+sizeof(in1);
2029     TestNextUChar(cnv, source, limit, results1, "UTF-16");
2030
2031     source=(const char *)in2, limit=(const char *)in2+sizeof(in2);
2032     ucnv_resetToUnicode(cnv);
2033     TestNextUChar(cnv, source, limit, results2, "UTF-16");
2034
2035     source=(const char *)in3, limit=(const char *)in3+sizeof(in3);
2036     ucnv_resetToUnicode(cnv);
2037     TestNextUChar(cnv, source, limit, results3, "UTF-16");
2038
2039     /* Test the condition when source >= sourceLimit */
2040     ucnv_resetToUnicode(cnv);
2041     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2042
2043     ucnv_close(cnv);
2044 }
2045
2046 void
2047 static TestUTF16BE() {
2048     /* test input */
2049     static const uint8_t in[]={
2050         0x00, 0x61,
2051         0x00, 0xc0,
2052         0x00, 0x31,
2053         0x00, 0xf4,
2054         0xce, 0xfe,
2055         0xd8, 0x01, 0xdc, 0x01
2056     };
2057
2058     /* expected test results */
2059     static const int32_t results[]={
2060         /* number of bytes read, code point */
2061         2, 0x61,
2062         2, 0xc0,
2063         2, 0x31,
2064         2, 0xf4,
2065         2, 0xcefe,
2066         4, 0x10401
2067     };
2068
2069     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2070     UErrorCode errorCode=U_ZERO_ERROR;
2071     UConverter *cnv=ucnv_open("utf-16be", &errorCode);
2072     if(U_FAILURE(errorCode)) {
2073         log_err("Unable to open a UTF16-BE converter: %s\n", u_errorName(errorCode));
2074         return;
2075     }
2076     TestNextUChar(cnv, source, limit, results, "UTF-16BE");
2077     /* Test the condition when source >= sourceLimit */
2078     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2079     /*Test for the condition where there is an invalid character*/
2080     {
2081         static const uint8_t source2[]={0x61};
2082         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2083         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
2084     }
2085 #if 0
2086     /*
2087      * Test disabled because currently the UTF-16BE/LE converters are supposed
2088      * to not set errors for unpaired surrogates.
2089      * This may change with
2090      * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2091      */
2092
2093     /*Test for the condition where there is a surrogate pair*/
2094     {
2095         const uint8_t source2[]={0xd8, 0x01};
2096         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
2097     }
2098 #endif
2099     ucnv_close(cnv);
2100 }
2101
2102 static void
2103 TestUTF16LE() {
2104     /* test input */
2105     static const uint8_t in[]={
2106         0x61, 0x00,
2107         0x31, 0x00,
2108         0x4e, 0x2e,
2109         0x4e, 0x00,
2110         0x01, 0xd8, 0x01, 0xdc
2111     };
2112
2113     /* expected test results */
2114     static const int32_t results[]={
2115         /* number of bytes read, code point */
2116         2, 0x61,
2117         2, 0x31,
2118         2, 0x2e4e,
2119         2, 0x4e,
2120         4, 0x10401
2121     };
2122
2123     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2124     UErrorCode errorCode=U_ZERO_ERROR;
2125     UConverter *cnv=ucnv_open("utf-16le", &errorCode);
2126     if(U_FAILURE(errorCode)) {
2127         log_err("Unable to open a UTF16-LE converter: %s\n", u_errorName(errorCode));
2128         return;
2129     }
2130     TestNextUChar(cnv, source, limit, results, "UTF-16LE");
2131     /* Test the condition when source >= sourceLimit */
2132     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2133     /*Test for the condition where there is an invalid character*/
2134     {
2135         static const uint8_t source2[]={0x61};
2136         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2137         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
2138     }
2139 #if 0
2140     /*
2141      * Test disabled because currently the UTF-16BE/LE converters are supposed
2142      * to not set errors for unpaired surrogates.
2143      * This may change with
2144      * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2145      */
2146
2147     /*Test for the condition where there is a surrogate character*/
2148     {
2149         static const uint8_t source2[]={0x01, 0xd8};
2150         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
2151     }
2152 #endif
2153
2154     ucnv_close(cnv);
2155 }
2156
2157 void
2158 static TestUTF32() {
2159     /* test input */
2160     static const uint8_t in1[]={
2161         0x00, 0x00, 0xfe, 0xff,   0x00, 0x10, 0x0f, 0x00,   0x00, 0x00, 0xfe, 0xff
2162     };
2163     static const uint8_t in2[]={
2164         0xff, 0xfe, 0x00, 0x00,   0x00, 0x10, 0x0f, 0x00,   0xfe, 0xff, 0x00, 0x00
2165     };
2166     static const uint8_t in3[]={
2167         0x00, 0x00, 0xfe, 0xfe,   0x00, 0x10, 0x0f, 0x00,   0x00, 0x00, 0xd8, 0x40,   0x00, 0x00, 0xdc, 0x01
2168     };
2169
2170     /* expected test results */
2171     static const int32_t results1[]={
2172         /* number of bytes read, code point */
2173         8, 0x100f00,
2174         4, 0xfeff
2175     };
2176     static const int32_t results2[]={
2177         /* number of bytes read, code point */
2178         8, 0x0f1000,
2179         4, 0xfffe
2180     };
2181     static const int32_t results3[]={
2182         /* number of bytes read, code point */
2183         4, 0xfefe,
2184         4, 0x100f00,
2185         4, 0xfffd, /* unmatched surrogate */
2186         4, 0xfffd  /* unmatched surrogate */
2187     };
2188
2189     const char *source, *limit;
2190
2191     UErrorCode errorCode=U_ZERO_ERROR;
2192     UConverter *cnv=ucnv_open("UTF-32", &errorCode);
2193     if(U_FAILURE(errorCode)) {
2194         log_err("Unable to open a UTF-32 converter: %s\n", u_errorName(errorCode));
2195         return;
2196     }
2197
2198     source=(const char *)in1, limit=(const char *)in1+sizeof(in1);
2199     TestNextUChar(cnv, source, limit, results1, "UTF-32");
2200
2201     source=(const char *)in2, limit=(const char *)in2+sizeof(in2);
2202     ucnv_resetToUnicode(cnv);
2203     TestNextUChar(cnv, source, limit, results2, "UTF-32");
2204
2205     source=(const char *)in3, limit=(const char *)in3+sizeof(in3);
2206     ucnv_resetToUnicode(cnv);
2207     TestNextUChar(cnv, source, limit, results3, "UTF-32");
2208
2209     /* Test the condition when source >= sourceLimit */
2210     ucnv_resetToUnicode(cnv);
2211     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2212
2213     ucnv_close(cnv);
2214 }
2215
2216 static void
2217 TestUTF32BE() {
2218     /* test input */
2219     static const uint8_t in[]={
2220         0x00, 0x00, 0x00, 0x61,
2221         0x00, 0x00, 0x30, 0x61,
2222         0x00, 0x00, 0xdc, 0x00,
2223         0x00, 0x00, 0xd8, 0x00,
2224         0x00, 0x00, 0xdf, 0xff,
2225         0x00, 0x00, 0xff, 0xfe,
2226         0x00, 0x10, 0xab, 0xcd,
2227         0x00, 0x10, 0xff, 0xff
2228     };
2229
2230     /* expected test results */
2231     static const int32_t results[]={
2232         /* number of bytes read, code point */
2233         4, 0x61,
2234         4, 0x3061,
2235         4, 0xfffd,
2236         4, 0xfffd,
2237         4, 0xfffd,
2238         4, 0xfffe,
2239         4, 0x10abcd,
2240         4, 0x10ffff
2241     };
2242
2243     /* error test input */
2244     static const uint8_t in2[]={
2245         0x00, 0x00, 0x00, 0x61,
2246         0x00, 0x11, 0x00, 0x00,         /* 0x110000 out of range */
2247         0x00, 0x00, 0x00, 0x62,
2248         0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
2249         0x7f, 0xff, 0xff, 0xff,         /* 0x7fffffff out of range */
2250         0x00, 0x00, 0x01, 0x62,
2251         0x00, 0x00, 0x02, 0x62
2252     };
2253
2254     /* expected error test results */
2255     static const int32_t results2[]={
2256         /* number of bytes read, code point */
2257         4,  0x61,
2258         8,  0x62,
2259         12, 0x162,
2260         4,  0x262
2261     };
2262
2263     UConverterToUCallback cb;
2264     const void *p;
2265
2266     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2267     UErrorCode errorCode=U_ZERO_ERROR;
2268     UConverter *cnv=ucnv_open("UTF-32BE", &errorCode);
2269     if(U_FAILURE(errorCode)) {
2270         log_err("Unable to open a UTF-32BE converter: %s\n", u_errorName(errorCode));
2271         return;
2272     }
2273     TestNextUChar(cnv, source, limit, results, "UTF-32BE");
2274
2275     /* Test the condition when source >= sourceLimit */
2276     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2277
2278     /* test error behavior with a skip callback */
2279     ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2280     source=(const char *)in2;
2281     limit=(const char *)(in2+sizeof(in2));
2282     TestNextUChar(cnv, source, limit, results2, "UTF-32BE");
2283
2284     ucnv_close(cnv);
2285 }
2286
2287 static void
2288 TestUTF32LE() {
2289     /* test input */
2290     static const uint8_t in[]={
2291         0x61, 0x00, 0x00, 0x00,
2292         0x61, 0x30, 0x00, 0x00,
2293         0x00, 0xdc, 0x00, 0x00,
2294         0x00, 0xd8, 0x00, 0x00,
2295         0xff, 0xdf, 0x00, 0x00,
2296         0xfe, 0xff, 0x00, 0x00,
2297         0xcd, 0xab, 0x10, 0x00,
2298         0xff, 0xff, 0x10, 0x00
2299     };
2300
2301     /* expected test results */
2302     static const int32_t results[]={
2303         /* number of bytes read, code point */
2304         4, 0x61,
2305         4, 0x3061,
2306         4, 0xfffd,
2307         4, 0xfffd,
2308         4, 0xfffd,
2309         4, 0xfffe,
2310         4, 0x10abcd,
2311         4, 0x10ffff
2312     };
2313
2314     /* error test input */
2315     static const uint8_t in2[]={
2316         0x61, 0x00, 0x00, 0x00,
2317         0x00, 0x00, 0x11, 0x00,         /* 0x110000 out of range */
2318         0x62, 0x00, 0x00, 0x00,
2319         0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
2320         0xff, 0xff, 0xff, 0x7f,         /* 0x7fffffff out of range */
2321         0x62, 0x01, 0x00, 0x00,
2322         0x62, 0x02, 0x00, 0x00,
2323     };
2324
2325     /* expected error test results */
2326     static const int32_t results2[]={
2327         /* number of bytes read, code point */
2328         4,  0x61,
2329         8,  0x62,
2330         12, 0x162,
2331         4,  0x262,
2332     };
2333
2334     UConverterToUCallback cb;
2335     const void *p;
2336
2337     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2338     UErrorCode errorCode=U_ZERO_ERROR;
2339     UConverter *cnv=ucnv_open("UTF-32LE", &errorCode);
2340     if(U_FAILURE(errorCode)) {
2341         log_err("Unable to open a UTF-32LE converter: %s\n", u_errorName(errorCode));
2342         return;
2343     }
2344     TestNextUChar(cnv, source, limit, results, "UTF-32LE");
2345
2346     /* Test the condition when source >= sourceLimit */
2347     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2348
2349     /* test error behavior with a skip callback */
2350     ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2351     source=(const char *)in2;
2352     limit=(const char *)(in2+sizeof(in2));
2353     TestNextUChar(cnv, source, limit, results2, "UTF-32LE");
2354
2355     ucnv_close(cnv);
2356 }
2357
2358 static void
2359 TestLATIN1() {
2360     /* test input */
2361     static const uint8_t in[]={
2362        0x61,
2363        0x31,
2364        0x32,
2365        0xc0,
2366        0xf0,
2367        0xf4,
2368     };
2369
2370     /* expected test results */
2371     static const int32_t results[]={
2372         /* number of bytes read, code point */
2373         1, 0x61,
2374         1, 0x31,
2375         1, 0x32,
2376         1, 0xc0,
2377         1, 0xf0,
2378         1, 0xf4,
2379     };
2380     static const uint16_t in1[] = {
2381         0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2382         0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2383         0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2384         0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2385         0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2386         0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2387         0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2388         0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2389         0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2390         0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2391         0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2392         0xcb, 0x82
2393     };
2394     static const uint8_t out1[] = {
2395         0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2396         0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2397         0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2398         0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2399         0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2400         0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2401         0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2402         0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2403         0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2404         0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2405         0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2406         0xcb, 0x82
2407     };
2408     static const uint16_t in2[]={
2409         0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2410         0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2411         0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2412         0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2413         0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2414         0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2415         0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2416         0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2417         0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2418         0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2419         0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2420         0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2421         0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2422         0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2423         0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2424         0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2425         0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2426         0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2427         0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2428         0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2429         0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2430         0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2431         0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2432         0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2433         0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2434         0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2435         0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2436         0x37, 0x20, 0x2A, 0x2F,
2437     };
2438     static const unsigned char out2[]={
2439         0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2440         0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2441         0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2442         0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2443         0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2444         0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2445         0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2446         0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2447         0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2448         0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2449         0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2450         0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2451         0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2452         0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2453         0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2454         0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2455         0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2456         0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2457         0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2458         0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2459         0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2460         0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2461         0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2462         0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2463         0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2464         0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2465         0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2466         0x37, 0x20, 0x2A, 0x2F,
2467     };
2468     const char *source=(const char *)in;
2469     const char *limit=(const char *)in+sizeof(in);
2470
2471     UErrorCode errorCode=U_ZERO_ERROR;
2472     UConverter *cnv=ucnv_open("LATIN_1", &errorCode);
2473     if(U_FAILURE(errorCode)) {
2474         log_err("Unable to open a LATIN_1 converter: %s\n", u_errorName(errorCode));
2475         return;
2476     }
2477     TestNextUChar(cnv, source, limit, results, "LATIN_1");
2478     /* Test the condition when source >= sourceLimit */
2479     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2480     TestConv((uint16_t*)in1,sizeof(in1)/2,"LATIN_1","LATIN-1",(char*)out1,sizeof(out1));
2481     TestConv((uint16_t*)in2,sizeof(in2)/2,"ASCII","ASCII",(char*)out2,sizeof(out2));
2482
2483     ucnv_close(cnv);
2484 }
2485
2486 static void
2487 TestSBCS() {
2488     /* test input */
2489     static const uint8_t in[]={ 0x61, 0xc0, 0x80, 0xe0, 0xf0, 0xf4};
2490     /* expected test results */
2491     static const int32_t results[]={
2492         /* number of bytes read, code point */
2493         1, 0x61,
2494         1, 0xbf,
2495         1, 0xc4,
2496         1, 0x2021,
2497         1, 0xf8ff,
2498         1, 0x00d9
2499     };
2500
2501     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2502     UErrorCode errorCode=U_ZERO_ERROR;
2503     UConverter *cnv=ucnv_open("x-mac-turkish", &errorCode);
2504     if(U_FAILURE(errorCode)) {
2505         log_data_err("Unable to open a SBCS(x-mac-turkish) converter: %s\n", u_errorName(errorCode));
2506         return;
2507     }
2508     TestNextUChar(cnv, source, limit, results, "SBCS(x-mac-turkish)");
2509     /* Test the condition when source >= sourceLimit */
2510     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2511     /*Test for Illegal character */ /*
2512     {
2513     static const uint8_t input1[]={ 0xA1 };
2514     const char* illegalsource=(const char*)input1;
2515     TestNextUCharError(cnv, illegalsource, illegalsource+sizeof(illegalsource), U_INVALID_CHAR_FOUND, "source has a illegal characte");
2516     }
2517    */
2518     ucnv_close(cnv);
2519 }
2520
2521 static void
2522 TestDBCS() {
2523     /* test input */
2524     static const uint8_t in[]={
2525         0x44, 0x6a,
2526         0xc4, 0x9c,
2527         0x7a, 0x74,
2528         0x46, 0xab,
2529         0x42, 0x5b,
2530
2531     };
2532
2533     /* expected test results */
2534     static const int32_t results[]={
2535         /* number of bytes read, code point */
2536         2, 0x00a7,
2537         2, 0xe1d2,
2538         2, 0x6962,
2539         2, 0xf842,
2540         2, 0xffe5,
2541     };
2542
2543     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2544     UErrorCode errorCode=U_ZERO_ERROR;
2545
2546     UConverter *cnv=my_ucnv_open("@ibm9027", &errorCode);
2547     if(U_FAILURE(errorCode)) {
2548         log_data_err("Unable to open a DBCS(@ibm9027) converter: %s\n", u_errorName(errorCode));
2549         return;
2550     }
2551     TestNextUChar(cnv, source, limit, results, "DBCS(@ibm9027)");
2552     /* Test the condition when source >= sourceLimit */
2553     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2554     /*Test for the condition where there is an invalid character*/
2555     {
2556         static const uint8_t source2[]={0x1a, 0x1b};
2557         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
2558     }
2559     /*Test for the condition where we have a truncated char*/
2560     {
2561         static const uint8_t source1[]={0xc4};
2562         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2563         TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2564     }
2565     ucnv_close(cnv);
2566 }
2567
2568 static void
2569 TestMBCS() {
2570     /* test input */
2571     static const uint8_t in[]={
2572         0x01,
2573         0xa6, 0xa3,
2574         0x00,
2575         0xa6, 0xa1,
2576         0x08,
2577         0xc2, 0x76,
2578         0xc2, 0x78,
2579
2580     };
2581
2582     /* expected test results */
2583     static const int32_t results[]={
2584         /* number of bytes read, code point */
2585         1, 0x0001,
2586         2, 0x250c,
2587         1, 0x0000,
2588         2, 0x2500,
2589         1, 0x0008,
2590         2, 0xd60c,
2591         2, 0xd60e,
2592     };
2593
2594     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2595     UErrorCode errorCode=U_ZERO_ERROR;
2596
2597     UConverter *cnv=ucnv_open("ibm-1363", &errorCode);
2598     if(U_FAILURE(errorCode)) {
2599         log_data_err("Unable to open a MBCS(ibm-1363) converter: %s\n", u_errorName(errorCode));
2600         return;
2601     }
2602     TestNextUChar(cnv, source, limit, results, "MBCS(ibm-1363)");
2603     /* Test the condition when source >= sourceLimit */
2604     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2605     /*Test for the condition where there is an invalid character*/
2606     {
2607         static const uint8_t source2[]={0xa1, 0x01};
2608         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
2609     }
2610     /*Test for the condition where we have a truncated char*/
2611     {
2612         static const uint8_t source1[]={0xc4};
2613         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2614         TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2615     }
2616     ucnv_close(cnv);
2617
2618 }
2619
2620 #ifdef U_ENABLE_GENERIC_ISO_2022
2621
2622 static void
2623 TestISO_2022() {
2624     /* test input */
2625     static const uint8_t in[]={
2626         0x1b, 0x25, 0x42,
2627         0x31,
2628         0x32,
2629         0x61,
2630         0xc2, 0x80,
2631         0xe0, 0xa0, 0x80,
2632         0xf0, 0x90, 0x80, 0x80
2633     };
2634
2635
2636
2637     /* expected test results */
2638     static const int32_t results[]={
2639         /* number of bytes read, code point */
2640         4, 0x0031,  /* 4 bytes including the escape sequence */
2641         1, 0x0032,
2642         1, 0x61,
2643         2, 0x80,
2644         3, 0x800,
2645         4, 0x10000
2646     };
2647
2648     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2649     UErrorCode errorCode=U_ZERO_ERROR;
2650     UConverter *cnv;
2651
2652     cnv=ucnv_open("ISO_2022", &errorCode);
2653     if(U_FAILURE(errorCode)) {
2654         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
2655         return;
2656     }
2657     TestNextUChar(cnv, source, limit, results, "ISO_2022");
2658
2659     /* Test the condition when source >= sourceLimit */
2660     TestNextUCharError(cnv, source, source-1, U_ILLEGAL_ARGUMENT_ERROR, "sourceLimit < source");
2661     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2662     /*Test for the condition where we have a truncated char*/
2663     {
2664         static const uint8_t source1[]={0xc4};
2665         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2666         TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2667     }
2668     /*Test for the condition where there is an invalid character*/
2669     {
2670         static const uint8_t source2[]={0xa1, 0x01};
2671         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_CHAR_FOUND, "an invalid character");
2672     }
2673     ucnv_close(cnv);
2674 }
2675
2676 #endif
2677
2678 static void
2679 TestSmallTargetBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2680     const UChar* uSource;
2681     const UChar* uSourceLimit;
2682     const char* cSource;
2683     const char* cSourceLimit;
2684     UChar *uTargetLimit =NULL;
2685     UChar *uTarget;
2686     char *cTarget;
2687     const char *cTargetLimit;
2688     char *cBuf;
2689     UChar *uBuf,*test;
2690     int32_t uBufSize = 120;
2691     int len=0;
2692     int i=2;
2693     UErrorCode errorCode=U_ZERO_ERROR;
2694     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2695     cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
2696     ucnv_reset(cnv);
2697     for(;--i>0; ){
2698         uSource = (UChar*) source;
2699         uSourceLimit=(const UChar*)sourceLimit;
2700         cTarget = cBuf;
2701         uTarget = uBuf;
2702         cSource = cBuf;
2703         cTargetLimit = cBuf;
2704         uTargetLimit = uBuf;
2705
2706         do{
2707
2708             cTargetLimit = cTargetLimit+ i;
2709             ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode);
2710             if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2711                errorCode=U_ZERO_ERROR;
2712                 continue;
2713             }
2714
2715             if(U_FAILURE(errorCode)){
2716                 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2717                 return;
2718             }
2719
2720         }while (uSource<uSourceLimit);
2721
2722         cSourceLimit =cTarget;
2723         do{
2724             uTargetLimit=uTargetLimit+i;
2725             ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode);
2726             if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2727                errorCode=U_ZERO_ERROR;
2728                 continue;
2729             }
2730             if(U_FAILURE(errorCode)){
2731                    log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2732                     return;
2733             }
2734         }while(cSource<cSourceLimit);
2735
2736         uSource = source;
2737         test =uBuf;
2738         for(len=0;len<(int)(source - sourceLimit);len++){
2739             if(uBuf[len]!=uSource[len]){
2740                 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
2741             }
2742         }
2743     }
2744     free(uBuf);
2745     free(cBuf);
2746 }
2747 /* Test for Jitterbug 778 */
2748 static void TestToAndFromUChars(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2749     const UChar* uSource;
2750     const UChar* uSourceLimit;
2751     const char* cSource;
2752     UChar *uTargetLimit =NULL;
2753     UChar *uTarget;
2754     char *cTarget;
2755     const char *cTargetLimit;
2756     char *cBuf;
2757     UChar *uBuf,*test;
2758     int32_t uBufSize = 120;
2759     int numCharsInTarget=0;
2760     UErrorCode errorCode=U_ZERO_ERROR;
2761     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2762     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
2763     uSource = source;
2764     uSourceLimit=sourceLimit;
2765     cTarget = cBuf;
2766     cTargetLimit = cBuf +uBufSize*5;
2767     uTarget = uBuf;
2768     uTargetLimit = uBuf+ uBufSize*5;
2769     ucnv_reset(cnv);
2770     numCharsInTarget=ucnv_fromUChars(cnv, cTarget, (int32_t)(cTargetLimit-cTarget), uSource, (int32_t)(uSourceLimit-uSource), &errorCode);
2771     if(U_FAILURE(errorCode)){
2772         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
2773         return;
2774     }
2775     cSource = cBuf;
2776     test =uBuf;
2777     ucnv_toUChars(cnv,uTarget,(int32_t)(uTargetLimit-uTarget),cSource,numCharsInTarget,&errorCode);
2778     if(U_FAILURE(errorCode)){
2779         log_err("ucnv_toUChars conversion failed, reason %s\n", u_errorName(errorCode));
2780         return;
2781     }
2782     uSource = source;
2783     while(uSource<uSourceLimit){
2784         if(*test!=*uSource){
2785
2786             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
2787         }
2788         uSource++;
2789         test++;
2790     }
2791     free(uBuf);
2792     free(cBuf);
2793 }
2794
2795 static void TestSmallSourceBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2796     const UChar* uSource;
2797     const UChar* uSourceLimit;
2798     const char* cSource;
2799     const char* cSourceLimit;
2800     UChar *uTargetLimit =NULL;
2801     UChar *uTarget;
2802     char *cTarget;
2803     const char *cTargetLimit;
2804     char *cBuf;
2805     UChar *uBuf,*test;
2806     int32_t uBufSize = 120;
2807     int len=0;
2808     int i=2;
2809     const UChar *temp = sourceLimit;
2810     UErrorCode errorCode=U_ZERO_ERROR;
2811     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2812     cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
2813
2814     ucnv_reset(cnv);
2815     for(;--i>0;){
2816         uSource = (UChar*) source;
2817         cTarget = cBuf;
2818         uTarget = uBuf;
2819         cSource = cBuf;
2820         cTargetLimit = cBuf;
2821         uTargetLimit = uBuf+uBufSize*5;
2822         cTargetLimit = cTargetLimit+uBufSize*10;
2823         uSourceLimit=uSource;
2824         do{
2825
2826             if (uSourceLimit < sourceLimit) {
2827                 uSourceLimit = uSourceLimit+1;
2828             }
2829             ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode);
2830             if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2831                errorCode=U_ZERO_ERROR;
2832                 continue;
2833             }
2834
2835             if(U_FAILURE(errorCode)){
2836                 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2837                 return;
2838             }
2839
2840         }while (uSource<temp);
2841
2842         cSourceLimit =cBuf;
2843         do{
2844             if (cSourceLimit < cBuf + (cTarget - cBuf)) {
2845                 cSourceLimit = cSourceLimit+1;
2846             }
2847             ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode);
2848             if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2849                errorCode=U_ZERO_ERROR;
2850                 continue;
2851             }
2852             if(U_FAILURE(errorCode)){
2853                    log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2854                     return;
2855             }
2856         }while(cSource<cTarget);
2857
2858         uSource = source;
2859         test =uBuf;
2860         for(;len<(int)(source - sourceLimit);len++){
2861             if(uBuf[len]!=uSource[len]){
2862                 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
2863             }
2864         }
2865     }
2866     free(uBuf);
2867     free(cBuf);
2868 }
2869 static void
2870 TestGetNextUChar2022(UConverter* cnv, const char* source, const char* limit,
2871                      const uint16_t results[], const char* message){
2872      const char* s0;
2873      const char* s=(char*)source;
2874      const uint16_t *r=results;
2875      UErrorCode errorCode=U_ZERO_ERROR;
2876      uint32_t c,exC;
2877      ucnv_reset(cnv);
2878      while(s<limit) {
2879         s0=s;
2880         c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
2881         if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
2882             break; /* no more significant input */
2883         } else if(U_FAILURE(errorCode)) {
2884             log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
2885             break;
2886         } else {
2887             if(UTF_IS_FIRST_SURROGATE(*r)){
2888                 int i =0, len = 2;
2889                 UTF_NEXT_CHAR_SAFE(r, i, len, exC, FALSE);
2890                 r++;
2891             }else{
2892                 exC = *r;
2893             }
2894             if(c!=(uint32_t)(exC))
2895                 log_err("%s ucnv_getNextUChar() Expected:  \\u%04X Got:  \\u%04X \n",message,(uint32_t) (*r),c);
2896         }
2897         r++;
2898     }
2899 }
2900
2901 static int TestJitterbug930(const char* enc){
2902     UErrorCode err = U_ZERO_ERROR;
2903     UConverter*converter;
2904     char out[80];
2905     char*target = out;
2906     UChar in[4];
2907     const UChar*source = in;
2908     int32_t off[80];
2909     int32_t* offsets = off;
2910     int numOffWritten=0;
2911     UBool flush = 0;
2912     converter = my_ucnv_open(enc, &err);
2913
2914     in[0] = 0x41;     /* 0x4E00;*/
2915     in[1] = 0x4E01;
2916     in[2] = 0x4E02;
2917     in[3] = 0x4E03;
2918
2919     memset(off, '*', sizeof(off));
2920
2921     ucnv_fromUnicode (converter,
2922             &target,
2923             target+2,
2924             &source,
2925             source+3,
2926             offsets,
2927             flush,
2928             &err);
2929
2930         /* writes three bytes into the output buffer: 41 1B 24
2931         * but offsets contains 0 1 1
2932     */
2933     while(*offsets< off[10]){
2934         numOffWritten++;
2935         offsets++;
2936     }
2937     log_verbose("Testing Jitterbug 930 for encoding %s",enc);
2938     if(numOffWritten!= (int)(target-out)){
2939         log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc, (int)(target-out),numOffWritten);
2940     }
2941
2942     err = U_ZERO_ERROR;
2943
2944     memset(off,'*' , sizeof(off));
2945
2946     flush = 1;
2947     offsets=off;
2948     ucnv_fromUnicode (converter,
2949             &target,
2950             target+4,
2951             &source,
2952             source,
2953             offsets,
2954             flush,
2955             &err);
2956     numOffWritten=0;
2957     while(*offsets< off[10]){
2958         numOffWritten++;
2959         if(*offsets!= -1){
2960             log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc,-1,*offsets) ;
2961         }
2962         offsets++;
2963     }
2964
2965     /* writes 42 43 7A into output buffer,
2966      * offsets contains -1 -1 -1
2967      */
2968     ucnv_close(converter);
2969     return 0;
2970 }
2971
2972 static void
2973 TestHZ() {
2974     /* test input */
2975     static const uint16_t in[]={
2976             0x3000, 0x3001, 0x3002, 0x00B7, 0x02C9, 0x02C7, 0x00A8, 0x3003, 0x3005, 0x2014,
2977             0xFF5E, 0x2016, 0x2026, 0x007E, 0x997C, 0x70B3, 0x75C5, 0x5E76, 0x73BB, 0x83E0,
2978             0x64AD, 0x62E8, 0x94B5, 0x000A, 0x6CE2, 0x535A, 0x52C3, 0x640F, 0x94C2, 0x7B94,
2979             0x4F2F, 0x5E1B, 0x8236, 0x000A, 0x8116, 0x818A, 0x6E24, 0x6CCA, 0x9A73, 0x6355,
2980             0x535C, 0x54FA, 0x8865, 0x000A, 0x57E0, 0x4E0D, 0x5E03, 0x6B65, 0x7C3F, 0x90E8,
2981             0x6016, 0x248F, 0x2490, 0x000A, 0x2491, 0x2492, 0x2493, 0x2494, 0x2495, 0x2496,
2982             0x2497, 0x2498, 0x2499, 0x000A, 0x249A, 0x249B, 0x2474, 0x2475, 0x2476, 0x2477,
2983             0x2478, 0x2479, 0x247A, 0x000A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F, 0x2480,
2984             0x2481, 0x2482, 0x2483, 0x000A, 0x0041, 0x0043, 0x0044, 0x0045, 0x0046, 0x007E,
2985             0x0048, 0x0049, 0x004A, 0x000A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050,
2986             0x0051, 0x0052, 0x0053, 0x000A, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059,
2987             0x005A, 0x005B, 0x005C, 0x000A
2988       };
2989     const UChar* uSource;
2990     const UChar* uSourceLimit;
2991     const char* cSource;
2992     const char* cSourceLimit;
2993     UChar *uTargetLimit =NULL;
2994     UChar *uTarget;
2995     char *cTarget;
2996     const char *cTargetLimit;
2997     char *cBuf;
2998     UChar *uBuf,*test;
2999     int32_t uBufSize = 120;
3000     UErrorCode errorCode=U_ZERO_ERROR;
3001     UConverter *cnv;
3002     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3003     int32_t* myOff= offsets;
3004     cnv=ucnv_open("HZ", &errorCode);
3005     if(U_FAILURE(errorCode)) {
3006         log_data_err("Unable to open HZ converter: %s\n", u_errorName(errorCode));
3007         return;
3008     }
3009
3010     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3011     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3012     uSource = (const UChar*)in;
3013     uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
3014     cTarget = cBuf;
3015     cTargetLimit = cBuf +uBufSize*5;
3016     uTarget = uBuf;
3017     uTargetLimit = uBuf+ uBufSize*5;
3018     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3019     if(U_FAILURE(errorCode)){
3020         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3021         return;
3022     }
3023     cSource = cBuf;
3024     cSourceLimit =cTarget;
3025     test =uBuf;
3026     myOff=offsets;
3027     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3028     if(U_FAILURE(errorCode)){
3029         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3030         return;
3031     }
3032     uSource = (const UChar*)in;
3033     while(uSource<uSourceLimit){
3034         if(*test!=*uSource){
3035
3036             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3037         }
3038         uSource++;
3039         test++;
3040     }
3041     TestGetNextUChar2022(cnv, cBuf, cTarget, in, "HZ encoding");
3042     TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3043     TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3044     TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3045     TestJitterbug930("csISO2022JP");
3046     ucnv_close(cnv);
3047     free(offsets);
3048     free(uBuf);
3049     free(cBuf);
3050 }
3051
3052 static void
3053 TestISCII(){
3054         /* test input */
3055     static const uint16_t in[]={
3056         /* test full range of Devanagari */
3057         0x0901,0x0902,0x0903,0x0905,0x0906,0x0907,0x0908,0x0909,0x090A,
3058         0x090B,0x090E,0x090F,0x0910,0x090D,0x0912,0x0913,0x0914,0x0911,
3059         0x0915,0x0916,0x0917,0x0918,0x0919,0x091A,0x091B,0x091C,0x091D,
3060         0x091E,0x091F,0x0920,0x0921,0x0922,0x0923,0x0924,0x0925,0x0926,
3061         0x0927,0x0928,0x0929,0x092A,0x092B,0x092C,0x092D,0x092E,0x092F,
3062         0x095F,0x0930,0x0931,0x0932,0x0933,0x0934,0x0935,0x0936,0x0937,
3063         0x0938,0x0939,0x200D,0x093E,0x093F,0x0940,0x0941,0x0942,0x0943,
3064         0x0946,0x0947,0x0948,0x0945,0x094A,0x094B,0x094C,0x0949,0x094D,
3065         0x093d,0x0966,0x0967,0x0968,0x0969,0x096A,0x096B,0x096C,
3066         0x096D,0x096E,0x096F,
3067         /* test Soft halant*/
3068         0x0915,0x094d, 0x200D,
3069         /* test explicit halant */
3070         0x0915,0x094d, 0x200c,
3071         /* test double danda */
3072         0x965,
3073         /* test ASCII */
3074         0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
3075         0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
3076         0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
3077         /* tests from Lotus */
3078         0x0061,0x0915,0x000D,0x000A,0x0996,0x0043,
3079         0x0930,0x094D,0x200D,
3080         0x0901,0x000D,0x000A,0x0905,0x0985,0x0043,
3081         0x0915,0x0921,0x002B,0x095F,
3082         /* tamil range */
3083         0x0B86, 0xB87, 0xB88,
3084         /* telugu range */
3085         0x0C05, 0x0C02, 0x0C03,0x0c31,
3086         /* kannada range */
3087         0x0C85, 0xC82, 0x0C83,
3088         /* test Abbr sign and Anudatta */
3089         0x0970, 0x952,
3090        /* 0x0958,
3091         0x0959,
3092         0x095A,
3093         0x095B,
3094         0x095C,
3095         0x095D,
3096         0x095E,
3097         0x095F,*/
3098         0x0960 /* Vocallic RRI 0xAB, 0xE9*/,
3099         0x0944 /* Vowel Sign Vocallic RRI 0xDF, 0xE9 */,
3100         0x090C ,
3101         0x0962,
3102         0x0961 /* Vocallic LL 0xa6, 0xE9 */,
3103         0x0963 /* Vowel Sign Vocallic LL 0xdb, 0xE9, */,
3104         0x0950 /* OM Symbol 0xa1, 0xE9,*/,
3105         0x093D /* Avagraha  0xEA, 0xE9*/,
3106         0x0958,
3107         0x0959,
3108         0x095A,
3109         0x095B,
3110         0x095C,
3111         0x095D,
3112         0x095E,
3113         0x0020, 0x094D, 0x0930, 0x0000, 0x00A0
3114       };
3115     static const unsigned char byteArr[]={
3116
3117         0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,
3118         0xaa,0xab,0xac,0xad,0xae,0xaf,0xb0,0xb1,0xb2,
3119         0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,
3120         0xbc,0xbd,0xbe,0xbf,0xc0,0xc1,0xc2,0xc3,0xc4,
3121         0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,
3122         0xce,0xcf,0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,
3123         0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
3124         0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,
3125         0xea,0xe9,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,
3126         0xf8,0xf9,0xfa,
3127         /* test soft halant */
3128         0xb3, 0xE8, 0xE9,
3129         /* test explicit halant */
3130         0xb3, 0xE8, 0xE8,
3131         /* test double danda */
3132         0xea, 0xea,
3133         /* test ASCII */
3134         0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
3135         0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
3136         0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
3137         /* test ATR code */
3138
3139         /* tests from Lotus */
3140         0x61,0xEF,0x42,0xEF,0x30,0xB3,0x0D,0x0A,0xEF,0x43,0xB4,0x43,
3141         0xEF,0x42,0xCF,0xE8,0xD9,
3142         0xEF,0x42,0xA1,0x0D,0x0A,0xEF,0x42,0xA4,0xEF,0x43,0xA4,0x43,
3143         0xEF,0x42,0xB3,0xBF,0x2B,0xEF,0x42,0xCE,
3144         /* tamil range */
3145         0xEF, 0x44, 0xa5, 0xa6, 0xa7,
3146         /* telugu range */
3147         0xEF, 0x45,0xa4, 0xa2, 0xa3,0xd0,
3148         /* kannada range */
3149         0xEF, 0x48,0xa4, 0xa2, 0xa3,
3150         /* anudatta and abbreviation sign */
3151         0xEF, 0x42, 0xF0, 0xBF, 0xF0, 0xB8,
3152
3153
3154         0xAA, 0xE9,/* RI + NUKTA 0x0960*/
3155
3156         0xDF, 0xE9,/* Vowel sign RI + NUKTA 0x0944*/
3157
3158         0xa6, 0xE9,/* Vowel I + NUKTA 0x090C*/
3159
3160         0xdb, 0xE9,/* Vowel sign I + Nukta 0x0962*/
3161
3162         0xa7, 0xE9,/* Vowel II + NUKTA 0x0961*/
3163
3164         0xdc, 0xE9,/* Vowel sign II + Nukta 0x0963*/
3165
3166         0xa1, 0xE9,/* chandrabindu + Nukta 0x0950*/
3167
3168         0xEA, 0xE9, /* Danda + Nukta 0x093D*/
3169
3170         0xB3, 0xE9, /* Ka + NUKTA */
3171
3172         0xB4, 0xE9, /* Kha + NUKTA */
3173
3174         0xB5, 0xE9, /* Ga + NUKTA */
3175
3176         0xBA, 0xE9,
3177
3178         0xBF, 0xE9,
3179
3180         0xC0, 0xE9,
3181
3182         0xC9, 0xE9,
3183         /* INV halant RA    */
3184         0xD9, 0xE8, 0xCF,
3185         0x00, 0x00A0,
3186         /* just consume unhandled codepoints */
3187         0xEF, 0x30,
3188
3189     };
3190     testConvertToU(byteArr,(sizeof(byteArr)),in,(sizeof(in)/U_SIZEOF_UCHAR),"x-iscii-de",NULL,TRUE);
3191     TestConv(in,(sizeof(in)/2),"ISCII,version=0","hindi", (char *)byteArr,sizeof(byteArr));
3192
3193 }
3194
3195 static void
3196 TestISO_2022_JP() {
3197     /* test input */
3198     static const uint16_t in[]={
3199         0x0041,/*0x00E9,*/0x3000, 0x3001, 0x3002, 0x0020, 0x000D, 0x000A,
3200         0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3201         0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3202         0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3203         0x301C, 0x2016, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3204         0x201D, 0x3014, 0x000D, 0x000A,
3205         0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3206         0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3207         };
3208     const UChar* uSource;
3209     const UChar* uSourceLimit;
3210     const char* cSource;
3211     const char* cSourceLimit;
3212     UChar *uTargetLimit =NULL;
3213     UChar *uTarget;
3214     char *cTarget;
3215     const char *cTargetLimit;
3216     char *cBuf;
3217     UChar *uBuf,*test;
3218     int32_t uBufSize = 120;
3219     UErrorCode errorCode=U_ZERO_ERROR;
3220     UConverter *cnv;
3221     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3222     int32_t* myOff= offsets;
3223     cnv=ucnv_open("ISO_2022_JP_1", &errorCode);
3224     if(U_FAILURE(errorCode)) {
3225         log_data_err("Unable to open an ISO_2022_JP_1 converter: %s\n", u_errorName(errorCode));
3226         return;
3227     }
3228
3229     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3230     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3231     uSource = (const UChar*)in;
3232     uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
3233     cTarget = cBuf;
3234     cTargetLimit = cBuf +uBufSize*5;
3235     uTarget = uBuf;
3236     uTargetLimit = uBuf+ uBufSize*5;
3237     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3238     if(U_FAILURE(errorCode)){
3239         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3240         return;
3241     }
3242     cSource = cBuf;
3243     cSourceLimit =cTarget;
3244     test =uBuf;
3245     myOff=offsets;
3246     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3247     if(U_FAILURE(errorCode)){
3248         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3249         return;
3250     }
3251
3252     uSource = (const UChar*)in;
3253     while(uSource<uSourceLimit){
3254         if(*test!=*uSource){
3255
3256             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3257         }
3258         uSource++;
3259         test++;
3260     }
3261
3262     TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3263     TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3264     TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-JP encoding");
3265     TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3266     TestJitterbug930("csISO2022JP");
3267     ucnv_close(cnv);
3268     free(uBuf);
3269     free(cBuf);
3270     free(offsets);
3271 }
3272
3273 static void TestConv(const uint16_t in[],int len, const char* conv, const char* lang, char byteArr[],int byteArrLen){
3274     const UChar* uSource;
3275     const UChar* uSourceLimit;
3276     const char* cSource;
3277     const char* cSourceLimit;
3278     UChar *uTargetLimit =NULL;
3279     UChar *uTarget;
3280     char *cTarget;
3281     const char *cTargetLimit;
3282     char *cBuf;
3283     UChar *uBuf,*test;
3284     int32_t uBufSize = 120*10;
3285     UErrorCode errorCode=U_ZERO_ERROR;
3286     UConverter *cnv;
3287     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) );
3288     int32_t* myOff= offsets;
3289     cnv=my_ucnv_open(conv, &errorCode);
3290     if(U_FAILURE(errorCode)) {
3291         log_data_err("Unable to open a %s converter: %s\n", conv, u_errorName(errorCode));
3292         return;
3293     }
3294
3295     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar));
3296     cBuf =(char*)malloc(uBufSize * sizeof(char));
3297     uSource = (const UChar*)in;
3298     uSourceLimit=uSource+len;
3299     cTarget = cBuf;
3300     cTargetLimit = cBuf +uBufSize;
3301     uTarget = uBuf;
3302     uTargetLimit = uBuf+ uBufSize;
3303     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3304     if(U_FAILURE(errorCode)){
3305         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3306         return;
3307     }
3308     /*log_verbose("length of compressed string for language %s using %s:%i \n",conv,lang,(cTarget-cBuf));*/
3309     cSource = cBuf;
3310     cSourceLimit =cTarget;
3311     test =uBuf;
3312     myOff=offsets;
3313     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3314     if(U_FAILURE(errorCode)){
3315         log_err("ucnv_toUnicode conversion failed, reason: %s\n", u_errorName(errorCode));
3316         return;
3317     }
3318
3319     uSource = (const UChar*)in;
3320     while(uSource<uSourceLimit){
3321         if(*test!=*uSource){
3322             log_err("for codepage %s : Expected : \\u%04X \t Got: \\u%04X\n",conv,*uSource,(int)*test) ;
3323         }
3324         uSource++;
3325         test++;
3326     }
3327     TestSmallTargetBuffer(in,(const UChar*)&in[len],cnv);
3328     TestSmallSourceBuffer(in,(const UChar*)&in[len],cnv);
3329     TestGetNextUChar2022(cnv, cBuf, cTarget, in, conv);
3330     if(byteArr && byteArrLen!=0){
3331         TestGetNextUChar2022(cnv, byteArr, (byteArr+byteArrLen), in, lang);
3332         TestToAndFromUChars(in,(const UChar*)&in[len],cnv);
3333         {
3334             cSource = byteArr;
3335             cSourceLimit = cSource+byteArrLen;
3336             test=uBuf;
3337             myOff = offsets;
3338             ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3339             if(U_FAILURE(errorCode)){
3340                 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3341                 return;
3342             }
3343
3344             uSource = (const UChar*)in;
3345             while(uSource<uSourceLimit){
3346                 if(*test!=*uSource){
3347                     log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3348                 }
3349                 uSource++;
3350                 test++;
3351             }
3352         }
3353     }
3354
3355     ucnv_close(cnv);
3356     free(uBuf);
3357     free(cBuf);
3358     free(offsets);
3359 }
3360 static UChar U_CALLCONV
3361 _charAt(int32_t offset, void *context) {
3362     return ((char*)context)[offset];
3363 }
3364
3365 static int32_t
3366 unescape(UChar* dst, int32_t dstLen,const char* src,int32_t srcLen,UErrorCode *status){
3367     int32_t srcIndex=0;
3368     int32_t dstIndex=0;
3369     if(U_FAILURE(*status)){
3370         return 0;
3371     }
3372     if((dst==NULL && dstLen>0) || (src==NULL ) || dstLen < -1 || srcLen <-1 ){
3373         *status = U_ILLEGAL_ARGUMENT_ERROR;
3374         return 0;
3375     }
3376     if(srcLen==-1){
3377         srcLen = (int32_t)uprv_strlen(src);
3378     }
3379
3380     for (; srcIndex<srcLen; ) {
3381         UChar32 c = src[srcIndex++];
3382         if (c == 0x005C /*'\\'*/) {
3383             c = u_unescapeAt(_charAt,&srcIndex,srcLen,(void*)src); /* advances i*/
3384             if (c == (UChar32)0xFFFFFFFF) {
3385                 *status=U_INVALID_CHAR_FOUND; /* return empty string */
3386                 break; /* invalid escape sequence */
3387             }
3388         }
3389         if(dstIndex < dstLen){
3390             if(c>0xFFFF){
3391                dst[dstIndex++] = UTF16_LEAD(c);
3392                if(dstIndex<dstLen){
3393                     dst[dstIndex]=UTF16_TRAIL(c);
3394                }else{
3395                    *status=U_BUFFER_OVERFLOW_ERROR;
3396                }
3397             }else{
3398                 dst[dstIndex]=(UChar)c;
3399             }
3400
3401         }else{
3402             *status = U_BUFFER_OVERFLOW_ERROR;
3403         }
3404         dstIndex++; /* for preflighting */
3405     }
3406     return dstIndex;
3407 }
3408
3409 static void
3410 TestFullRoundtrip(const char* cp){
3411     UChar usource[10] ={0};
3412     UChar nsrc[10] = {0};
3413     uint32_t i=1;
3414     int len=0, ulen;
3415     nsrc[0]=0x0061;
3416     /* Test codepoint 0 */
3417     TestConv(usource,1,cp,"",NULL,0);
3418     TestConv(usource,2,cp,"",NULL,0);
3419     nsrc[2]=0x5555;
3420     TestConv(nsrc,3,cp,"",NULL,0);
3421
3422     for(;i<=0x10FFFF;i++){
3423         if(i==0xD800){
3424             i=0xDFFF;
3425             continue;
3426         }
3427         if(i<=0xFFFF){
3428             usource[0] =(UChar) i;
3429             len=1;
3430         }else{
3431             usource[0]=UTF16_LEAD(i);
3432             usource[1]=UTF16_TRAIL(i);
3433             len=2;
3434         }
3435         ulen=len;
3436         if(i==0x80) {
3437             usource[2]=0;
3438         }
3439         /* Test only single code points */
3440         TestConv(usource,ulen,cp,"",NULL,0);
3441         /* Test codepoint repeated twice */
3442         usource[ulen]=usource[0];
3443         usource[ulen+1]=usource[1];
3444         ulen+=len;
3445         TestConv(usource,ulen,cp,"",NULL,0);
3446         /* Test codepoint repeated 3 times */
3447         usource[ulen]=usource[0];
3448         usource[ulen+1]=usource[1];
3449         ulen+=len;
3450         TestConv(usource,ulen,cp,"",NULL,0);
3451         /* Test codepoint in between 2 codepoints */
3452         nsrc[1]=usource[0];
3453         nsrc[2]=usource[1];
3454         nsrc[len+1]=0x5555;
3455         TestConv(nsrc,len+2,cp,"",NULL,0);
3456         uprv_memset(usource,0,sizeof(UChar)*10);
3457     }
3458 }
3459
3460 static void
3461 TestRoundTrippingAllUTF(void){
3462     if(!QUICK){
3463         log_verbose("Running exhaustive round trip test for BOCU-1\n");
3464         TestFullRoundtrip("BOCU-1");
3465         log_verbose("Running exhaustive round trip test for SCSU\n");
3466         TestFullRoundtrip("SCSU");
3467         log_verbose("Running exhaustive round trip test for UTF-8\n");
3468         TestFullRoundtrip("UTF-8");
3469         log_verbose("Running exhaustive round trip test for CESU-8\n");
3470         TestFullRoundtrip("CESU-8");
3471         log_verbose("Running exhaustive round trip test for UTF-16BE\n");
3472         TestFullRoundtrip("UTF-16BE");
3473         log_verbose("Running exhaustive round trip test for UTF-16LE\n");
3474         TestFullRoundtrip("UTF-16LE");
3475         log_verbose("Running exhaustive round trip test for UTF-16\n");
3476         TestFullRoundtrip("UTF-16");
3477         log_verbose("Running exhaustive round trip test for UTF-32BE\n");
3478         TestFullRoundtrip("UTF-32BE");
3479         log_verbose("Running exhaustive round trip test for UTF-32LE\n");
3480         TestFullRoundtrip("UTF-32LE");
3481         log_verbose("Running exhaustive round trip test for UTF-32\n");
3482         TestFullRoundtrip("UTF-32");
3483         log_verbose("Running exhaustive round trip test for UTF-7\n");
3484         TestFullRoundtrip("UTF-7");
3485         log_verbose("Running exhaustive round trip test for UTF-7\n");
3486         TestFullRoundtrip("UTF-7,version=1");
3487         log_verbose("Running exhaustive round trip test for IMAP-mailbox-name\n");
3488         TestFullRoundtrip("IMAP-mailbox-name");
3489         log_verbose("Running exhaustive round trip test for GB18030\n");
3490         TestFullRoundtrip("GB18030");
3491     }
3492 }
3493
3494 static void
3495 TestSCSU() {
3496
3497     static const uint16_t germanUTF16[]={
3498         0x00d6, 0x006c, 0x0020, 0x0066, 0x006c, 0x0069, 0x0065, 0x00df, 0x0074
3499     };
3500
3501     static const uint8_t germanSCSU[]={
3502         0xd6, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x65, 0xdf, 0x74
3503     };
3504
3505     static const uint16_t russianUTF16[]={
3506         0x041c, 0x043e, 0x0441, 0x043a, 0x0432, 0x0430
3507     };
3508
3509     static const uint8_t russianSCSU[]={
3510         0x12, 0x9c, 0xbe, 0xc1, 0xba, 0xb2, 0xb0
3511     };
3512
3513     static const uint16_t japaneseUTF16[]={
3514         0x3000, 0x266a, 0x30ea, 0x30f3, 0x30b4, 0x53ef, 0x611b,
3515         0x3044, 0x3084, 0x53ef, 0x611b, 0x3044, 0x3084, 0x30ea, 0x30f3,
3516         0x30b4, 0x3002, 0x534a, 0x4e16, 0x7d00, 0x3082, 0x524d, 0x306b,
3517         0x6d41, 0x884c, 0x3057, 0x305f, 0x300c, 0x30ea, 0x30f3, 0x30b4,
3518         0x306e, 0x6b4c, 0x300d, 0x304c, 0x3074, 0x3063, 0x305f, 0x308a,
3519         0x3059, 0x308b, 0x304b, 0x3082, 0x3057, 0x308c, 0x306a, 0x3044,
3520         0x3002, 0x7c73, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x30b3, 0x30f3,
3521         0x30d4, 0x30e5, 0x30fc, 0x30bf, 0x793e, 0x306e, 0x30d1, 0x30bd,
3522         0x30b3, 0x30f3, 0x300c, 0x30de, 0x30c3, 0x30af, 0xff08, 0x30de,
3523         0x30c3, 0x30ad, 0x30f3, 0x30c8, 0x30c3, 0x30b7, 0x30e5, 0xff09,
3524         0x300d, 0x3092, 0x3001, 0x3053, 0x3088, 0x306a, 0x304f, 0x611b,
3525         0x3059, 0x308b, 0x4eba, 0x305f, 0x3061, 0x306e, 0x3053, 0x3068,
3526         0x3060, 0x3002, 0x300c, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x4fe1,
3527         0x8005, 0x300d, 0x306a, 0x3093, 0x3066, 0x8a00, 0x3044, 0x65b9,
3528         0x307e, 0x3067, 0x3042, 0x308b, 0x3002
3529     };
3530
3531     /* SCSUEncoder produces a slightly longer result (179B vs. 178B) because of one different choice:
3532      it uses an SQn once where a longer look-ahead could have shown that SCn is more efficient */
3533     static const uint8_t japaneseSCSU[]={
3534         0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
3535         0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
3536         0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
3537         0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
3538         0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
3539         0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
3540         0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
3541         0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
3542         0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
3543         0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
3544         0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
3545         0xcb, 0x82
3546     };
3547
3548     static const uint16_t allFeaturesUTF16[]={
3549         0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 0x01df, 0xf000, 0xdbff,
3550         0xdfff, 0x000d, 0x000a, 0x0041, 0x00df, 0x0401, 0x015f, 0x00df,
3551         0x01df, 0xf000, 0xdbff, 0xdfff
3552     };
3553
3554     /* see comment at japaneseSCSU: the same kind of different choice yields a slightly shorter
3555      * result here (34B vs. 35B)
3556      */
3557     static const uint8_t allFeaturesSCSU[]={
3558         0x41, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x1b, 0x03,
3559         0xdf, 0x1c, 0x88, 0x80, 0x0b, 0xbf, 0xff, 0xff, 0x0d, 0x0a,
3560         0x41, 0x10, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x13,
3561         0xdf, 0x14, 0x80, 0x15, 0xff
3562     };
3563     static const uint16_t monkeyIn[]={
3564         0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3565         0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3566         0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3567         0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3568         0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3569         0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3570         0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3571         0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3572         0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3573         0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3574         0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3575         0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3576         0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3577         0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3578         0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3579         0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3580         0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3581         0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3582         0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A,
3583         /* test non-BMP code points */
3584         0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
3585         0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
3586         0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
3587         0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
3588         0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
3589         0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
3590         0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
3591         0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
3592         0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
3593         0xD869, 0xDED5, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xDBFF, 0xDFFF,
3594         0xDBFF, 0xDFFF, 0xDBFF, 0xDFFF,
3595
3596
3597         0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
3598         0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
3599         0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
3600         0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
3601         0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
3602     };
3603     static const char *fTestCases [] = {
3604           "\\ud800\\udc00", /* smallest surrogate*/
3605           "\\ud8ff\\udcff",
3606           "\\udBff\\udFff", /* largest surrogate pair*/
3607           "\\ud834\\udc00",
3608           "\\U0010FFFF",
3609           "Hello \\u9292 \\u9192 World!",
3610           "Hell\\u0429o \\u9292 \\u9192 W\\u00e4rld!",
3611           "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3612
3613           "\\u0648\\u06c8", /* catch missing reset*/
3614           "\\u0648\\u06c8",
3615
3616           "\\u4444\\uE001", /* lowest quotable*/
3617           "\\u4444\\uf2FF", /* highest quotable*/
3618           "\\u4444\\uf188\\u4444",
3619           "\\u4444\\uf188\\uf288",
3620           "\\u4444\\uf188abc\\u0429\\uf288",
3621           "\\u9292\\u2222",
3622           "Hell\\u0429\\u04230o \\u9292 \\u9292W\\u00e4\\u0192rld!",
3623           "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3624           "Hello World!123456",
3625           "Hello W\\u0081\\u011f\\u0082!", /* Latin 1 run*/
3626
3627           "abc\\u0301\\u0302",  /* uses SQn for u301 u302*/
3628           "abc\\u4411d",      /* uses SQU*/
3629           "abc\\u4411\\u4412d",/* uses SCU*/
3630           "abc\\u0401\\u0402\\u047f\\u00a5\\u0405", /* uses SQn for ua5*/
3631           "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", /* SJIS like data*/
3632           "\\u9292\\u2222",
3633           "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000",
3634           "\\u9999\\u3051\\u300c\\u9999\\u9999\\u3060\\u9999\\u3065\\u3065\\u3065\\u300c",
3635           "\\u3000\\u266a\\u30ea\\u30f3\\u30b4\\u53ef\\u611b\\u3044\\u3084\\u53ef\\u611b\\u3044\\u3084\\u30ea\\u30f3\\u30b4\\u3002",
3636
3637           "", /* empty input*/
3638           "\\u0000", /* smallest BMP character*/
3639           "\\uFFFF", /* largest BMP character*/
3640
3641           /* regression tests*/
3642           "\\u6441\\ub413\\ua733\\uf8fe\\ueedb\\u587f\\u195f\\u4899\\uf23d\\u49fd\\u0aac\\u5792\\ufc22\\ufc3c\\ufc46\\u00aa",
3643           "\\u00df\\u01df\\uf000\\udbff\\udfff\\u000d\n\\u0041\\u00df\\u0401\\u015f\\u00df\\u01df\\uf000\\udbff\\udfff",
3644           "\\u30f9\\u8321\\u05e5\\u181c\\ud72b\\u2019\\u99c9\\u2f2f\\uc10c\\u82e1\\u2c4d\\u1ebc\\u6013\\u66dc\\ubbde\\u94a5\\u4726\\u74af\\u3083\\u55b9\\u000c",
3645           "\\u0041\\u00df\\u0401\\u015f",
3646           "\\u9066\\u2123abc",
3647           "\\ud266\\u43d7\\u\\ue386\\uc9c0\\u4a6b\\u9222\\u901f\\u7410\\ua63f\\u539b\\u9596\\u482e\\u9d47\\ucfe4\\u7b71\\uc280\\uf26a\\u982f\\u862a\\u4edd\\uf513\\ufda6\\u869d\\u2ee0\\ua216\\u3ff6\\u3c70\\u89c0\\u9576\\ud5ec\\ubfda\\u6cca\\u5bb3\\ubcea\\u554c\\u914e\\ufa4a\\uede3\\u2990\\ud2f5\\u2729\\u5141\\u0f26\\uccd8\\u5413\\ud196\\ubbe2\\u51b9\\u9b48\\u0dc8\\u2195\\u21a2\\u21e9\\u00e4\\u9d92\\u0bc0\\u06c5",
3648           "\\uf95b\\u2458\\u2468\\u0e20\\uf51b\\ue36e\\ubfc1\\u0080\\u02dd\\uf1b5\\u0cf3\\u6059\\u7489",
3649     };
3650     int i=0;
3651     for(;i<sizeof(fTestCases)/sizeof(*fTestCases);i++){
3652         const char* cSrc = fTestCases[i];
3653         UErrorCode status = U_ZERO_ERROR;
3654         int32_t cSrcLen,srcLen;
3655         UChar* src;
3656         /* UConverter* cnv = ucnv_open("SCSU",&status); */
3657         cSrcLen = srcLen = (int32_t)uprv_strlen(fTestCases[i]);
3658         src = (UChar*) malloc((sizeof(UChar) * srcLen) + sizeof(UChar));
3659         srcLen=unescape(src,srcLen,cSrc,cSrcLen,&status);
3660         log_verbose("Testing roundtrip for src: %s at index :%d\n",cSrc,i);
3661         TestConv(src,srcLen,"SCSU","Coverage",NULL,0);
3662         free(src);
3663     }
3664     TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features", (char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
3665     TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features",(char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
3666     TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
3667     TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU,locale=ja","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
3668     TestConv(germanUTF16,(sizeof(germanUTF16)/2),"SCSU","german",(char *)germanSCSU,sizeof(germanSCSU));
3669     TestConv(russianUTF16,(sizeof(russianUTF16)/2), "SCSU","russian",(char *)russianSCSU,sizeof(russianSCSU));
3670     TestConv(monkeyIn,(sizeof(monkeyIn)/2),"SCSU","monkey",NULL,0);
3671 }
3672
3673 #if !UCONFIG_NO_LEGACY_CONVERSION
3674 static void TestJitterbug2346(){
3675     char source[] = { 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a,
3676                       0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a};
3677     uint16_t expected[] = {0x91CD,0x000D,0x000A,0x91CD,0x000D,0x000A};
3678
3679     UChar uTarget[500]={'\0'};
3680     UChar* utarget=uTarget;
3681     UChar* utargetLimit=uTarget+sizeof(uTarget)/2;
3682
3683     char cTarget[500]={'\0'};
3684     char* ctarget=cTarget;
3685     char* ctargetLimit=cTarget+sizeof(cTarget);
3686     const char* csource=source;
3687     UChar* temp = expected;
3688     UErrorCode err=U_ZERO_ERROR;
3689
3690     UConverter* conv =ucnv_open("ISO_2022_JP",&err);
3691     if(U_FAILURE(err)) {
3692         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
3693         return;
3694     }
3695     ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(source),NULL,TRUE,&err);
3696     if(U_FAILURE(err)) {
3697         log_err("ISO_2022_JP to Unicode conversion failed: %s\n", u_errorName(err));
3698         return;
3699     }
3700     utargetLimit=utarget;
3701     utarget = uTarget;
3702     while(utarget<utargetLimit){
3703         if(*temp!=*utarget){
3704
3705             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*utarget,(int)*temp) ;
3706         }
3707         utarget++;
3708         temp++;
3709     }
3710     ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err);
3711     if(U_FAILURE(err)) {
3712         log_err("ISO_2022_JP from Unicode conversion failed: %s\n", u_errorName(err));
3713         return;
3714     }
3715     ctargetLimit=ctarget;
3716     ctarget =cTarget;
3717     ucnv_close(conv);
3718
3719
3720 }
3721
3722 static void
3723 TestISO_2022_JP_1() {
3724     /* test input */
3725     static const uint16_t in[]={
3726         0x3000, 0x3001, 0x3002, 0x0020, 0xFF0E, 0x30FB, 0xFF1A, 0xFF1B, 0x000D, 0x000A,
3727         0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3728         0x52C8, 0x52CC, 0x52CF, 0x52D1, 0x52D4, 0x52D6, 0x52DB, 0x52DC, 0x000D, 0x000A,
3729         0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3730         0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3731         0x301C, 0x2016, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3732         0x201D, 0x000D, 0x000A,
3733         0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3734         0x4F94, 0x4F97, 0x52BA, 0x52BB, 0x52BD, 0x52C0, 0x52C4, 0x52C6, 0x000D, 0x000A,
3735         0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3736         0x4F78, 0x4F79, 0x4F7A, 0x4F7D, 0x4F7E, 0x4F81, 0x4F82, 0x4F84, 0x000D, 0x000A,
3737         0x4F85, 0x4F89, 0x4F8A, 0x4F8C, 0x4F8E, 0x4F90, 0x4F92, 0x4F93, 0x000D, 0x000A,
3738         0x52E1, 0x52E5, 0x52E8, 0x52E9, 0x000D, 0x000A
3739       };
3740     const UChar* uSource;
3741     const UChar* uSourceLimit;
3742     const char* cSource;
3743     const char* cSourceLimit;
3744     UChar *uTargetLimit =NULL;
3745     UChar *uTarget;
3746     char *cTarget;
3747     const char *cTargetLimit;
3748     char *cBuf;
3749     UChar *uBuf,*test;
3750     int32_t uBufSize = 120;
3751     UErrorCode errorCode=U_ZERO_ERROR;
3752     UConverter *cnv;
3753
3754     cnv=ucnv_open("ISO_2022_JP_1", &errorCode);
3755     if(U_FAILURE(errorCode)) {
3756         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3757         return;
3758     }
3759
3760     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3761     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3762     uSource = (const UChar*)in;
3763     uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
3764     cTarget = cBuf;
3765     cTargetLimit = cBuf +uBufSize*5;
3766     uTarget = uBuf;
3767     uTargetLimit = uBuf+ uBufSize*5;
3768     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,TRUE, &errorCode);
3769     if(U_FAILURE(errorCode)){
3770         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3771         return;
3772     }
3773     cSource = cBuf;
3774     cSourceLimit =cTarget;
3775     test =uBuf;
3776     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,TRUE,&errorCode);
3777     if(U_FAILURE(errorCode)){
3778         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3779         return;
3780     }
3781     uSource = (const UChar*)in;
3782     while(uSource<uSourceLimit){
3783         if(*test!=*uSource){
3784
3785             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3786         }
3787         uSource++;
3788         test++;
3789     }
3790     /*ucnv_close(cnv);
3791     cnv=ucnv_open("ISO_2022,locale=jp,version=1", &errorCode);*/
3792     /*Test for the condition where there is an invalid character*/
3793     ucnv_reset(cnv);
3794     {
3795         static const uint8_t source2[]={0x0e,0x24,0x053};
3796         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-1]");
3797     }
3798     TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3799     TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3800     ucnv_close(cnv);
3801     free(uBuf);
3802     free(cBuf);
3803 }
3804
3805 static void
3806 TestISO_2022_JP_2() {
3807     /* test input */
3808     static const uint16_t in[]={
3809         0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3810         0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3811         0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3812         0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3813         0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3814         0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3815         0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3816         0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3817         0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3818         0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3819         0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3820         0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3821         0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3822         0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3823         0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3824         0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3825         0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3826         0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3827         0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A
3828       };
3829     const UChar* uSource;
3830     const UChar* uSourceLimit;
3831     const char* cSource;
3832     const char* cSourceLimit;
3833     UChar *uTargetLimit =NULL;
3834     UChar *uTarget;
3835     char *cTarget;
3836     const char *cTargetLimit;
3837     char *cBuf;
3838     UChar *uBuf,*test;
3839     int32_t uBufSize = 120;
3840     UErrorCode errorCode=U_ZERO_ERROR;
3841     UConverter *cnv;
3842     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3843     int32_t* myOff= offsets;
3844     cnv=ucnv_open("ISO_2022_JP_2", &errorCode);
3845     if(U_FAILURE(errorCode)) {
3846         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3847         return;
3848     }
3849
3850     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3851     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3852     uSource = (const UChar*)in;
3853     uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
3854     cTarget = cBuf;
3855     cTargetLimit = cBuf +uBufSize*5;
3856     uTarget = uBuf;
3857     uTargetLimit = uBuf+ uBufSize*5;
3858     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3859     if(U_FAILURE(errorCode)){
3860         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3861         return;
3862     }
3863     cSource = cBuf;
3864     cSourceLimit =cTarget;
3865     test =uBuf;
3866     myOff=offsets;
3867     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3868     if(U_FAILURE(errorCode)){
3869         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3870         return;
3871     }
3872     uSource = (const UChar*)in;
3873     while(uSource<uSourceLimit){
3874         if(*test!=*uSource){
3875
3876             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3877         }
3878         uSource++;
3879         test++;
3880     }
3881     TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3882     TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3883     TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3884     /*Test for the condition where there is an invalid character*/
3885     ucnv_reset(cnv);
3886     {
3887         static const uint8_t source2[]={0x0e,0x24,0x053};
3888         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-2]");
3889     }
3890     ucnv_close(cnv);
3891     free(uBuf);
3892     free(cBuf);
3893     free(offsets);
3894 }
3895
3896 static void
3897 TestISO_2022_KR() {
3898     /* test input */
3899     static const uint16_t in[]={
3900                     0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F66,0x9F67,0x9F6A,0x000A,0x000D
3901                    ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC02,0xAC04
3902                    ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
3903                    ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
3904                    ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53DF,0x53E1,0x53E2
3905                    ,0x53E3,0x53E4,0x000A,0x000D};
3906     const UChar* uSource;
3907     const UChar* uSourceLimit;
3908     const char* cSource;
3909     const char* cSourceLimit;
3910     UChar *uTargetLimit =NULL;
3911     UChar *uTarget;
3912     char *cTarget;
3913     const char *cTargetLimit;
3914     char *cBuf;
3915     UChar *uBuf,*test;
3916     int32_t uBufSize = 120;
3917     UErrorCode errorCode=U_ZERO_ERROR;
3918     UConverter *cnv;
3919     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3920     int32_t* myOff= offsets;
3921     cnv=ucnv_open("ISO_2022,locale=kr", &errorCode);
3922     if(U_FAILURE(errorCode)) {
3923         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3924         return;
3925     }
3926
3927     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3928     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3929     uSource = (const UChar*)in;
3930     uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
3931     cTarget = cBuf;
3932     cTargetLimit = cBuf +uBufSize*5;
3933     uTarget = uBuf;
3934     uTargetLimit = uBuf+ uBufSize*5;
3935     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3936     if(U_FAILURE(errorCode)){
3937         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3938         return;
3939     }
3940     cSource = cBuf;
3941     cSourceLimit =cTarget;
3942     test =uBuf;
3943     myOff=offsets;
3944     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3945     if(U_FAILURE(errorCode)){
3946         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3947         return;
3948     }
3949     uSource = (const UChar*)in;
3950     while(uSource<uSourceLimit){
3951         if(*test!=*uSource){
3952             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
3953         }
3954         uSource++;
3955         test++;
3956     }
3957     TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
3958     TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3959     TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3960     TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3961     TestJitterbug930("csISO2022KR");
3962     /*Test for the condition where there is an invalid character*/
3963     ucnv_reset(cnv);
3964     {
3965         static const uint8_t source2[]={0x1b,0x24,0x053};
3966         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
3967         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
3968     }
3969     ucnv_close(cnv);
3970     free(uBuf);
3971     free(cBuf);
3972     free(offsets);
3973 }
3974
3975 static void
3976 TestISO_2022_KR_1() {
3977     /* test input */
3978     static const uint16_t in[]={
3979                     0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
3980                    ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
3981                    ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
3982                    ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
3983                    ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
3984                    ,0x53E3,0x53E4,0x000A,0x000D};
3985     const UChar* uSource;
3986     const UChar* uSourceLimit;
3987     const char* cSource;
3988     const char* cSourceLimit;
3989     UChar *uTargetLimit =NULL;
3990     UChar *uTarget;
3991     char *cTarget;
3992     const char *cTargetLimit;
3993     char *cBuf;
3994     UChar *uBuf,*test;
3995     int32_t uBufSize = 120;
3996     UErrorCode errorCode=U_ZERO_ERROR;
3997     UConverter *cnv;
3998     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3999     int32_t* myOff= offsets;
4000     cnv=ucnv_open("ibm-25546", &errorCode);
4001     if(U_FAILURE(errorCode)) {
4002         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4003         return;
4004     }
4005
4006     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4007     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
4008     uSource = (const UChar*)in;
4009     uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
4010     cTarget = cBuf;
4011     cTargetLimit = cBuf +uBufSize*5;
4012     uTarget = uBuf;
4013     uTargetLimit = uBuf+ uBufSize*5;
4014     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4015     if(U_FAILURE(errorCode)){
4016         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4017         return;
4018     }
4019     cSource = cBuf;
4020     cSourceLimit =cTarget;
4021     test =uBuf;
4022     myOff=offsets;
4023     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4024     if(U_FAILURE(errorCode)){
4025         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4026         return;
4027     }
4028     uSource = (const UChar*)in;
4029     while(uSource<uSourceLimit){
4030         if(*test!=*uSource){
4031             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
4032         }
4033         uSource++;
4034         test++;
4035     }
4036     ucnv_reset(cnv);
4037     TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
4038     TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4039     TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4040     ucnv_reset(cnv);
4041     TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4042         /*Test for the condition where there is an invalid character*/
4043     ucnv_reset(cnv);
4044     {
4045         static const uint8_t source2[]={0x1b,0x24,0x053};
4046         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
4047         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
4048     }
4049     ucnv_close(cnv);
4050     free(uBuf);
4051     free(cBuf);
4052     free(offsets);
4053 }
4054
4055 static void TestJitterbug2411(){
4056     static const char* source = "\x1b\x24\x29\x43\x6b\x6b\x6e\x6e\x6a\x68\x70\x6f\x69\x75\x79\x71\x77\x65\x68\x67\x0A"
4057                          "\x1b\x24\x29\x43\x6a\x61\x73\x64\x66\x6a\x61\x73\x64\x66\x68\x6f\x69\x75\x79\x1b\x24\x29\x43";
4058     UConverter* kr=NULL, *kr1=NULL;
4059     UErrorCode errorCode = U_ZERO_ERROR;
4060     UChar tgt[100]={'\0'};
4061     UChar* target = tgt;
4062     UChar* targetLimit = target+100;
4063     kr=ucnv_open("iso-2022-kr", &errorCode);
4064     if(U_FAILURE(errorCode)) {
4065         log_data_err("Unable to open a iso-2022-kr converter: %s\n", u_errorName(errorCode));
4066         return;
4067     }
4068     ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode);
4069     if(U_FAILURE(errorCode)) {
4070         log_err("iso-2022-kr cannot handle multiple escape sequences : %s\n", u_errorName(errorCode));
4071         return;
4072     }
4073     kr1 = ucnv_open("ibm-25546", &errorCode);
4074     if(U_FAILURE(errorCode)) {
4075         log_data_err("Unable to open a iso-2022-kr_1 converter: %s\n", u_errorName(errorCode));
4076         return;
4077     }
4078     target = tgt;
4079     targetLimit = target+100;
4080     ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode);
4081
4082     if(U_FAILURE(errorCode)) {
4083         log_err("iso-2022-kr_1 cannot handle multiple escape sequences : %s\n", u_errorName(errorCode));
4084         return;
4085     }
4086
4087     ucnv_close(kr);
4088     ucnv_close(kr1);
4089
4090 }
4091
4092 static void
4093 TestJIS(){
4094     /* From Unicode moved to testdata/conversion.txt */
4095     /*To Unicode*/
4096     {
4097         static const uint8_t sampleTextJIS[] = {
4098             0x1b,0x28,0x48,0x41,0x42, /*jis-Roman*/
4099             0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4100             0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4101         };
4102         static const uint16_t expectedISO2022JIS[] = {
4103             0x0041, 0x0042,
4104             0xFF81, 0xFF82,
4105             0x3000
4106         };
4107         static const int32_t  toISO2022JISOffs[]={
4108             3,4,
4109             8,9,
4110             16
4111         };
4112
4113         static const uint8_t sampleTextJIS7[] = {
4114             0x1b,0x28,0x48,0x41,0x42, /*JIS7-Roman*/
4115             0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4116             0x1b,0x24,0x42,0x21,0x21,
4117             0x0e,0x41,0x42,0x0f,      /*Test Katakana set with SI and SO */
4118             0x21,0x22,
4119             0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4120         };
4121         static const uint16_t expectedISO2022JIS7[] = {
4122             0x0041, 0x0042,
4123             0xFF81, 0xFF82,
4124             0x3000,
4125             0xFF81, 0xFF82,
4126             0x3001,
4127             0x3000
4128         };
4129         static const int32_t  toISO2022JIS7Offs[]={
4130             3,4,
4131             8,9,
4132             13,16,
4133             17,
4134             19,27
4135         };
4136         static const uint8_t sampleTextJIS8[] = {
4137             0x1b,0x28,0x48,0x41,0x42, /*JIS8-Roman*/
4138             0xa1,0xc8,0xd9,/*Katakana Set*/
4139             0x1b,0x28,0x42,
4140             0x41,0x42,
4141             0xb1,0xc3, /*Katakana Set*/
4142             0x1b,0x24,0x42,0x21,0x21
4143         };
4144         static const uint16_t expectedISO2022JIS8[] = {
4145             0x0041, 0x0042,
4146             0xff61, 0xff88, 0xff99,
4147             0x0041, 0x0042,
4148             0xff71, 0xff83,
4149             0x3000
4150         };
4151         static const int32_t  toISO2022JIS8Offs[]={
4152             3, 4,  5,  6,
4153             7, 11, 12, 13,
4154             14, 18,
4155         };
4156
4157         testConvertToU(sampleTextJIS,sizeof(sampleTextJIS),expectedISO2022JIS,
4158             sizeof(expectedISO2022JIS)/sizeof(expectedISO2022JIS[0]),"JIS", toISO2022JISOffs,TRUE);
4159         testConvertToU(sampleTextJIS7,sizeof(sampleTextJIS7),expectedISO2022JIS7,
4160             sizeof(expectedISO2022JIS7)/sizeof(expectedISO2022JIS7[0]),"JIS7", toISO2022JIS7Offs,TRUE);
4161         testConvertToU(sampleTextJIS8,sizeof(sampleTextJIS8),expectedISO2022JIS8,
4162             sizeof(expectedISO2022JIS8)/sizeof(expectedISO2022JIS8[0]),"JIS8", toISO2022JIS8Offs,TRUE);
4163     }
4164
4165 }
4166
4167 static void TestJitterbug915(){
4168 /* tests for roundtripping of the below sequence
4169 \x1b$)G\x0E#!#"###$#%#&#'#(#)#*#+          / *plane 1 * /
4170 \x1b$*H\x1bN"!\x1bN""\x1bN"#\x1bN"$\x1bN"% / *plane 2 * /
4171 \x1b$+I\x1bO"D\x1bO"E\x1bO"F\x1bO"G\x1bO"H / *plane 3 * /
4172 \x1b$+J\x1bO!D\x1bO!E\x1bO"j\x1bO"k\x1bO"l / *plane 4 * /
4173 \x1b$+K\x1bO!t\x1bO"P\x1bO"Q\x1bO#7\x1bO"\ / *plane 5 * /
4174 \x1b$+L\x1bO!#\x1bO",\x1bO#N\x1bO!n\x1bO#q / *plane 6 * /
4175 \x1b$+M\x1bO"q\x1bO!N\x1bO!j\x1bO#:\x1bO#o / *plane 7 * /
4176 */
4177     static const char cSource[]={
4178         0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
4179         0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
4180         0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
4181         0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
4182         0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
4183         0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
4184         0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x2F, 0x2A, 0x70,
4185         0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
4186         0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
4187         0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
4188         0x1B, 0x4F, 0x22, 0x48, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
4189         0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
4190         0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
4191         0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
4192         0x4F, 0x22, 0x6C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4193         0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4194         0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
4195         0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
4196         0x22, 0x5C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4197         0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4198         0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
4199         0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
4200         0x23, 0x71, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
4201         0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
4202         0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
4203         0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
4204         0x6F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
4205         0x37, 0x20, 0x2A, 0x2F
4206     };
4207     UChar uTarget[500]={'\0'};
4208     UChar* utarget=uTarget;
4209     UChar* utargetLimit=uTarget+sizeof(uTarget)/2;
4210
4211     char cTarget[500]={'\0'};
4212     char* ctarget=cTarget;
4213     char* ctargetLimit=cTarget+sizeof(cTarget);
4214     const char* csource=cSource;
4215     const char* tempSrc = cSource;
4216     UErrorCode err=U_ZERO_ERROR;
4217
4218     UConverter* conv =ucnv_open("ISO_2022_CN_EXT",&err);
4219     if(U_FAILURE(err)) {
4220         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
4221         return;
4222     }
4223     ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(cSource),NULL,TRUE,&err);
4224     if(U_FAILURE(err)) {
4225         log_err("iso-2022-CN to Unicode conversion failed: %s\n", u_errorName(err));
4226         return;
4227     }
4228     utargetLimit=utarget;
4229     utarget = uTarget;
4230     ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err);
4231     if(U_FAILURE(err)) {
4232         log_err("iso-2022-CN from Unicode conversion failed: %s\n", u_errorName(err));
4233         return;
4234     }
4235     ctargetLimit=ctarget;
4236     ctarget =cTarget;
4237     while(ctarget<ctargetLimit){
4238         if(*ctarget != *tempSrc){
4239             log_err("j915[%d] Expected : \\x%02X \t Got: \\x%02X\n", (int)(ctarget-cTarget), *ctarget,(int)*tempSrc) ;
4240         }
4241         ++ctarget;
4242         ++tempSrc;
4243     }
4244
4245     ucnv_close(conv);
4246 }
4247
4248 static void
4249 TestISO_2022_CN_EXT() {
4250     /* test input */
4251     static const uint16_t in[]={
4252                 /* test Non-BMP code points */
4253          0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
4254          0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
4255          0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
4256          0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
4257          0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
4258          0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
4259          0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
4260          0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
4261          0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
4262          0xD869, 0xDED5,
4263
4264          0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
4265          0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
4266          0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
4267          0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4268          0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4269          0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4270          0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4271          0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4272          0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4273          0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4274          0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4275          0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4276          0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4277          0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x3443, 0x3444, 0x000D, 0x000A,
4278          0x3445, 0x3449, 0x344A, 0x344B, 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A,
4279          0x60F6, 0x60F7, 0x60F8, 0x60F9, 0x60FA, 0x60FB, 0x60FC, 0x60FD, 0x000D, 0x000A,
4280          0x60FE, 0x60FF, 0x6100, 0x6101, 0x6102, 0x0041, 0x0042, 0x0043, 0x000D, 0x000A,
4281          0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x000D, 0x000A,
4282
4283          0x33E7, 0x33E8, 0x33E9, 0x33EA, 0x000D, 0x000A
4284
4285       };
4286
4287     const UChar* uSource;
4288     const UChar* uSourceLimit;
4289     const char* cSource;
4290     const char* cSourceLimit;
4291     UChar *uTargetLimit =NULL;
4292     UChar *uTarget;
4293     char *cTarget;
4294     const char *cTargetLimit;
4295     char *cBuf;
4296     UChar *uBuf,*test;
4297     int32_t uBufSize = 180;
4298     UErrorCode errorCode=U_ZERO_ERROR;
4299     UConverter *cnv;
4300     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4301     int32_t* myOff= offsets;
4302     cnv=ucnv_open("ISO_2022,locale=cn,version=1", &errorCode);
4303     if(U_FAILURE(errorCode)) {
4304         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4305         return;
4306     }
4307
4308     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4309     cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
4310     uSource = (const UChar*)in;
4311     uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
4312     cTarget = cBuf;
4313     cTargetLimit = cBuf +uBufSize*5;
4314     uTarget = uBuf;
4315     uTargetLimit = uBuf+ uBufSize*5;
4316     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4317     if(U_FAILURE(errorCode)){
4318         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4319         return;
4320     }
4321     cSource = cBuf;
4322     cSourceLimit =cTarget;
4323     test =uBuf;
4324     myOff=offsets;
4325     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4326     if(U_FAILURE(errorCode)){
4327         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4328         return;
4329     }
4330     uSource = (const UChar*)in;
4331     while(uSource<uSourceLimit){
4332         if(*test!=*uSource){
4333             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
4334         }
4335         else{
4336             log_verbose("      Got: \\u%04X\n",(int)*test) ;
4337         }
4338         uSource++;
4339         test++;
4340     }
4341     TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4342     TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4343     /*Test for the condition where there is an invalid character*/
4344     ucnv_reset(cnv);
4345     {
4346         static const uint8_t source2[]={0x0e,0x24,0x053};
4347         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN-EXT]");
4348     }
4349     ucnv_close(cnv);
4350     free(uBuf);
4351     free(cBuf);
4352     free(offsets);
4353 }
4354
4355 static void
4356 TestISO_2022_CN() {
4357     /* test input */
4358     static const uint16_t in[]={
4359          /* jitterbug 951 */
4360          0xFF2D, 0xFF49, 0xFF58, 0xFF45, 0xFF44, 0x0020, 0xFF43, 0xFF48, 0xFF41, 0xFF52,
4361          0x0020, 0xFF06, 0x0020, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17,
4362          0xFF18, 0xFF19, 0xFF10, 0x0020, 0xFF4E, 0xFF55, 0xFF4D, 0xFF42, 0xFF45, 0xFF52,
4363          0x0020, 0xFF54, 0xFF45, 0xFF53, 0xFF54, 0x0020, 0xFF4C, 0xFF49, 0xFF4E, 0xFF45,
4364          0x0020, 0x0045, 0x004e, 0x0044,
4365          /**/
4366          0x4E00, 0x4E00, 0x4E01, 0x4E03, 0x60F6, 0x60F7, 0x60F8, 0x60FB, 0x000D, 0x000A,
4367          0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x60FB, 0x60FC, 0x000D, 0x000A,
4368          0x4E07, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x0042, 0x0043, 0x000D, 0x000A,
4369          0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4370          0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4371          0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4372          0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4373          0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4374          0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4375          0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4376          0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4377          0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4378          0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x60FE, 0x60FF, 0x000D, 0x000A,
4379          0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 0x60F9, 0x60FA, 0x000D, 0x000A,
4380          0x6100, 0x6101, 0x0041, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4381          0x247D, 0x247E, 0x247F, 0x2480, 0x2481, 0x2482, 0x2483, 0x2484, 0x2485, 0x2486,
4382          0x2487, 0x2460, 0x2461, 0xFF20, 0xFF21, 0xFF22, 0x0049, 0x004A, 0x000D, 0x000A,
4383
4384       };
4385     const UChar* uSource;
4386     const UChar* uSourceLimit;
4387     const char* cSource;
4388     const char* cSourceLimit;
4389     UChar *uTargetLimit =NULL;
4390     UChar *uTarget;
4391     char *cTarget;
4392     const char *cTargetLimit;
4393     char *cBuf;
4394     UChar *uBuf,*test;
4395     int32_t uBufSize = 180;
4396     UErrorCode errorCode=U_ZERO_ERROR;
4397     UConverter *cnv;
4398     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4399     int32_t* myOff= offsets;
4400     cnv=ucnv_open("ISO_2022,locale=cn,version=0", &errorCode);
4401     if(U_FAILURE(errorCode)) {
4402         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4403         return;
4404     }
4405
4406     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4407     cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
4408     uSource = (const UChar*)in;
4409     uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
4410     cTarget = cBuf;
4411     cTargetLimit = cBuf +uBufSize*5;
4412     uTarget = uBuf;
4413     uTargetLimit = uBuf+ uBufSize*5;
4414     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4415     if(U_FAILURE(errorCode)){
4416         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4417         return;
4418     }
4419     cSource = cBuf;
4420     cSourceLimit =cTarget;
4421     test =uBuf;
4422     myOff=offsets;
4423     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4424     if(U_FAILURE(errorCode)){
4425         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4426         return;
4427     }
4428     uSource = (const UChar*)in;
4429     while(uSource<uSourceLimit){
4430         if(*test!=*uSource){
4431             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
4432         }
4433         else{
4434             log_verbose("      Got: \\u%04X\n",(int)*test) ;
4435         }
4436         uSource++;
4437         test++;
4438     }
4439     TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-CN encoding");
4440     TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4441     TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4442     TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4443     TestJitterbug930("csISO2022CN");
4444     /*Test for the condition where there is an invalid character*/
4445     ucnv_reset(cnv);
4446     {
4447         static const uint8_t source2[]={0x0e,0x24,0x053};
4448         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN]");
4449     }
4450
4451     ucnv_close(cnv);
4452     free(uBuf);
4453     free(cBuf);
4454     free(offsets);
4455 }
4456
4457 static void
4458 TestEBCDIC_STATEFUL() {
4459     /* test input */
4460     static const uint8_t in[]={
4461         0x61,
4462         0x1a,
4463         0x0f, 0x4b,
4464         0x42,
4465         0x40,
4466         0x36,
4467     };
4468
4469     /* expected test results */
4470     static const int32_t results[]={
4471         /* number of bytes read, code point */
4472         1, 0x002f,
4473         1, 0x0092,
4474         2, 0x002e,
4475         1, 0xff62,
4476         1, 0x0020,
4477         1, 0x0096,
4478
4479     };
4480     static const uint8_t in2[]={
4481         0x0f,
4482         0xa1,
4483         0x01
4484     };
4485
4486     /* expected test results */
4487     static const int32_t results2[]={
4488         /* number of bytes read, code point */
4489         2, 0x203E,
4490         1, 0x0001,
4491     };
4492
4493     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
4494     UErrorCode errorCode=U_ZERO_ERROR;
4495     UConverter *cnv=ucnv_open("ibm-930", &errorCode);
4496     if(U_FAILURE(errorCode)) {
4497         log_data_err("Unable to open a EBCDIC_STATEFUL(ibm-930) converter: %s\n", u_errorName(errorCode));
4498         return;
4499     }
4500     TestNextUChar(cnv, source, limit, results, "EBCDIC_STATEFUL(ibm-930)");
4501     ucnv_reset(cnv);
4502      /* Test the condition when source >= sourceLimit */
4503     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
4504     ucnv_reset(cnv);
4505     /*Test for the condition where source > sourcelimit after consuming the shift chracter */
4506     {
4507         static const uint8_t source1[]={0x0f};
4508         TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_INDEX_OUTOFBOUNDS_ERROR, "a character is truncated");
4509     }
4510     /*Test for the condition where there is an invalid character*/
4511     ucnv_reset(cnv);
4512     {
4513         static const uint8_t source2[]={0x0e, 0x7F, 0xFF};
4514         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [EBCDIC STATEFUL]");
4515     }
4516     ucnv_reset(cnv);
4517     source=(const char*)in2;
4518     limit=(const char*)in2+sizeof(in2);
4519     TestNextUChar(cnv,source,limit,results2,"EBCDIC_STATEFUL(ibm-930),seq#2");
4520     ucnv_close(cnv);
4521
4522 }
4523
4524 static void
4525 TestGB18030() {
4526     /* test input */
4527     static const uint8_t in[]={
4528         0x24,
4529         0x7f,
4530         0x81, 0x30, 0x81, 0x30,
4531         0xa8, 0xbf,
4532         0xa2, 0xe3,
4533         0xd2, 0xbb,
4534         0x82, 0x35, 0x8f, 0x33,
4535         0x84, 0x31, 0xa4, 0x39,
4536         0x90, 0x30, 0x81, 0x30,
4537         0xe3, 0x32, 0x9a, 0x35
4538 #if 0
4539         /*
4540          * Feature removed   markus 2000-oct-26
4541          * Only some codepages must match surrogate pairs into supplementary code points -
4542          * see javadoc for ucnv_getNextUChar() and implementation notes in ucnvmbcs.c .
4543          * GB 18030 provides direct encodings for supplementary code points, therefore
4544          * it must not combine two single-encoded surrogates into one code point.
4545          */
4546         0x83, 0x36, 0xc8, 0x30, 0x83, 0x37, 0xb0, 0x34 /* separately encoded surrogates */
4547 #endif
4548     };
4549
4550     /* expected test results */
4551     static const int32_t results[]={
4552         /* number of bytes read, code point */
4553         1, 0x24,
4554         1, 0x7f,
4555         4, 0x80,
4556         2, 0x1f9,
4557         2, 0x20ac,
4558         2, 0x4e00,
4559         4, 0x9fa6,
4560         4, 0xffff,
4561         4, 0x10000,
4562         4, 0x10ffff
4563 #if 0
4564         /* Feature removed. See comment above. */
4565         8, 0x10000
4566 #endif
4567     };
4568
4569 /*    const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); */
4570     UErrorCode errorCode=U_ZERO_ERROR;
4571     UConverter *cnv=ucnv_open("gb18030", &errorCode);
4572     if(U_FAILURE(errorCode)) {
4573         log_data_err("Unable to open a gb18030 converter: %s\n", u_errorName(errorCode));
4574         return;
4575     }
4576     TestNextUChar(cnv, (const char *)in, (const char *)in+sizeof(in), results, "gb18030");
4577     ucnv_close(cnv);
4578 }
4579
4580 static void
4581 TestLMBCS() {
4582     /* LMBCS-1 string */
4583     static const uint8_t pszLMBCS[]={
4584         0x61,
4585         0x01, 0x29,
4586         0x81,
4587         0xA0,
4588         0x0F, 0x27,
4589         0x0F, 0x91,
4590         0x14, 0x0a, 0x74,
4591         0x14, 0xF6, 0x02,
4592         0x14, 0xd8, 0x4d, 0x14, 0xdc, 0x56, /* UTF-16 surrogate pair */
4593         0x10, 0x88, 0xA0,
4594     };
4595
4596     /* Unicode UChar32 equivalents */
4597     static const UChar32 pszUnicode32[]={
4598         /* code point */
4599         0x00000061,
4600         0x00002013,
4601         0x000000FC,
4602         0x000000E1,
4603         0x00000007,
4604         0x00000091,
4605         0x00000a74,
4606         0x00000200,
4607         0x00023456, /* code point for surrogate pair */
4608         0x00005516
4609     };
4610
4611 /* Unicode UChar equivalents */
4612     static const UChar pszUnicode[]={
4613         /* code point */
4614         0x0061,
4615         0x2013,
4616         0x00FC,
4617         0x00E1,
4618         0x0007,
4619         0x0091,
4620         0x0a74,
4621         0x0200,
4622         0xD84D, /* low surrogate */
4623         0xDC56, /* high surrogate */
4624         0x5516
4625     };
4626
4627 /* expected test results */
4628     static const int offsets32[]={
4629         /* number of bytes read, code point */
4630         0,
4631         1,
4632         3,
4633         4,
4634         5,
4635         7,
4636         9,
4637         12,
4638         15,
4639         21,
4640         24
4641     };
4642
4643 /* expected test results */
4644     static const int offsets[]={
4645         /* number of bytes read, code point */
4646         0,
4647         1,
4648         3,
4649         4,
4650         5,
4651         7,
4652         9,
4653         12,
4654         15,
4655         18,
4656         21,
4657         24
4658     };
4659
4660
4661     UConverter *cnv;
4662
4663 #define NAME_LMBCS_1 "LMBCS-1"
4664 #define NAME_LMBCS_2 "LMBCS-2"
4665
4666
4667    /* Some basic open/close/property tests on some LMBCS converters */
4668     {
4669
4670       char expected_subchars[] = {0x3F};   /* ANSI Question Mark */
4671       char new_subchars [] = {0x7F};       /* subst char used by SmartSuite..*/
4672       char get_subchars [1];
4673       const char * get_name;
4674       UConverter *cnv1;
4675       UConverter *cnv2;
4676
4677       int8_t len = sizeof(get_subchars);
4678
4679       UErrorCode errorCode=U_ZERO_ERROR;
4680
4681       /* Open */
4682       cnv1=ucnv_open(NAME_LMBCS_1, &errorCode);
4683       if(U_FAILURE(errorCode)) {
4684          log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
4685          return;
4686       }
4687       cnv2=ucnv_open(NAME_LMBCS_2, &errorCode);
4688       if(U_FAILURE(errorCode)) {
4689          log_data_err("Unable to open a LMBCS-2 converter: %s\n", u_errorName(errorCode));
4690          return;
4691       }
4692
4693       /* Name */
4694       get_name = ucnv_getName (cnv1, &errorCode);
4695       if (strcmp(NAME_LMBCS_1,get_name)){
4696          log_err("Unexpected converter name: %s\n", get_name);
4697       }
4698       get_name = ucnv_getName (cnv2, &errorCode);
4699       if (strcmp(NAME_LMBCS_2,get_name)){
4700          log_err("Unexpected converter name: %s\n", get_name);
4701       }
4702
4703       /* substitution chars */
4704       ucnv_getSubstChars (cnv1, get_subchars, &len, &errorCode);
4705       if(U_FAILURE(errorCode)) {
4706          log_err("Failure on get subst chars: %s\n", u_errorName(errorCode));
4707       }
4708       if (len!=1){
4709          log_err("Unexpected length of sub chars\n");
4710       }
4711       if (get_subchars[0] != expected_subchars[0]){
4712            log_err("Unexpected value of sub chars\n");
4713       }
4714       ucnv_setSubstChars (cnv2,new_subchars, len, &errorCode);
4715       if(U_FAILURE(errorCode)) {
4716          log_err("Failure on set subst chars: %s\n", u_errorName(errorCode));
4717       }
4718       ucnv_getSubstChars (cnv2, get_subchars, &len, &errorCode);
4719       if(U_FAILURE(errorCode)) {
4720          log_err("Failure on get subst chars: %s\n", u_errorName(errorCode));
4721       }
4722       if (len!=1){
4723          log_err("Unexpected length of sub chars\n");
4724       }
4725       if (get_subchars[0] != new_subchars[0]){
4726            log_err("Unexpected value of sub chars\n");
4727       }
4728       ucnv_close(cnv1);
4729       ucnv_close(cnv2);
4730
4731     }
4732
4733     /* LMBCS to Unicode - offsets */
4734     {
4735        UErrorCode errorCode=U_ZERO_ERROR;
4736
4737        const char * pSource = (const char *)pszLMBCS;
4738        const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS);
4739
4740        UChar Out [sizeof(pszUnicode) + 1];
4741        UChar * pOut = Out;
4742        UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar);
4743
4744        int32_t off [sizeof(offsets)];
4745
4746       /* last 'offset' in expected results is just the final size.
4747          (Makes other tests easier). Compensate here: */
4748
4749        off[(sizeof(offsets)/sizeof(offsets[0]))-1] = sizeof(pszLMBCS);
4750
4751
4752
4753       cnv=ucnv_open("lmbcs", &errorCode); /* use generic name for LMBCS-1 */
4754       if(U_FAILURE(errorCode)) {
4755            log_data_err("Unable to open a LMBCS converter: %s\n", u_errorName(errorCode));
4756            return;
4757       }
4758
4759
4760
4761       ucnv_toUnicode (cnv,
4762                       &pOut,
4763                       OutLimit,
4764                       &pSource,
4765                       sourceLimit,
4766                       off,
4767                       TRUE,
4768                       &errorCode);
4769
4770
4771        if (memcmp(off,offsets,sizeof(offsets)))
4772        {
4773          log_err("LMBCS->Uni: Calculated offsets do not match expected results\n");
4774        }
4775        if (memcmp(Out,pszUnicode,sizeof(pszUnicode)))
4776        {
4777          log_err("LMBCS->Uni: Calculated codepoints do not match expected results\n");
4778        }
4779        ucnv_close(cnv);
4780     }
4781     {
4782    /* LMBCS to Unicode - getNextUChar */
4783       const char * sourceStart;
4784       const char *source=(const char *)pszLMBCS;
4785       const char *limit=(const char *)pszLMBCS+sizeof(pszLMBCS);
4786       const UChar32 *results= pszUnicode32;
4787       const int *off = offsets32;
4788
4789       UErrorCode errorCode=U_ZERO_ERROR;
4790       UChar32 uniChar;
4791
4792       cnv=ucnv_open("LMBCS-1", &errorCode);
4793       if(U_FAILURE(errorCode)) {
4794            log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
4795            return;
4796       }
4797       else
4798       {
4799
4800          while(source<limit) {
4801             sourceStart=source;
4802             uniChar=ucnv_getNextUChar(cnv, &source, source + (off[1] - off[0]), &errorCode);
4803             if(U_FAILURE(errorCode)) {
4804                   log_err("LMBCS-1 ucnv_getNextUChar() failed: %s\n", u_errorName(errorCode));
4805                   break;
4806             } else if(source-sourceStart != off[1] - off[0] || uniChar != *results) {
4807                log_err("LMBCS-1 ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
4808                    uniChar, (source-sourceStart), *results, *off);
4809                break;
4810             }
4811             results++;
4812             off++;
4813          }
4814        }
4815        ucnv_close(cnv);
4816     }
4817     { /* test locale & optimization group operations: Unicode to LMBCS */
4818
4819       UErrorCode errorCode=U_ZERO_ERROR;
4820       UConverter *cnv16he = ucnv_open("LMBCS-16,locale=he", &errorCode);
4821       UConverter *cnv16jp = ucnv_open("LMBCS-16,locale=ja_JP", &errorCode);
4822       UConverter *cnv01us = ucnv_open("LMBCS-1,locale=us_EN", &errorCode);
4823       UChar uniString [] = {0x0192}; /* Latin Small letter f with hook */
4824       const UChar * pUniOut = uniString;
4825       UChar * pUniIn = uniString;
4826       uint8_t lmbcsString [4];
4827       const char * pLMBCSOut = (const char *)lmbcsString;
4828       char * pLMBCSIn = (char *)lmbcsString;
4829
4830       /* 0192 (hook) converts to both group 3 & group 1. input locale should differentiate */
4831       ucnv_fromUnicode (cnv16he,
4832                         &pLMBCSIn, (pLMBCSIn + sizeof(lmbcsString)/sizeof(lmbcsString[0])),
4833                         &pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0]),
4834                         NULL, 1, &errorCode);
4835
4836       if (lmbcsString[0] != 0x3 || lmbcsString[1] != 0x83)
4837       {
4838          log_err("LMBCS-16,locale=he gives unexpected translation\n");
4839       }
4840
4841       pLMBCSIn= (char *)lmbcsString;
4842       pUniOut = uniString;
4843       ucnv_fromUnicode (cnv01us,
4844                         &pLMBCSIn, (const char *)(lmbcsString + sizeof(lmbcsString)/sizeof(lmbcsString[0])),
4845                         &pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0]),
4846                         NULL, 1, &errorCode);
4847
4848       if (lmbcsString[0] != 0x9F)
4849       {
4850          log_err("LMBCS-1,locale=US gives unexpected translation\n");
4851       }
4852
4853       /* single byte char from mbcs char set */
4854       lmbcsString[0] = 0xAE;  /* 1/2 width katakana letter small Yo */
4855       pLMBCSOut = (const char *)lmbcsString;
4856       pUniIn = uniString;
4857       ucnv_toUnicode (cnv16jp,
4858                         &pUniIn, pUniIn + 1,
4859                         &pLMBCSOut, (pLMBCSOut + 1),
4860                         NULL, 1, &errorCode);
4861       if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+1 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
4862       {
4863            log_err("Unexpected results from LMBCS-16 single byte char\n");
4864       }
4865       /* convert to group 1: should be 3 bytes */
4866       pLMBCSIn = (char *)lmbcsString;
4867       pUniOut = uniString;
4868       ucnv_fromUnicode (cnv01us,
4869                         &pLMBCSIn, (const char *)(pLMBCSIn + 3),
4870                         &pUniOut, pUniOut + 1,
4871                         NULL, 1, &errorCode);
4872       if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+3 || pUniOut != uniString+1
4873          || lmbcsString[0] != 0x10 || lmbcsString[1] != 0x10 || lmbcsString[2] != 0xAE)
4874       {
4875            log_err("Unexpected results to LMBCS-1 single byte mbcs char\n");
4876       }
4877       pLMBCSOut = (const char *)lmbcsString;
4878       pUniIn = uniString;
4879       ucnv_toUnicode (cnv01us,
4880                         &pUniIn, pUniIn + 1,
4881                         &pLMBCSOut, (const char *)(pLMBCSOut + 3),
4882                         NULL, 1, &errorCode);
4883       if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+3 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
4884       {
4885            log_err("Unexpected results from LMBCS-1 single byte mbcs char\n");
4886       }
4887       pLMBCSIn = (char *)lmbcsString;
4888       pUniOut = uniString;
4889       ucnv_fromUnicode (cnv16jp,
4890                         &pLMBCSIn, (const char *)(pLMBCSIn + 1),
4891                         &pUniOut, pUniOut + 1,
4892                         NULL, 1, &errorCode);
4893       if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+1 || pUniOut != uniString+1 || lmbcsString[0] != 0xAE)
4894       {
4895            log_err("Unexpected results to LMBCS-16 single byte mbcs char\n");
4896       }
4897       ucnv_close(cnv16he);
4898       ucnv_close(cnv16jp);
4899       ucnv_close(cnv01us);
4900     }
4901     {
4902        /* Small source buffer testing, LMBCS -> Unicode */
4903
4904        UErrorCode errorCode=U_ZERO_ERROR;
4905
4906        const char * pSource = (const char *)pszLMBCS;
4907        const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS);
4908        int codepointCount = 0;
4909
4910        UChar Out [sizeof(pszUnicode) + 1];
4911        UChar * pOut = Out;
4912        UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar);
4913
4914
4915        cnv = ucnv_open(NAME_LMBCS_1, &errorCode);
4916        if(U_FAILURE(errorCode)) {
4917            log_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
4918            return;
4919        }
4920
4921
4922        while ((pSource < sourceLimit) && U_SUCCESS (errorCode))
4923        {
4924            ucnv_toUnicode (cnv,
4925                &pOut,
4926                OutLimit,
4927                &pSource,
4928                (pSource+1), /* claim that this is a 1- byte buffer */
4929                NULL,
4930                FALSE,    /* FALSE means there might be more chars in the next buffer */
4931                &errorCode);
4932
4933            if (U_SUCCESS (errorCode))
4934            {
4935                if ((pSource - (const char *)pszLMBCS) == offsets [codepointCount+1])
4936                {
4937                    /* we are on to the next code point: check value */
4938
4939                    if (Out[0] != pszUnicode[codepointCount]){
4940                        log_err("LMBCS->Uni result %lx should have been %lx \n",
4941                            Out[0], pszUnicode[codepointCount]);
4942                    }
4943
4944                    pOut = Out; /* reset for accumulating next code point */
4945                    codepointCount++;
4946                }
4947            }
4948            else
4949            {
4950                log_err("Unexpected Error on toUnicode: %s\n", u_errorName(errorCode));
4951            }
4952        }
4953        {
4954          /* limits & surrogate error testing */
4955          char LIn [sizeof(pszLMBCS)];
4956          const char * pLIn = LIn;
4957
4958          char LOut [sizeof(pszLMBCS)];
4959          char * pLOut = LOut;
4960
4961          UChar UOut [sizeof(pszUnicode)];
4962          UChar * pUOut = UOut;
4963
4964          UChar UIn [sizeof(pszUnicode)];
4965          const UChar * pUIn = UIn;
4966
4967          int32_t off [sizeof(offsets)];
4968          UChar32 uniChar;
4969
4970          errorCode=U_ZERO_ERROR;
4971
4972          /* negative source request should always return U_ILLEGAL_ARGUMENT_ERROR */
4973          ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn-1,off,FALSE, &errorCode);
4974          if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
4975          {
4976             log_err("Unexpected Error on negative source request to ucnv_fromUnicode: %s\n", u_errorName(errorCode));
4977          }
4978          errorCode=U_ZERO_ERROR;
4979          ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)(pLIn-1),off,FALSE, &errorCode);
4980          if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
4981          {
4982             log_err("Unexpected Error on negative source request to ucnv_toUnicode: %s\n", u_errorName(errorCode));
4983          }
4984          errorCode=U_ZERO_ERROR;
4985
4986          uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)(pLIn-1), &errorCode);
4987          if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
4988          {
4989             log_err("Unexpected Error on negative source request to ucnv_getNextUChar: %s\n", u_errorName(errorCode));
4990          }
4991          errorCode=U_ZERO_ERROR;
4992
4993          /* 0 byte source request - no error, no pointer movement */
4994          ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)pLIn,off,FALSE, &errorCode);
4995          ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn,off,FALSE, &errorCode);
4996          if(U_FAILURE(errorCode)) {
4997             log_err("0 byte source request: unexpected error: %s\n", u_errorName(errorCode));
4998          }
4999          if ((pUOut != UOut) || (pUIn != UIn) || (pLOut != LOut) || (pLIn != LIn))
5000          {
5001               log_err("Unexpected pointer move in 0 byte source request \n");
5002          }
5003          /*0 byte source request - GetNextUChar : error & value == fffe or ffff */
5004          uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)pLIn, &errorCode);
5005          if (errorCode != U_INDEX_OUTOFBOUNDS_ERROR)
5006          {
5007             log_err("Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s\n", u_errorName(errorCode));
5008          }
5009          if (((uint32_t)uniChar - 0xfffe) > 1) /* not 0xfffe<=uniChar<=0xffff */
5010          {
5011             log_err("Unexpected value on 0-byte source request to ucnv_getnextUChar \n");
5012          }
5013          errorCode = U_ZERO_ERROR;
5014
5015          /* running out of target room : U_BUFFER_OVERFLOW_ERROR */
5016
5017          pUIn = pszUnicode;
5018          ucnv_fromUnicode(cnv, &pLOut,pLOut+offsets[4],&pUIn,pUIn+sizeof(pszUnicode)/sizeof(UChar),off,FALSE, &errorCode);
5019          if (errorCode != U_BUFFER_OVERFLOW_ERROR || pLOut != LOut + offsets[4] || pUIn != pszUnicode+4 )
5020          {
5021             log_err("Unexpected results on out of target room to ucnv_fromUnicode\n");
5022          }
5023
5024          errorCode = U_ZERO_ERROR;
5025
5026          pLIn = (const char *)pszLMBCS;
5027          ucnv_toUnicode(cnv, &pUOut,pUOut+4,&pLIn,(pLIn+sizeof(pszLMBCS)),off,FALSE, &errorCode);
5028          if (errorCode != U_BUFFER_OVERFLOW_ERROR || pUOut != UOut + 4 || pLIn != (const char *)pszLMBCS+offsets[4])
5029          {
5030             log_err("Unexpected results on out of target room to ucnv_toUnicode\n");
5031          }
5032
5033          /* unpaired or chopped LMBCS surrogates */
5034
5035          /* OK high surrogate, Low surrogate is chopped */
5036          LIn [0] = (char)0x14;
5037          LIn [1] = (char)0xD8;
5038          LIn [2] = (char)0x01;
5039          LIn [3] = (char)0x14;
5040          LIn [4] = (char)0xDC;
5041          pLIn = LIn;
5042          errorCode = U_ZERO_ERROR;
5043          pUOut = UOut;
5044
5045          ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
5046          ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5047          if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
5048          {
5049             log_err("Unexpected results on chopped low surrogate\n");
5050          }
5051
5052          /* chopped at surrogate boundary */
5053          LIn [0] = (char)0x14;
5054          LIn [1] = (char)0xD8;
5055          LIn [2] = (char)0x01;
5056          pLIn = LIn;
5057          errorCode = U_ZERO_ERROR;
5058          pUOut = UOut;
5059
5060          ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+3),off,TRUE, &errorCode);
5061          if (UOut[0] != 0xD801 || U_FAILURE(errorCode) || pUOut != UOut + 1 || pLIn != LIn + 3)
5062          {
5063             log_err("Unexpected results on chopped at surrogate boundary \n");
5064          }
5065
5066          /* unpaired surrogate plus valid Unichar */
5067          LIn [0] = (char)0x14;
5068          LIn [1] = (char)0xD8;
5069          LIn [2] = (char)0x01;
5070          LIn [3] = (char)0x14;
5071          LIn [4] = (char)0xC9;
5072          LIn [5] = (char)0xD0;
5073          pLIn = LIn;
5074          errorCode = U_ZERO_ERROR;
5075          pUOut = UOut;
5076
5077          ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+6),off,TRUE, &errorCode);
5078          if (UOut[0] != 0xD801 || UOut[1] != 0xC9D0 || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 6)
5079          {
5080             log_err("Unexpected results after unpaired surrogate plus valid Unichar \n");
5081          }
5082
5083       /* unpaired surrogate plus chopped Unichar */
5084          LIn [0] = (char)0x14;
5085          LIn [1] = (char)0xD8;
5086          LIn [2] = (char)0x01;
5087          LIn [3] = (char)0x14;
5088          LIn [4] = (char)0xC9;
5089
5090          pLIn = LIn;
5091          errorCode = U_ZERO_ERROR;
5092          pUOut = UOut;
5093
5094          ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5095          if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
5096          {
5097             log_err("Unexpected results after unpaired surrogate plus chopped Unichar \n");
5098          }
5099
5100          /* unpaired surrogate plus valid non-Unichar */
5101          LIn [0] = (char)0x14;
5102          LIn [1] = (char)0xD8;
5103          LIn [2] = (char)0x01;
5104          LIn [3] = (char)0x0F;
5105          LIn [4] = (char)0x3B;
5106
5107          pLIn = LIn;
5108          errorCode = U_ZERO_ERROR;
5109          pUOut = UOut;
5110
5111          ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5112          if (UOut[0] != 0xD801 || UOut[1] != 0x1B || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 5)
5113          {
5114             log_err("Unexpected results after unpaired surrogate plus valid non-Unichar\n");
5115          }
5116
5117          /* unpaired surrogate plus chopped non-Unichar */
5118          LIn [0] = (char)0x14;
5119          LIn [1] = (char)0xD8;
5120          LIn [2] = (char)0x01;
5121          LIn [3] = (char)0x0F;
5122
5123          pLIn = LIn;
5124          errorCode = U_ZERO_ERROR;
5125          pUOut = UOut;
5126
5127          ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+4),off,TRUE, &errorCode);
5128
5129          if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 4)
5130          {
5131             log_err("Unexpected results after unpaired surrogate plus chopped non-Unichar\n");
5132          }
5133        }
5134     }
5135    ucnv_close(cnv);  /* final cleanup */
5136 }
5137
5138
5139 static void TestJitterbug255()
5140 {
5141     static const uint8_t testBytes[] = { 0x95, 0xcf, 0x8a, 0xb7, 0x0d, 0x0a, 0x00 };
5142     const char *testBuffer = (const char *)testBytes;
5143     const char *testEnd = (const char *)testBytes + sizeof(testBytes);
5144     UErrorCode status = U_ZERO_ERROR;
5145     UChar32 result;
5146     UConverter *cnv = 0;
5147
5148     cnv = ucnv_open("shift-jis", &status);
5149     if (U_FAILURE(status) || cnv == 0) {
5150         log_data_err("Failed to open the converter for SJIS.\n");
5151                 return;
5152     }
5153     while (testBuffer != testEnd)
5154     {
5155         result = ucnv_getNextUChar (cnv, &testBuffer, testEnd , &status);
5156         if (U_FAILURE(status))
5157         {
5158             log_err("Failed to convert the next UChar for SJIS.\n");
5159             break;
5160         }
5161     }
5162     ucnv_close(cnv);
5163 }
5164
5165 static void TestEBCDICUS4XML()
5166 {
5167     UChar unicodes_x[] = {0x0000, 0x0000, 0x0000, 0x0000};
5168     static const UChar toUnicodeMaps_x[] = {0x000A, 0x000A, 0x000D, 0x0000};
5169     static const char fromUnicodeMaps_x[] = {0x25, 0x25, 0x0D, 0x00};
5170     static const char newLines_x[] = {0x25, 0x15, 0x0D, 0x00};
5171     char target_x[] = {0x00, 0x00, 0x00, 0x00};
5172     UChar *unicodes = unicodes_x;
5173     const UChar *toUnicodeMaps = toUnicodeMaps_x;
5174     char *target = target_x;
5175     const char* fromUnicodeMaps = fromUnicodeMaps_x, *newLines = newLines_x;
5176     UErrorCode status = U_ZERO_ERROR;
5177     UConverter *cnv = 0;
5178
5179     cnv = ucnv_open("ebcdic-xml-us", &status);
5180     if (U_FAILURE(status) || cnv == 0) {
5181         log_data_err("Failed to open the converter for EBCDIC-XML-US.\n");
5182         return;
5183     }
5184     ucnv_toUnicode(cnv, &unicodes, unicodes+3, (const char**)&newLines, newLines+3, NULL, TRUE, &status);
5185     if (U_FAILURE(status) || memcmp(unicodes_x, toUnicodeMaps, sizeof(UChar)*3) != 0) {
5186         log_err("To Unicode conversion failed in EBCDICUS4XML test. %s\n",
5187             u_errorName(status));
5188         printUSeqErr(unicodes_x, 3);
5189         printUSeqErr(toUnicodeMaps, 3);
5190     }
5191     status = U_ZERO_ERROR;
5192     ucnv_fromUnicode(cnv, &target, target+3, (const UChar**)&toUnicodeMaps, toUnicodeMaps+3, NULL, TRUE, &status);
5193     if (U_FAILURE(status) || memcmp(target_x, fromUnicodeMaps, sizeof(char)*3) != 0) {
5194         log_err("From Unicode conversion failed in EBCDICUS4XML test. %s\n",
5195             u_errorName(status));
5196         printSeqErr((const unsigned char*)target_x, 3);
5197         printSeqErr((const unsigned char*)fromUnicodeMaps, 3);
5198     }
5199     ucnv_close(cnv);
5200 }
#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
5202
5203 #if !UCONFIG_NO_COLLATION
5204
5205 static void TestJitterbug981(){
5206     const UChar* rules;
5207     int32_t rules_length, target_cap, bytes_needed, buff_size;
5208     UErrorCode status = U_ZERO_ERROR;
5209     UConverter *utf8cnv;
5210     UCollator* myCollator;
5211     char *buff;
5212     int numNeeded=0;
5213     utf8cnv = ucnv_open ("utf8", &status);
5214     if(U_FAILURE(status)){
5215         log_err("Could not open UTF-8 converter. Error: %s", u_errorName(status));
5216         return;
5217     }
5218     myCollator = ucol_open("zh", &status);
5219     if(U_FAILURE(status)){
5220         log_err("Could not open collator for zh locale. Error: %s", u_errorName(status));
5221         return;
5222     }
5223
5224     rules = ucol_getRules(myCollator, &rules_length);
5225     buff_size = rules_length * ucnv_getMaxCharSize(utf8cnv);
5226     buff = malloc(buff_size);
5227
5228     target_cap = 0;
5229     do {
5230         ucnv_reset(utf8cnv);
5231         status = U_ZERO_ERROR;
5232         if(target_cap >= buff_size) {
5233             log_err("wanted %d bytes, only %d available\n", target_cap, buff_size);
5234             return;
5235         }
5236         bytes_needed = ucnv_fromUChars(utf8cnv, buff, target_cap,
5237             rules, rules_length, &status);
5238         target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1;
5239         if(numNeeded!=0 && numNeeded!= bytes_needed){
5240             log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5241         }
5242         numNeeded = bytes_needed;
5243     } while (status == U_BUFFER_OVERFLOW_ERROR);
5244     ucol_close(myCollator);
5245     ucnv_close(utf8cnv);
5246     free(buff);
5247 }
5248
5249 #endif
5250
5251 static void TestJitterbug1293(){
5252     static const UChar src[] = {0x30DE, 0x30A4, 0x5E83, 0x544A, 0x30BF, 0x30A4, 0x30D7,0x000};
5253     char target[256];
5254     UErrorCode status = U_ZERO_ERROR;
5255     UConverter* conv=NULL;
5256     int32_t target_cap, bytes_needed, numNeeded = 0;
5257     conv = ucnv_open("shift-jis",&status);
5258     if(U_FAILURE(status)){
5259       log_data_err("Could not open Shift-Jis converter. Error: %s", u_errorName(status));
5260       return;
5261     }
5262
5263     do{
5264         target_cap =0;
5265         bytes_needed = ucnv_fromUChars(conv,target,256,src,u_strlen(src),&status);
5266         target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1;
5267         if(numNeeded!=0 && numNeeded!= bytes_needed){
5268           log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5269         }
5270         numNeeded = bytes_needed;
5271     } while (status == U_BUFFER_OVERFLOW_ERROR);
5272     if(U_FAILURE(status)){
5273       log_err("An error occured in ucnv_fromUChars. Error: %s", u_errorName(status));
5274       return;
5275     }
5276     ucnv_close(conv);
5277 }
5278