icuSources/test/cintltst/nucnvtst.c

   1 /********************************************************************
   2  * COPYRIGHT:
   3  * Copyright (c) 1997-2003, International Business Machines Corporation and
   4  * others. All Rights Reserved.
   5  ********************************************************************/
   6 /********************************************************************************
   7 *
   8 * File CCONVTST.C
   9 *
  10 * Modification History:
  11 *        Name                     Description
  12 *    Steven R. Loomis     7/8/1999      Adding input buffer test
  13 *********************************************************************************
  14 */
  15 #include <stdio.h>
  16 #include "cstring.h"
  17 #include "unicode/uloc.h"
  18 #include "unicode/ucnv.h"
  19 #include "unicode/ucnv_err.h"
  20 #include "cintltst.h"
  21 #include "unicode/utypes.h"
  22 #include "unicode/ustring.h"
  23 #include "unicode/ucol.h"
  24 #include "cmemory.h"
  25
  26 static void TestNextUChar(UConverter* cnv, const char* source, const char* limit, const uint32_t results[], const char* message);
  27 static void TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message);
  28 #if !UCONFIG_NO_COLLATION
  29 static void TestJitterbug981(void);
  30 #endif
  31 static void TestJitterbug1293(void);
  32 static void TestNewConvertWithBufferSizes(int32_t osize, int32_t isize) ;
  33 static void TestConverterTypesAndStarters(void);
  34 static void TestAmbiguous(void);
  35 static void TestSignatureDetection(void);
  36 static void TestUTF7(void);
  37 static void TestIMAP(void);
  38 static void TestUTF8(void);
  39 static void TestCESU8(void);
  40 static void TestUTF16(void);
  41 static void TestUTF16BE(void);
  42 static void TestUTF16LE(void);
  43 static void TestUTF32(void);
  44 static void TestUTF32BE(void);
  45 static void TestUTF32LE(void);
  46 static void TestLATIN1(void);
  47 static void TestSBCS(void);
  48 static void TestDBCS(void);
  49 static void TestMBCS(void);
  50 static void TestISO_2022(void);
  51 static void TestISO_2022_JP(void);
  52 static void TestISO_2022_JP_1(void);
  53 static void TestISO_2022_JP_2(void);
  54 static void TestISO_2022_KR(void);
  55 static void TestISO_2022_KR_1(void);
  56 static void TestISO_2022_CN(void);
  57 static void TestISO_2022_CN_EXT(void);
  58 static void TestJIS(void);
  59 static void TestHZ(void);
  60 static void TestSCSU(void);
  61 static void TestEBCDIC_STATEFUL(void);
  62 static void TestGB18030(void);
  63 static void TestLMBCS(void);
  64 static void TestJitterbug255(void);
  65 static void TestEBCDICUS4XML(void);
  66 static void TestJitterbug915(void);
  67 static void TestISCII(void);
  68 static void TestConv(const uint16_t in[],
  69                      int len,
  70                      const char* conv,
  71                      const char* lang,
  72                      char byteArr[],
  73                      int byteArrLen);
  74 static void TestRoundTrippingAllUTF(void);
  75 static void TestCoverageMBCS(void);
  76 static void TestJitterbug2346(void);
  77 static void TestJitterbug2411(void);
  78 void addTestNewConvert(TestNode** root);
  79
  80 /* open a converter, using test data if it begins with '@' */
  81 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err);
  82
  83
  84 #define NEW_MAX_BUFFER 999
  85
  86 static int32_t  gInBufferSize = NEW_MAX_BUFFER;
  87 static int32_t  gOutBufferSize = NEW_MAX_BUFFER;
  88 static char     gNuConvTestName[1024];
  89
  90 #define nct_min(x,y)  ((x<y) ? x : y)
  91
  92 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err)
  93 {
  94   if(cnv && cnv[0] == '@') {
  95     return ucnv_openPackage("testdata", cnv+1, err);
  96   } else {
  97     return ucnv_open(cnv, err);
  98   }
  99 }
 100
 101 static void printSeq(const unsigned char* a, int len)
 102 {
 103     int i=0;
 104     log_verbose("{");
 105     while (i<len)
 106         log_verbose("0x%02x ", a[i++]);
 107     log_verbose("}\n");
 108 }
 109
 110 static void printUSeq(const UChar* a, int len)
 111 {
 112     int i=0;
 113     log_verbose("{U+");
 114     while (i<len) log_verbose("0x%04x ", a[i++]);
 115     log_verbose("}\n");
 116 }
 117
 118 static void printSeqErr(const unsigned char* a, int len)
 119 {
 120     int i=0;
 121     fprintf(stderr, "{");
 122     while (i<len)
 123         fprintf(stderr, "0x%02x ", a[i++]);
 124     fprintf(stderr, "}\n");
 125 }
 126
 127 static void printUSeqErr(const UChar* a, int len)
 128 {
 129     int i=0;
 130     fprintf(stderr, "{U+");
 131     while (i<len)
 132         fprintf(stderr, "0x%04x ", a[i++]);
 133     fprintf(stderr,"}\n");
 134 }
 135
 136 static void
 137 TestNextUChar(UConverter* cnv, const char* source, const char* limit, const uint32_t results[], const char* message)
 138 {
 139      const char* s0;
 140      const char* s=(char*)source;
 141      const uint32_t *r=results;
 142      UErrorCode errorCode=U_ZERO_ERROR;
 143      uint32_t c;
 144
 145      while(s<limit) {
 146         s0=s;
 147         c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
 148         if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
 149             break; /* no more significant input */
 150         } else if(U_FAILURE(errorCode)) {
 151             log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
 152             break;
 153         } else if((uint32_t)(s-s0)!=*r || c!=*(r+1)) {
 154             log_err("%s ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
 155                 message, c, (s-s0), *(r+1), *r);
 156             break;
 157         }
 158         r+=2;
 159     }
 160 }
 161
 162 static void
 163 TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message)
 164 {
 165      const char* s=(char*)source;
 166      UErrorCode errorCode=U_ZERO_ERROR;
 167      uint32_t c;
 168      c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
 169      if(errorCode != expected){
 170         log_err("FAIL: Expected:%s when %s-----Got:%s\n", myErrorName(expected), message, myErrorName(errorCode));
 171      }
 172      if(c != 0xFFFD && c != 0xffff){
 173         log_err("FAIL: Expected return value of 0xfffd or 0xffff when %s-----Got 0x%lx\n", message, c);
 174      }
 175
 176 }
 177
 178 static void TestInBufSizes(void)
 179 {
 180   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,1);
 181 #if 1
 182   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,2);
 183   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,3);
 184   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,4);
 185   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,5);
 186   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,6);
 187   TestNewConvertWithBufferSizes(1,1);
 188   TestNewConvertWithBufferSizes(2,3);
 189   TestNewConvertWithBufferSizes(3,2);
 190 #endif
 191 }
 192
 193 static void TestOutBufSizes(void)
 194 {
 195 #if 1
 196   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,NEW_MAX_BUFFER);
 197   TestNewConvertWithBufferSizes(1,NEW_MAX_BUFFER);
 198   TestNewConvertWithBufferSizes(2,NEW_MAX_BUFFER);
 199   TestNewConvertWithBufferSizes(3,NEW_MAX_BUFFER);
 200   TestNewConvertWithBufferSizes(4,NEW_MAX_BUFFER);
 201   TestNewConvertWithBufferSizes(5,NEW_MAX_BUFFER);
 202
 203 #endif
 204 }
 205
 206
 207 void addTestNewConvert(TestNode** root)
 208 {
 209    addTest(root, &TestInBufSizes, "tsconv/nucnvtst/TestInBufSizes");
 210    addTest(root, &TestOutBufSizes, "tsconv/nucnvtst/TestOutBufSizes");
 211    addTest(root, &TestConverterTypesAndStarters, "tsconv/nucnvtst/TestConverterTypesAndStarters");
 212    addTest(root, &TestAmbiguous, "tsconv/nucnvtst/TestAmbiguous");
 213    addTest(root, &TestSignatureDetection, "tsconv/nucnvtst/TestSignatureDetection");
 214    addTest(root, &TestUTF7, "tsconv/nucnvtst/TestUTF7");
 215    addTest(root, &TestIMAP, "tsconv/nucnvtst/TestIMAP");
 216    addTest(root, &TestUTF8, "tsconv/nucnvtst/TestUTF8");
 217    addTest(root, &TestCESU8, "tsconv/nucnvtst/TestCESU8");
 218    addTest(root, &TestUTF16, "tsconv/nucnvtst/TestUTF16");
 219    addTest(root, &TestUTF16BE, "tsconv/nucnvtst/TestUTF16BE");
 220    addTest(root, &TestUTF16LE, "tsconv/nucnvtst/TestUTF16LE");
 221    addTest(root, &TestUTF32, "tsconv/nucnvtst/TestUTF32");
 222    addTest(root, &TestUTF32BE, "tsconv/nucnvtst/TestUTF32BE");
 223    addTest(root, &TestUTF32LE, "tsconv/nucnvtst/TestUTF32LE");
 224    addTest(root, &TestLATIN1, "tsconv/nucnvtst/TestLATIN1");
 225    addTest(root, &TestSBCS, "tsconv/nucnvtst/TestSBCS");
 226    addTest(root, &TestDBCS, "tsconv/nucnvtst/TestDBCS");
 227    addTest(root, &TestMBCS, "tsconv/nucnvtst/TestMBCS");
 228    addTest(root, &TestISO_2022, "tsconv/nucnvtst/TestISO_2022");
 229    addTest(root, &TestISO_2022_JP, "tsconv/nucnvtst/TestISO_2022_JP");
 230    addTest(root, &TestJIS, "tsconv/nucnvtst/TestJIS");
 231    addTest(root, &TestISO_2022_JP_1, "tsconv/nucnvtst/TestISO_2022_JP_1");
 232    addTest(root, &TestISO_2022_JP_2, "tsconv/nucnvtst/TestISO_2022_JP_2");
 233    addTest(root, &TestISO_2022_KR, "tsconv/nucnvtst/TestISO_2022_KR");
 234    addTest(root, &TestISO_2022_KR_1, "tsconv/nucnvtst/TestISO_2022_KR_1");
 235    addTest(root, &TestISO_2022_CN, "tsconv/nucnvtst/TestISO_2022_CN");
 236    addTest(root, &TestISO_2022_CN_EXT, "tsconv/nucnvtst/TestISO_2022_CN_EXT");
 237    addTest(root, &TestJitterbug915, "tsconv/nucnvtst/TestJitterbug915");
 238    addTest(root, &TestHZ, "tsconv/nucnvtst/TestHZ");
 239    addTest(root, &TestSCSU, "tsconv/nucnvtst/TestSCSU");
 240    addTest(root, &TestEBCDIC_STATEFUL, "tsconv/nucnvtst/TestEBCDIC_STATEFUL");
 241    addTest(root, &TestGB18030, "tsconv/nucnvtst/TestGB18030");
 242    addTest(root, &TestLMBCS, "tsconv/nucnvtst/TestLMBCS");
 243    addTest(root, &TestJitterbug255, "tsconv/nucnvtst/TestJitterbug255");
 244    addTest(root, &TestEBCDICUS4XML, "tsconv/nucnvtst/TestEBCDICUS4XML");
 245    addTest(root, &TestISCII, "tsconv/nucnvtst/TestISCII");
 246 #if !UCONFIG_NO_COLLATION
 247    addTest(root, &TestJitterbug981, "tsconv/nucnvtst/TestJitterbug981");
 248 #endif
 249    addTest(root, &TestJitterbug1293, "tsconv/nucnvtst/TestJitterbug1293");
 250    addTest(root, &TestCoverageMBCS, "tsconv/nucnvtst/TestCoverageMBCS");
 251    addTest(root, &TestRoundTrippingAllUTF, "tsconv/nucnvtst/TestRoundTrippingAllUTF");
 252    addTest(root, &TestJitterbug2346, "tsconv/nucnvtst/TestJitterbug2346");
 253    addTest(root, &TestJitterbug2411, "tsconv/nucnvtst/TestJitterbug2411");
 254
 255 }
 256
 257
 258 /* Note that this test already makes use of statics, so it's not really
 259    multithread safe.
 260    This convenience function lets us make the error messages actually useful.
 261 */
 262
 263 static void setNuConvTestName(const char *codepage, const char *direction)
 264 {
 265   sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
 266       codepage,
 267       direction,
 268       gInBufferSize,
 269       gOutBufferSize);
 270 }
 271
 272 typedef enum
 273 {
 274   TC_OK       = 0,  /* test was OK */
 275   TC_MISMATCH = 1,  /* Match failed - err was printed */
 276   TC_FAIL     = 2   /* Test failed, don't print an err because it was already printed. */
 277 } ETestConvertResult;
 278
 279 /* Note: This function uses global variables and it will not do offset
 280 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
 281 static ETestConvertResult testConvertFromU( const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
 282                 const char *codepage, const int32_t *expectOffsets , UBool useFallback)
 283 {
 284     UErrorCode status = U_ZERO_ERROR;
 285     UConverter *conv = 0;
 286     uint8_t    junkout[NEW_MAX_BUFFER]; /* FIX */
 287     int32_t    junokout[NEW_MAX_BUFFER]; /* FIX */
 288     uint8_t *p;
 289     const UChar *src;
 290     uint8_t *end;
 291     uint8_t *targ;
 292     int32_t *offs;
 293     int i;
 294     int32_t   realBufferSize;
 295     uint8_t *realBufferEnd;
 296     const UChar *realSourceEnd;
 297     const UChar *sourceLimit;
 298     UBool checkOffsets = TRUE;
 299     UBool doFlush;
 300
 301     for(i=0;i<NEW_MAX_BUFFER;i++)
 302         junkout[i] = 0xF0;
 303     for(i=0;i<NEW_MAX_BUFFER;i++)
 304         junokout[i] = 0xFF;
 305
 306     setNuConvTestName(codepage, "FROM");
 307
 308     log_verbose("\n=========  %s\n", gNuConvTestName);
 309
 310     conv = my_ucnv_open(codepage, &status);
 311
 312     if(U_FAILURE(status))
 313     {
 314         log_data_err("Couldn't open converter %s\n",codepage);
 315         return TC_FAIL;
 316     }
 317     if(useFallback){
 318         ucnv_setFallback(conv,useFallback);
 319     }
 320
 321     log_verbose("Converter opened..\n");
 322
 323     src = source;
 324     targ = junkout;
 325     offs = junokout;
 326
 327     realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
 328     realBufferEnd = junkout + realBufferSize;
 329     realSourceEnd = source + sourceLen;
 330
 331     if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER )
 332         checkOffsets = FALSE;
 333
 334     do
 335     {
 336       end = nct_min(targ + gOutBufferSize, realBufferEnd);
 337       sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
 338
 339       doFlush = (UBool)(sourceLimit == realSourceEnd);
 340
 341       if(targ == realBufferEnd) {
 342         log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
 343         return TC_FAIL;
 344       }
 345       log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx  TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE");
 346
 347
 348       status = U_ZERO_ERROR;
 349
 350       ucnv_fromUnicode (conv,
 351                         (char **)&targ,
 352                         (const char*)end,
 353                         &src,
 354                         sourceLimit,
 355                         checkOffsets ? offs : NULL,
 356                         doFlush, /* flush if we're at the end of the input data */
 357                         &status);
 358     } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourceLimit < realSourceEnd) );
 359
 360     if(U_FAILURE(status)) {
 361       log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
 362       return TC_FAIL;
 363     }
 364
 365     log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
 366                 sourceLen, targ-junkout);
 367
 368     if(VERBOSITY)
 369     {
 370       char junk[9999];
 371       char offset_str[9999];
 372       uint8_t *ptr;
 373
 374       junk[0] = 0;
 375       offset_str[0] = 0;
 376       for(ptr = junkout;ptr<targ;ptr++) {
 377         sprintf(junk + strlen(junk), "0x%02x, ", (int)(0xFF & *ptr));
 378         sprintf(offset_str + strlen(offset_str), "0x%02x, ", (int)(0xFF & junokout[ptr-junkout]));
 379       }
 380
 381       log_verbose(junk);
 382       printSeq((const uint8_t *)expect, expectLen);
 383       if ( checkOffsets ) {
 384         log_verbose("\nOffsets:");
 385         log_verbose(offset_str);
 386       }
 387       log_verbose("\n");
 388     }
 389     ucnv_close(conv);
 390
 391     if(expectLen != targ-junkout) {
 392       log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
 393       log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
 394       printf("\nGot:");
 395       printSeqErr((const unsigned char*)junkout, targ-junkout);
 396       printf("\nExpected:");
 397       printSeqErr((const unsigned char*)expect, expectLen);
 398       return TC_MISMATCH;
 399     }
 400
 401     if (checkOffsets && (expectOffsets != 0) ) {
 402       log_verbose("comparing %d offsets..\n", targ-junkout);
 403       if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
 404         log_err("did not get the expected offsets. %s\n", gNuConvTestName);
 405         printSeqErr((const unsigned char*)junkout, targ-junkout);
 406         log_err("\n");
 407         log_err("Got  :     ");
 408         for(p=junkout;p<targ;p++) {
 409           log_err("%d,", junokout[p-junkout]);
 410         }
 411         log_err("\n");
 412         log_err("Expected:  ");
 413         for(i=0; i<(targ-junkout); i++) {
 414           log_err("%d,", expectOffsets[i]);
 415         }
 416         log_err("\n");
 417       }
 418     }
 419
 420     log_verbose("comparing..\n");
 421     if(!memcmp(junkout, expect, expectLen)) {
 422       log_verbose("Matches!\n");
 423       return TC_OK;
 424     } else {
 425       log_err("String does not match u->%s\n", gNuConvTestName);
 426       printUSeqErr(source, sourceLen);
 427       printf("\nGot:");
 428       printSeqErr((const unsigned char *)junkout, expectLen);
 429       printf("\nExpected:");
 430       printSeqErr((const unsigned char *)expect, expectLen);
 431
 432       return TC_MISMATCH;
 433     }
 434 }
 435
 436 /* Note: This function uses global variables and it will not do offset
 437 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
 438 static ETestConvertResult testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
 439                                           const char *codepage, const int32_t *expectOffsets, UBool useFallback)
 440 {
 441     UErrorCode status = U_ZERO_ERROR;
 442     UConverter *conv = 0;
 443     UChar    junkout[NEW_MAX_BUFFER]; /* FIX */
 444     int32_t    junokout[NEW_MAX_BUFFER]; /* FIX */
 445     const uint8_t *src;
 446     const uint8_t *realSourceEnd;
 447     const uint8_t *srcLimit;
 448     UChar *p;
 449     UChar *targ;
 450     UChar *end;
 451     int32_t *offs;
 452     int i;
 453     UBool   checkOffsets = TRUE;
 454
 455     int32_t   realBufferSize;
 456     UChar *realBufferEnd;
 457
 458
 459     for(i=0;i<NEW_MAX_BUFFER;i++)
 460         junkout[i] = 0xFFFE;
 461
 462     for(i=0;i<NEW_MAX_BUFFER;i++)
 463         junokout[i] = -1;
 464
 465     setNuConvTestName(codepage, "TO");
 466
 467     log_verbose("\n=========  %s\n", gNuConvTestName);
 468
 469     conv = my_ucnv_open(codepage, &status);
 470
 471     if(U_FAILURE(status))
 472     {
 473         log_data_err("Couldn't open converter %s\n",gNuConvTestName);
 474         return TC_FAIL;
 475     }
 476     if(useFallback){
 477         ucnv_setFallback(conv,useFallback);
 478     }
 479     log_verbose("Converter opened..\n");
 480
 481     src = source;
 482     targ = junkout;
 483     offs = junokout;
 484
 485     realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
 486     realBufferEnd = junkout + realBufferSize;
 487     realSourceEnd = src + sourcelen;
 488
 489     if ( gOutBufferSize != realBufferSize ||  gInBufferSize != NEW_MAX_BUFFER )
 490         checkOffsets = FALSE;
 491
 492     do
 493     {
 494         end = nct_min( targ + gOutBufferSize, realBufferEnd);
 495         srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
 496
 497         if(targ == realBufferEnd)
 498         {
 499             log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjet=%08lx %s",targ,gNuConvTestName);
 500             return TC_FAIL;
 501         }
 502         log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
 503
 504         /* oldTarg = targ; */
 505
 506         status = U_ZERO_ERROR;
 507
 508         ucnv_toUnicode (conv,
 509                 &targ,
 510                 end,
 511                 (const char **)&src,
 512                 (const char *)srcLimit,
 513                 checkOffsets ? offs : NULL,
 514                 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of hte source data */
 515                 &status);
 516
 517         /*        offs += (targ-oldTarg); */
 518
 519       } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
 520
 521     if(U_FAILURE(status))
 522     {
 523         log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
 524         return TC_FAIL;
 525     }
 526
 527     log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
 528         sourcelen, targ-junkout);
 529     if(VERBOSITY)
 530     {
 531         char junk[9999];
 532         char offset_str[9999];
 533         UChar *ptr;
 534
 535         junk[0] = 0;
 536         offset_str[0] = 0;
 537
 538         for(ptr = junkout;ptr<targ;ptr++)
 539         {
 540             sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr);
 541             sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[ptr-junkout]);
 542         }
 543
 544         log_verbose(junk);
 545         printUSeq(expect, expectlen);
 546         if ( checkOffsets )
 547           {
 548             log_verbose("\nOffsets:");
 549             log_verbose(offset_str);
 550           }
 551         log_verbose("\n");
 552     }
 553     ucnv_close(conv);
 554
 555     log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
 556
 557     if (checkOffsets && (expectOffsets != 0))
 558     {
 559         if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){
 560             log_err("did not get the expected offsets. %s\n",gNuConvTestName);
 561             log_err("Got:      ");
 562             for(p=junkout;p<targ;p++) {
 563                 log_err("%d,", junokout[p-junkout]);
 564             }
 565             log_err("\n");
 566             log_err("Expected: ");
 567             for(i=0; i<(targ-junkout); i++) {
 568                 log_err("%d,", expectOffsets[i]);
 569             }
 570             log_err("\n");
 571             log_err("output:   ");
 572             for(i=0; i<(targ-junkout); i++) {
 573                 log_err("%X,", junkout[i]);
 574             }
 575             log_err("\n");
 576             log_err("input:    ");
 577             for(i=0; i<(src-source); i++) {
 578                 log_err("%X,", (unsigned char)source[i]);
 579             }
 580             log_err("\n");
 581         }
 582     }
 583
 584     if(!memcmp(junkout, expect, expectlen*2))
 585     {
 586         log_verbose("Matches!\n");
 587         return TC_OK;
 588     }
 589     else
 590     {
 591         log_err("String does not match. %s\n", gNuConvTestName);
 592         log_verbose("String does not match. %s\n", gNuConvTestName);
 593         printf("\nGot:");
 594         printUSeqErr(junkout, expectlen);
 595         printf("\nExpected:");
 596         printUSeqErr(expect, expectlen);
 597         return TC_MISMATCH;
 598     }
 599 }
 600
 601
 602 static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize )
 603 {
 604 /** test chars #1 */
 605     /*  1 2 3  1Han 2Han 3Han .  */
 606     UChar    sampleText[] =
 607      { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09,  0x002E  };
 608
 609
 610     const uint8_t expectedUTF8[] =
 611      { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
 612     int32_t  toUTF8Offs[] =
 613      { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07};
 614     int32_t fmUTF8Offs[] =
 615      { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d };
 616
 617     /* Same as UTF8, but with ^[%B preceeding */
 618     const uint8_t expectedISO2022[] =
 619      { 0x1b, 0x25, 0x42, 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
 620     int32_t  toISO2022Offs[]     =
 621      { -1, -1, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x04,
 622        0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07 }; /* right? */
 623     int32_t fmISO2022Offs[] =
 624      { 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x000a, 0x000d, 0x0010 }; /* is this right? */
 625
 626     /*  1 2 3 0, <SO> h1 h2 h3 <SI> . EBCDIC_STATEFUL */
 627     const uint8_t expectedIBM930[] =
 628      { 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0x4B };
 629     int32_t  toIBM930Offs[] =
 630      { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, };
 631     int32_t fmIBM930Offs[] =
 632      { 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c};
 633
 634     /* 1 2 3 0 h1 h2 h3 . MBCS*/
 635     const uint8_t expectedIBM943[] =
 636      {  0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e };
 637     int32_t  toIBM943Offs    [] =
 638      {  0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07 };
 639     int32_t fmIBM943Offs[] =
 640      { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a};
 641
 642     /* 1 2 3 0 h1 h2 h3 . DBCS*/
 643     const uint8_t expectedIBM9027[] =
 644      {  0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48, 0x4c, 0x55, 0xfe, 0xfe};
 645     int32_t  toIBM9027Offs    [] =
 646      {  0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07};
 647
 648      /* 1 2 3 0 <?> <?> <?> . SBCS*/
 649     const uint8_t expectedIBM920[] =
 650      {  0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e };
 651     int32_t  toIBM920Offs    [] =
 652      {  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 };
 653
 654     /* 1 2 3 0 <?> <?> <?> . SBCS*/
 655     const uint8_t expectedISO88593[] =
 656      { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E };
 657     int32_t  toISO88593Offs[]     =
 658      { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07};
 659
 660     /* 1 2 3 0 <?> <?> <?> . LATIN_1*/
 661     const uint8_t expectedLATIN1[] =
 662      { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E };
 663     int32_t  toLATIN1Offs[]     =
 664      { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07};
 665
 666
 667     /*  etc */
 668     const uint8_t expectedUTF16BE[] =
 669      { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x00, 0x2e };
 670     int32_t      toUTF16BEOffs[]=
 671      { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07};
 672     int32_t fmUTF16BEOffs[] =
 673      { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c,  0x000e };
 674
 675     const uint8_t expectedUTF16LE[] =
 676      { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x4e, 0x2e, 0x00 };
 677     int32_t      toUTF16LEOffs[]=
 678      { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06,  0x07, 0x07};
 679     int32_t fmUTF16LEOffs[] =
 680      { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c,  0x000e };
 681
 682     const uint8_t expectedUTF32BE[] =
 683      { 0x00, 0x00, 0x00, 0x31,
 684        0x00, 0x00, 0x00, 0x32,
 685        0x00, 0x00, 0x00, 0x33,
 686        0x00, 0x00, 0x00, 0x00,
 687        0x00, 0x00, 0x4e, 0x00,
 688        0x00, 0x00, 0x4e, 0x8c,
 689        0x00, 0x00, 0x4e, 0x09,
 690        0x00, 0x00, 0x00, 0x2e };
 691     int32_t      toUTF32BEOffs[]=
 692      { 0x00, 0x00, 0x00, 0x00,
 693        0x01, 0x01, 0x01, 0x01,
 694        0x02, 0x02, 0x02, 0x02,
 695        0x03, 0x03, 0x03, 0x03,
 696        0x04, 0x04, 0x04, 0x04,
 697        0x05, 0x05, 0x05, 0x05,
 698        0x06, 0x06, 0x06, 0x06,
 699        0x07, 0x07, 0x07, 0x07,
 700        0x08, 0x08, 0x08, 0x08 };
 701     int32_t fmUTF32BEOffs[] =
 702      { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018,  0x001c };
 703
 704     const uint8_t expectedUTF32LE[] =
 705      { 0x31, 0x00, 0x00, 0x00,
 706        0x32, 0x00, 0x00, 0x00,
 707        0x33, 0x00, 0x00, 0x00,
 708        0x00, 0x00, 0x00, 0x00,
 709        0x00, 0x4e, 0x00, 0x00,
 710        0x8c, 0x4e, 0x00, 0x00,
 711        0x09, 0x4e, 0x00, 0x00,
 712        0x2e, 0x00, 0x00, 0x00 };
 713     int32_t      toUTF32LEOffs[]=
 714      { 0x00, 0x00, 0x00, 0x00,
 715        0x01, 0x01, 0x01, 0x01,
 716        0x02, 0x02, 0x02, 0x02,
 717        0x03, 0x03, 0x03, 0x03,
 718        0x04, 0x04, 0x04, 0x04,
 719        0x05, 0x05, 0x05, 0x05,
 720        0x06, 0x06, 0x06, 0x06,
 721        0x07, 0x07, 0x07, 0x07,
 722        0x08, 0x08, 0x08, 0x08 };
 723     int32_t fmUTF32LEOffs[] =
 724      { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018,  0x001c };
 725
 726
 727
 728
 729 /** Test chars #2 **/
 730
 731     /* Sahha [health],  slashed h's */
 732     const UChar malteseUChars[] = { 0x0053, 0x0061, 0x0127, 0x0127, 0x0061 };
 733     const uint8_t expectedMaltese913[] = { 0x53, 0x61, 0xB1, 0xB1, 0x61 };
 734
 735     /* LMBCS */
 736     const UChar LMBCSUChars[]  = { 0x0027, 0x010A, 0x0000, 0x0127, 0x2666, 0x0220 };
 737     const uint8_t expectedLMBCS[] = { 0x27, 0x06, 0x04, 0x00, 0x01, 0x73, 0x01, 0x04, 0x14, 0x02, 0x20 };
 738     int32_t toLMBCSOffs[]   = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x04 , 0x05, 0x05, 0x05 };
 739     int32_t fmLMBCSOffs[]   = { 0x0000, 0x0001, 0x0003, 0x0004, 0x0006, 0x0008};
 740     /*********************************** START OF CODE finally *************/
 741
 742   gInBufferSize = insize;
 743   gOutBufferSize = outsize;
 744
 745   log_verbose("\n\n\nTesting conversions with InputBufferSize = %d, OutputBufferSize = %d\n", gInBufferSize, gOutBufferSize);
 746
 747
 748 #if 1
 749     /*UTF-8*/
 750     testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
 751         expectedUTF8, sizeof(expectedUTF8), "UTF8", toUTF8Offs,FALSE );
 752
 753     log_verbose("Test surrogate behaviour for UTF8\n");
 754     {
 755         const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01 };
 756         const uint8_t expectedUTF8test2[]= { 0xe2, 0x82, 0xac,
 757                            0xf0, 0x90, 0x90, 0x81,
 758                            0xef, 0xbf, 0xbd
 759         };
 760         int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3 };
 761         testConvertFromU(testinput, sizeof(testinput)/sizeof(testinput[0]),
 762                          expectedUTF8test2, sizeof(expectedUTF8test2), "UTF8", offsets,FALSE );
 763
 764
 765     }
 766     /*ISO-2022*/
 767     testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
 768         expectedISO2022, sizeof(expectedISO2022), "ISO_2022", toISO2022Offs,FALSE );
 769     /*UTF16 LE*/
 770     testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
 771         expectedUTF16LE, sizeof(expectedUTF16LE), "utf-16le", toUTF16LEOffs,FALSE );
 772     /*UTF16 BE*/
 773     testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
 774         expectedUTF16BE, sizeof(expectedUTF16BE), "utf-16be", toUTF16BEOffs,FALSE );
 775     /*UTF32 LE*/
 776     testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
 777         expectedUTF32LE, sizeof(expectedUTF32LE), "utf-32le", toUTF32LEOffs,FALSE );
 778     /*UTF32 BE*/
 779     testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
 780         expectedUTF32BE, sizeof(expectedUTF32BE), "utf-32be", toUTF32BEOffs,FALSE );
 781     /*LATIN_1*/
 782     testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
 783         expectedLATIN1, sizeof(expectedLATIN1), "LATIN_1", toLATIN1Offs,FALSE );
 784     /*EBCDIC_STATEFUL*/
 785     testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
 786         expectedIBM930, sizeof(expectedIBM930), "ibm-930", toIBM930Offs,FALSE );
 787
 788     testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
 789         expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE );
 790
 791     /*MBCS*/
 792
 793     testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
 794         expectedIBM943, sizeof(expectedIBM943), "ibm-943", toIBM943Offs,FALSE );
 795     /*DBCS*/
 796     testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
 797         expectedIBM9027, sizeof(expectedIBM9027), "@ibm9027", toIBM9027Offs,FALSE );
 798     /*SBCS*/
 799     testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
 800         expectedIBM920, sizeof(expectedIBM920), "ibm-920", toIBM920Offs,FALSE );
 801     /*SBCS*/
 802     testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
 803         expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE );
 804
 805
 806 /****/
 807 #endif
 808
 809 #if 1
 810     /*UTF-8*/
 811     testConvertToU(expectedUTF8, sizeof(expectedUTF8),
 812         sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf8", fmUTF8Offs,FALSE);
 813     /*ISO-2022*/
 814     testConvertToU(expectedISO2022, sizeof(expectedISO2022),
 815         sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ISO_2022", fmISO2022Offs,FALSE);
 816     /*UTF16 LE*/
 817     testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
 818         sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE);
 819     /*UTF16 BE*/
 820     testConvertToU(expectedUTF16BE, sizeof(expectedUTF16BE),
 821         sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16be", fmUTF16BEOffs,FALSE);
 822     /*UTF32 LE*/
 823     testConvertToU(expectedUTF32LE, sizeof(expectedUTF32LE),
 824         sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32le", fmUTF32LEOffs,FALSE);
 825     /*UTF32 BE*/
 826     testConvertToU(expectedUTF32BE, sizeof(expectedUTF32BE),
 827         sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32be", fmUTF32BEOffs,FALSE);
 828     /*EBCDIC_STATEFUL*/
 829     testConvertToU(expectedIBM930, sizeof(expectedIBM930),
 830         sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ibm-930", fmIBM930Offs,FALSE);
 831     /*MBCS*/
 832     testConvertToU(expectedIBM943, sizeof(expectedIBM943),
 833         sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ibm-943", fmIBM943Offs,FALSE);
 834
 835     /* Try it again to make sure it still works */
 836     testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
 837         sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE);
 838
 839     testConvertToU(expectedMaltese913, sizeof(expectedMaltese913),
 840         malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]), "latin3", NULL,FALSE);
 841
 842     testConvertFromU(malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]),
 843         expectedMaltese913, sizeof(expectedMaltese913), "iso-8859-3", NULL,FALSE );
 844
 845     /*LMBCS*/
 846     testConvertFromU(LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]),
 847         expectedLMBCS, sizeof(expectedLMBCS), "LMBCS-1", toLMBCSOffs,FALSE );
 848     testConvertToU(expectedLMBCS, sizeof(expectedLMBCS),
 849         LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]), "LMBCS-1", fmLMBCSOffs,FALSE);
 850
 851     /* UTF-7 examples are mostly from http://www.imc.org/rfc2152 */
 852     {
 853         /* encode directly set D and set O */
 854         static const uint8_t utf7[] = {
 855             /*
 856                 Hi Mom -+Jjo--!
 857                 A+ImIDkQ.
 858                 +-
 859                 +ZeVnLIqe
 860             */
 861             0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
 862             0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
 863             0x2b, 0x2d,
 864             0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65
 865         };
 866         static const UChar unicode[] = {
 867             /*
 868                 Hi Mom -<WHITE SMILING FACE>-!
 869                 A<NOT IDENTICAL TO><ALPHA>.
 870                 +
 871                 [Japanese word "nihongo"]
 872             */
 873             0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
 874             0x41, 0x2262, 0x0391, 0x2e,
 875             0x2b,
 876             0x65e5, 0x672c, 0x8a9e
 877         };
 878         static const int32_t toUnicodeOffsets[] = {
 879             0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
 880             15, 17, 19, 23,
 881             24,
 882             27, 29, 32
 883         };
 884         static const int32_t fromUnicodeOffsets[] = {
 885             0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
 886             11, 12, 12, 12, 13, 13, 13, 13, 14,
 887             15, 15,
 888             16, 16, 16, 17, 17, 17, 18, 18, 18
 889         };
 890
 891         /* same but escaping set O (the exclamation mark) */
 892         static const uint8_t utf7Restricted[] = {
 893             /*
 894                 Hi Mom -+Jjo--+ACE-
 895                 A+ImIDkQ.
 896                 +-
 897                 +ZeVnLIqe
 898             */
 899             0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x2b, 0x41, 0x43, 0x45, 0x2d,
 900             0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
 901             0x2b, 0x2d,
 902             0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65
 903         };
 904         static const int32_t toUnicodeOffsetsR[] = {
 905             0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 15,
 906             19, 21, 23, 27,
 907             28,
 908             31, 33, 36
 909         };
 910         static const int32_t fromUnicodeOffsetsR[] = {
 911             0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 10, 10, 10, 10,
 912             11, 12, 12, 12, 13, 13, 13, 13, 14,
 913             15, 15,
 914             16, 16, 16, 17, 17, 17, 18, 18, 18
 915         };
 916
 917         testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7, sizeof(utf7), "UTF-7", fromUnicodeOffsets,FALSE);
 918
 919         testConvertToU(utf7, sizeof(utf7), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7", toUnicodeOffsets,FALSE);
 920
 921         testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7Restricted, sizeof(utf7Restricted), "UTF-7,version=1", fromUnicodeOffsetsR,FALSE);
 922
 923         testConvertToU(utf7Restricted, sizeof(utf7Restricted), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7,version=1", toUnicodeOffsetsR,FALSE);
 924     }
 925
 926     /*
 927      * IMAP-mailbox-name examples are mostly from http://www.imc.org/rfc2152,
 928      * modified according to RFC 2060,
 929      * and supplemented with the one example in RFC 2060 itself.
 930      */
 931     {
 932         static const uint8_t imap[] = {
 933             /*  Hi Mom -&Jjo--!
 934                 A&ImIDkQ-.
 935                 &-
 936                 &ZeVnLIqe-
 937                 \
 938                 ~peter
 939                 /mail
 940                 /&ZeVnLIqe-
 941                 /&U,BTFw-
 942             */
 943             0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x26, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
 944             0x41, 0x26, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2d, 0x2e,
 945             0x26, 0x2d,
 946             0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
 947             0x5c,
 948             0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
 949             0x2f, 0x6d, 0x61, 0x69, 0x6c,
 950             0x2f, 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
 951             0x2f, 0x26, 0x55, 0x2c, 0x42, 0x54, 0x46, 0x77, 0x2d
 952         };
 953         static const UChar unicode[] = {
 954             /*  Hi Mom -<WHITE SMILING FACE>-!
 955                 A<NOT IDENTICAL TO><ALPHA>.
 956                 &
 957                 [Japanese word "nihongo"]
 958                 \
 959                 ~peter
 960                 /mail
 961                 /<65e5, 672c, 8a9e>
 962                 /<53f0, 5317>
 963             */
 964             0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
 965             0x41, 0x2262, 0x0391, 0x2e,
 966             0x26,
 967             0x65e5, 0x672c, 0x8a9e,
 968             0x5c,
 969             0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
 970             0x2f, 0x6d, 0x61, 0x69, 0x6c,
 971             0x2f, 0x65e5, 0x672c, 0x8a9e,
 972             0x2f, 0x53f0, 0x5317
 973         };
 974         static const int32_t toUnicodeOffsets[] = {
 975             0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
 976             15, 17, 19, 24,
 977             25,
 978             28, 30, 33,
 979             37,
 980             38, 39, 40, 41, 42, 43,
 981             44, 45, 46, 47, 48,
 982             49, 51, 53, 56,
 983             60, 62, 64
 984         };
 985         static const int32_t fromUnicodeOffsets[] = {
 986             0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
 987             11, 12, 12, 12, 13, 13, 13, 13, 13, 14,
 988             15, 15,
 989             16, 16, 16, 17, 17, 17, 18, 18, 18, 18,
 990             19,
 991             20, 21, 22, 23, 24, 25,
 992             26, 27, 28, 29, 30,
 993             31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34,
 994             35, 36, 36, 36, 37, 37, 37, 37, 37
 995         };
 996
 997         testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, imap, sizeof(imap), "IMAP-mailbox-name", fromUnicodeOffsets,FALSE);
 998
 999         testConvertToU(imap, sizeof(imap), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "IMAP-mailbox-name", toUnicodeOffsets,FALSE);
1000     }
1001
1002     /* Test UTF-8 bad data handling*/
1003     {
1004         static const uint8_t utf8[]={
1005             0x61,
1006             0xf7, 0xbf, 0xbf, 0xbf,         /* > 10FFFF */
1007             0x00,
1008             0x62,
1009             0xfb, 0xbf, 0xbf, 0xbf, 0xbf,   /* > 10FFFF */
1010             0xfb, 0xbf, 0xbf, 0xbf, 0xbf,   /* > 10FFFF */
1011             0xf4, 0x8f, 0xbf, 0xbf,         /* 10FFFF */
1012             0xdf, 0xbf,                     /* 7ff */
1013             0xbf,                           /* truncated tail */
1014             0xf4, 0x90, 0x80, 0x80,         /* 11FFFF */
1015             0x02
1016         };
1017
1018         static const uint16_t utf8Expected[]={
1019             0x0061,
1020             0xfffd,
1021             0x0000,
1022             0x0062,
1023             0xfffd,
1024             0xfffd,
1025             0xdbff, 0xdfff,
1026             0x07ff,
1027             0xfffd,
1028             0xfffd,
1029             0x0002
1030         };
1031
1032         static const int32_t utf8Offsets[]={
1033             0, 1, 5, 6, 7, 12, 17, 17, 21, 23, 24, 28
1034         };
1035         testConvertToU(utf8, sizeof(utf8),
1036                        utf8Expected, sizeof(utf8Expected)/sizeof(utf8Expected[0]), "utf-8", utf8Offsets ,FALSE);
1037
1038     }
1039
1040     /* Test UTF-32BE bad data handling*/
1041     {
1042         static const uint8_t utf32[]={
1043             0x00, 0x00, 0x00, 0x61,
1044             0x00, 0x11, 0x00, 0x00,         /* 0x110000 out of range */
1045             0x00, 0x10, 0xff, 0xff,         /* 0x10FFFF in range */
1046             0x00, 0x00, 0x00, 0x62,
1047             0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
1048             0x7f, 0xff, 0xff, 0xff,         /* 0x7fffffff out of range */
1049             0x00, 0x00, 0x01, 0x62,
1050             0x00, 0x00, 0x02, 0x62
1051         };
1052
1053         static const uint16_t utf32Expected[]={
1054             0x0061,
1055             0xfffd,         /* 0x110000 out of range */
1056             0xDBFF,         /* 0x10FFFF in range */
1057             0xDFFF,
1058             0x0062,
1059             0xfffd,         /* 0xffffffff out of range */
1060             0xfffd,         /* 0x7fffffff out of range */
1061             0x0162,
1062             0x0262
1063         };
1064
1065         static const int32_t utf32Offsets[]={
1066             0, 4, 8, 8, 12, 16, 20, 24, 28
1067         };
1068         testConvertToU(utf32, sizeof(utf32),
1069                        utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32be", utf32Offsets ,FALSE);
1070
1071     }
1072
1073     /* Test UTF-32LE bad data handling*/
1074     {
1075         static const uint8_t utf32[]={
1076             0x61, 0x00, 0x00, 0x00,
1077             0x00, 0x00, 0x11, 0x00,         /* 0x110000 out of range */
1078             0xff, 0xff, 0x10, 0x00,         /* 0x10FFFF in range */
1079             0x62, 0x00, 0x00, 0x00,
1080             0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
1081             0xff, 0xff, 0xff, 0x7f,         /* 0x7fffffff out of range */
1082             0x62, 0x01, 0x00, 0x00,
1083             0x62, 0x02, 0x00, 0x00,
1084         };
1085
1086         static const uint16_t utf32Expected[]={
1087             0x0061,
1088             0xfffd,         /* 0x110000 out of range */
1089             0xDBFF,         /* 0x10FFFF in range */
1090             0xDFFF,
1091             0x0062,
1092             0xfffd,         /* 0xffffffff out of range */
1093             0xfffd,         /* 0x7fffffff out of range */
1094             0x0162,
1095             0x0262
1096         };
1097
1098         static const int32_t utf32Offsets[]={
1099             0, 4, 8, 8, 12, 16, 20, 24, 28
1100         };
1101         testConvertToU(utf32, sizeof(utf32),
1102             utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32le", utf32Offsets,FALSE );
1103
1104     }
1105 }
1106
1107 static void TestCoverageMBCS(){
1108 #if 0
1109     UErrorCode status = U_ZERO_ERROR;
1110     const char *directory = loadTestData(&status);
1111     char* tdpath = NULL;
1112     char* saveDirectory = (char*)malloc(sizeof(char) *(strlen(u_getDataDirectory())+1));
1113     int len = strlen(directory);
1114     char* index=NULL;
1115
1116     tdpath = (char*) malloc(sizeof(char) * (len * 2));
1117     uprv_strcpy(saveDirectory,u_getDataDirectory());
1118     log_verbose("Retrieved data directory %s \n",saveDirectory);
1119     uprv_strcpy(tdpath,directory);
1120     index=strrchr(tdpath,(char)U_FILE_SEP_CHAR);
1121
1122     if((unsigned int)(index-tdpath) != (strlen(tdpath)-1)){
1123             *(index+1)=0;
1124     }
1125     u_setDataDirectory(tdpath);
1126     log_verbose("ICU data directory is set to: %s \n" ,tdpath);
1127 #endif
1128
1129     /*some more test to increase the code coverage in MBCS.  Create an test converter from test1.ucm
1130       which is test file for MBCS conversion with single-byte codepage data.*/
1131     {
1132
1133         /* MBCS with single byte codepage data test1.ucm*/
1134         const UChar unicodeInput[]    = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0x0003};
1135         const uint8_t expectedtest1[] = { 0x00, 0x05, 0xff, 0x07, 0xff,};
1136         int32_t  totest1Offs[]        = { 0, 1, 2, 3, 5, };
1137
1138         const uint8_t test1input[]    = { 0x00, 0x05, 0x06, 0x07, 0x08, 0x09};
1139         const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0xfffd, 0xfffd};
1140         int32_t fromtest1Offs[]       = { 0, 1, 2, 3, 3, 4, 5};
1141
1142         /*from Unicode*/
1143         testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]),
1144             expectedtest1, sizeof(expectedtest1), "@test1", totest1Offs,FALSE );
1145
1146         /*to Unicode*/
1147         testConvertToU(test1input, sizeof(test1input),
1148             expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test1", fromtest1Offs ,FALSE);
1149
1150     }
1151
1152     /*some more test to increase the code coverage in MBCS.  Create an test converter from test3.ucm
1153       which is test file for MBCS conversion with three-byte codepage data.*/
1154     {
1155
1156         /* MBCS with three byte codepage data test3.ucm*/
1157         const UChar unicodeInput[]    = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1158         const uint8_t expectedtest3[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x0b,  0x07,  0x01, 0x02, 0x0a,  0xff,};
1159         int32_t  totest3Offs[]        = { 0, 1, 2, 3, 3, 3, 4, 6, 6, 6, 8};
1160
1161         const uint8_t test3input[]    = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b,  0x07,  0x01, 0x02, 0x0a, 0x01, 0x02, 0x0c,};
1162         const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1163         int32_t fromtest3Offs[]       = { 0, 1, 2, 3, 6, 6, 7, 7, 10 };
1164
1165         /*from Unicode*/
1166         testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]),
1167             expectedtest3, sizeof(expectedtest3), "@test3", totest3Offs,FALSE );
1168
1169         /*to Unicode*/
1170         testConvertToU(test3input, sizeof(test3input),
1171             expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test3", fromtest3Offs ,FALSE);
1172
1173     }
1174
1175     /*some more test to increase the code coverage in MBCS.  Create an test converter from test4.ucm
1176       which is test file for MBCS conversion with four-byte codepage data.*/
1177     {
1178
1179         /* MBCS with three byte codepage data test4.ucm*/
1180         static const UChar unicodeInput[]    = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1181         static const uint8_t expectedtest4[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x03, 0x0b,  0x07,  0x01, 0x02, 0x03, 0x0a,  0xff,};
1182         static const int32_t totest4Offs[]   = { 0, 1, 2, 3, 3, 3, 3, 4, 6, 6, 6, 6, 8,};
1183
1184         static const uint8_t test4input[]    = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x03, 0x0b,  0x07,  0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0c,};
1185         static const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1186         static const int32_t fromtest4Offs[] = { 0, 1, 2, 3, 7, 7, 8, 8, 12,};
1187
1188         /*from Unicode*/
1189         testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]),
1190             expectedtest4, sizeof(expectedtest4), "@test4", totest4Offs,FALSE );
1191
1192         /*to Unicode*/
1193         testConvertToU(test4input, sizeof(test4input),
1194             expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test4", fromtest4Offs,FALSE );
1195
1196     }
1197 #if 0
1198     free(tdpath);
1199     /* restore the original data directory */
1200     log_verbose("Setting the data directory to %s \n", saveDirectory);
1201     u_setDataDirectory(saveDirectory);
1202     free(saveDirectory);
1203 #endif
1204
1205 }
1206
1207 static void TestConverterType(const char *convName, UConverterType convType) {
1208     UConverter* myConverter;
1209     UErrorCode err = U_ZERO_ERROR;
1210
1211     myConverter = my_ucnv_open(convName, &err);
1212
1213     if (U_FAILURE(err)) {
1214         log_data_err("Failed to create an %s converter\n", convName);
1215         return;
1216     }
1217     else
1218     {
1219         if (ucnv_getType(myConverter)!=convType) {
1220             log_err("ucnv_getType Failed for %s. Got enum value 0x%X\n",
1221                 convName, convType);
1222         }
1223         else {
1224             log_verbose("ucnv_getType %s ok\n", convName);
1225         }
1226     }
1227     ucnv_close(myConverter);
1228 }
1229
1230 static void TestConverterTypesAndStarters()
1231 {
1232     UConverter* myConverter;
1233     UErrorCode err = U_ZERO_ERROR;
1234     UBool mystarters[256];
1235
1236 /*    const UBool expectedKSCstarters[256] = {
1237         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1238         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1239         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1240         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1241         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1242         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1243         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1244         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1245         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1246         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1247         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1248         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1249         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1250         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1251         FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1252         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1253         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1254         TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE,
1255         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1256         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1257         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1258         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1259         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1260         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1261         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1262         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE};*/
1263
1264
1265     log_verbose("Testing KSC, ibm-930, ibm-878  for starters and their conversion types.");
1266
1267     myConverter = ucnv_open("ksc", &err);
1268     if (U_FAILURE(err)) {
1269       log_data_err("Failed to create an ibm-ksc converter\n");
1270       return;
1271     }
1272     else
1273     {
1274         if (ucnv_getType(myConverter)!=UCNV_MBCS)
1275             log_err("ucnv_getType Failed for ibm-949\n");
1276         else
1277             log_verbose("ucnv_getType ibm-949 ok\n");
1278
1279         if(myConverter!=NULL)
1280             ucnv_getStarters(myConverter, mystarters, &err);
1281
1282         /*if (memcmp(expectedKSCstarters, mystarters, sizeof(expectedKSCstarters)))
1283           log_err("Failed ucnv_getStarters for ksc\n");
1284           else
1285           log_verbose("ucnv_getStarters ok\n");*/
1286
1287     }
1288     ucnv_close(myConverter);
1289
1290     TestConverterType("ibm-930", UCNV_EBCDIC_STATEFUL);
1291     TestConverterType("ibm-878", UCNV_SBCS);
1292     TestConverterType("iso-8859-1", UCNV_LATIN_1);
1293     TestConverterType("ibm-1208", UCNV_UTF8);
1294     TestConverterType("utf-8", UCNV_UTF8);
1295     TestConverterType("UTF-16BE", UCNV_UTF16_BigEndian);
1296     TestConverterType("UTF-16LE", UCNV_UTF16_LittleEndian);
1297     TestConverterType("UTF-32BE", UCNV_UTF32_BigEndian);
1298     TestConverterType("UTF-32LE", UCNV_UTF32_LittleEndian);
1299     TestConverterType("iso-2022", UCNV_ISO_2022);
1300     TestConverterType("hz", UCNV_HZ);
1301     TestConverterType("scsu", UCNV_SCSU);
1302     TestConverterType("x-iscii-de", UCNV_ISCII);
1303     TestConverterType("ascii", UCNV_US_ASCII);
1304     TestConverterType("utf-7", UCNV_UTF7);
1305     TestConverterType("IMAP-mailbox-name", UCNV_IMAP_MAILBOX);
1306     TestConverterType("bocu-1", UCNV_BOCU1);
1307 }
1308
1309 static void
1310 TestAmbiguousConverter(UConverter *cnv) {
1311     static const char inBytes[2]={ 0x61, 0x5c };
1312     UChar outUnicode[20]={ 0, 0, 0, 0 };
1313
1314     const char *s;
1315     UChar *u;
1316     UErrorCode errorCode;
1317     UBool isAmbiguous;
1318
1319     /* try to convert an 'a' and a US-ASCII backslash */
1320     errorCode=U_ZERO_ERROR;
1321     s=inBytes;
1322     u=outUnicode;
1323     ucnv_toUnicode(cnv, &u, u+20, &s, s+2, NULL, TRUE, &errorCode);
1324     if(U_FAILURE(errorCode)) {
1325         /* we do not care about general failures in this test; the input may just not be mappable */
1326         return;
1327     }
1328
1329     if(outUnicode[0]!=0x61 || outUnicode[1]==0xfffd) {
1330         /* not an ASCII-family encoding, or 0x5c is unassigned/illegal: this test is not applicable */
1331         return;
1332     }
1333
1334     isAmbiguous=ucnv_isAmbiguous(cnv);
1335
1336     /* check that outUnicode[1]!=0x5c is exactly the same as ucnv_isAmbiguous() */
1337     if((outUnicode[1]!=0x5c)!=isAmbiguous) {
1338         log_err("error: converter \"%s\" needs a backslash fix: %d but ucnv_isAmbiguous()==%d\n",
1339             ucnv_getName(cnv, &errorCode), outUnicode[1]!=0x5c, isAmbiguous);
1340         return;
1341     }
1342
1343     if(outUnicode[1]!=0x5c) {
1344         /* needs fixup, fix it */
1345         ucnv_fixFileSeparator(cnv, outUnicode, (int32_t)(u-outUnicode));
1346         if(outUnicode[1]!=0x5c) {
1347             /* the fix failed */
1348             log_err("error: ucnv_fixFileSeparator(%s) failed\n", ucnv_getName(cnv, &errorCode));
1349             return;
1350         }
1351     }
1352 }
1353
1354 static void TestAmbiguous()
1355 {
1356     UErrorCode status = U_ZERO_ERROR;
1357     UConverter *ascii_cnv = 0, *sjis_cnv = 0, *cnv;
1358     const char target[] = {
1359         /* "\\usr\\local\\share\\data\\icutest.txt" */
1360         0x5c, 0x75, 0x73, 0x72,
1361         0x5c, 0x6c, 0x6f, 0x63, 0x61, 0x6c,
1362         0x5c, 0x73, 0x68, 0x61, 0x72, 0x65,
1363         0x5c, 0x64, 0x61, 0x74, 0x61,
1364         0x5c, 0x69, 0x63, 0x75, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, 0x78, 0x74,
1365         0
1366     };
1367     UChar asciiResult[200], sjisResult[200];
1368     int32_t asciiLength = 0, sjisLength = 0, i;
1369     const char *name;
1370
1371     /* enumerate all converters */
1372     status=U_ZERO_ERROR;
1373     for(i=0; (name=ucnv_getAvailableName(i))!=NULL; ++i) {
1374         cnv=ucnv_open(name, &status);
1375         if(U_SUCCESS(status)) {
1376             TestAmbiguousConverter(cnv);
1377             ucnv_close(cnv);
1378         } else {
1379             log_err("error: unable to open available converter \"%s\"\n", name);
1380             status=U_ZERO_ERROR;
1381         }
1382     }
1383
1384     sjis_cnv = ucnv_open("ibm-943", &status);
1385     if (U_FAILURE(status))
1386     {
1387         log_data_err("Failed to create a SJIS converter\n");
1388         return;
1389     }
1390     ascii_cnv = ucnv_open("LATIN-1", &status);
1391     if (U_FAILURE(status))
1392     {
1393         log_data_err("Failed to create a LATIN-1 converter\n");
1394         ucnv_close(sjis_cnv);
1395         return;
1396     }
1397     /* convert target from SJIS to Unicode */
1398     sjisLength = ucnv_toUChars(sjis_cnv, sjisResult, sizeof(sjisResult)/U_SIZEOF_UCHAR, target, strlen(target), &status);
1399     if (U_FAILURE(status))
1400     {
1401         log_err("Failed to convert the SJIS string.\n");
1402         ucnv_close(sjis_cnv);
1403         ucnv_close(ascii_cnv);
1404         return;
1405     }
1406     /* convert target from Latin-1 to Unicode */
1407     asciiLength = ucnv_toUChars(ascii_cnv, asciiResult, sizeof(asciiResult)/U_SIZEOF_UCHAR, target, strlen(target), &status);
1408     if (U_FAILURE(status))
1409     {
1410         log_err("Failed to convert the Latin-1 string.\n");
1411         free(sjisResult);
1412         ucnv_close(sjis_cnv);
1413         ucnv_close(ascii_cnv);
1414         return;
1415     }
1416     if (!ucnv_isAmbiguous(sjis_cnv))
1417     {
1418         log_err("SJIS converter should contain ambiguous character mappings.\n");
1419         free(sjisResult);
1420         free(asciiResult);
1421         ucnv_close(sjis_cnv);
1422         ucnv_close(ascii_cnv);
1423         return;
1424     }
1425     if (u_strcmp(sjisResult, asciiResult) == 0)
1426     {
1427         log_err("File separators for SJIS don't need to be fixed.\n");
1428     }
1429     ucnv_fixFileSeparator(sjis_cnv, sjisResult, sjisLength);
1430     if (u_strcmp(sjisResult, asciiResult) != 0)
1431     {
1432         log_err("Fixing file separator for SJIS failed.\n");
1433     }
1434     ucnv_close(sjis_cnv);
1435     ucnv_close(ascii_cnv);
1436 }
1437
1438 static void
1439 TestSignatureDetection(){
1440     /* with null terminated strings */
1441     {
1442         static const char* data[] = {
1443                 "\xFE\xFF\x00\x00",     /* UTF-16BE */
1444                 "\xFF\xFE\x00\x00",     /* UTF-16LE */
1445                 "\xEF\xBB\xBF\x00",     /* UTF-8    */
1446                 "\x0E\xFE\xFF\x00",     /* SCSU     */
1447
1448                 "\xFE\xFF",             /* UTF-16BE */
1449                 "\xFF\xFE",             /* UTF-16LE */
1450                 "\xEF\xBB\xBF",         /* UTF-8    */
1451                 "\x0E\xFE\xFF",         /* SCSU     */
1452
1453                 "\xFE\xFF\x41\x42",     /* UTF-16BE */
1454                 "\xFF\xFE\x41\x41",     /* UTF-16LE */
1455                 "\xEF\xBB\xBF\x41",     /* UTF-8    */
1456                 "\x0E\xFE\xFF\x41",     /* SCSU     */
1457
1458                 "\x2B\x2F\x76\x38\x2D", /* UTF-7    */
1459                 "\x2B\x2F\x76\x38\x41", /* UTF-7    */
1460                 "\x2B\x2F\x76\x39\x41", /* UTF-7    */
1461                 "\x2B\x2F\x76\x2B\x41", /* UTF-7    */
1462                 "\x2B\x2F\x76\x2F\x41"  /* UTF-7    */
1463         };
1464         static const char* expected[] = {
1465                 "UTF-16BE",
1466                 "UTF-16LE",
1467                 "UTF-8",
1468                 "SCSU",
1469
1470                 "UTF-16BE",
1471                 "UTF-16LE",
1472                 "UTF-8",
1473                 "SCSU",
1474
1475                 "UTF-16BE",
1476                 "UTF-16LE",
1477                 "UTF-8",
1478                 "SCSU",
1479
1480                 "UTF-7",
1481                 "UTF-7",
1482                 "UTF-7",
1483                 "UTF-7",
1484                 "UTF-7"
1485         };
1486         static const int32_t expectedLength[] ={
1487             2,
1488             2,
1489             3,
1490             3,
1491
1492             2,
1493             2,
1494             3,
1495             3,
1496
1497             2,
1498             2,
1499             3,
1500             3,
1501
1502             5,
1503             4,
1504             4,
1505             4,
1506             4
1507         };
1508         int i=0;
1509         UErrorCode err;
1510         int32_t signatureLength = -1;
1511         const char* source = NULL;
1512         const char* enc = NULL;
1513         for( ; i<sizeof(data)/sizeof(char*); i++){
1514             err = U_ZERO_ERROR;
1515             source = data[i];
1516             enc = ucnv_detectUnicodeSignature(source, -1 , &signatureLength, &err);
1517             if(U_FAILURE(err)){
1518                 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err));
1519                 continue;
1520             }
1521             if(enc == NULL || strcmp(enc,expected[i]) !=0){
1522                 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc);
1523                 continue;
1524             }
1525             if(signatureLength != expectedLength[i]){
1526                 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]);
1527             }
1528         }
1529     }
1530     {
1531         static const char* data[] = {
1532                 "\xFE\xFF\x00",         /* UTF-16BE */
1533                 "\xFF\xFE\x00",         /* UTF-16LE */
1534                 "\xEF\xBB\xBF\x00",     /* UTF-8    */
1535                 "\x0E\xFE\xFF\x00",     /* SCSU     */
1536                 "\x00\x00\xFE\xFF",     /* UTF-32BE */
1537                 "\xFF\xFE\x00\x00",     /* UTF-32LE */
1538                 "\xFE\xFF",             /* UTF-16BE */
1539                 "\xFF\xFE",             /* UTF-16LE */
1540                 "\xEF\xBB\xBF",         /* UTF-8    */
1541                 "\x0E\xFE\xFF",         /* SCSU     */
1542                 "\x00\x00\xFE\xFF",     /* UTF-32BE */
1543                 "\xFF\xFE\x00\x00",     /* UTF-32LE */
1544                 "\xFE\xFF\x41\x42",     /* UTF-16BE */
1545                 "\xFF\xFE\x41\x41",     /* UTF-16LE */
1546                 "\xEF\xBB\xBF\x41",     /* UTF-8    */
1547                 "\x0E\xFE\xFF\x41",     /* SCSU     */
1548                 "\x00\x00\xFE\xFF\x41", /* UTF-32BE */
1549                 "\xFF\xFE\x00\x00\x42", /* UTF-32LE */
1550                 "\xFB\xEE\x28",         /* BOCU-1   */
1551                 "\xFF\x41\x42"          /* NULL     */
1552         };
1553         static const int len[] = {
1554             3,
1555             3,
1556             4,
1557             4,
1558             4,
1559             4,
1560             2,
1561             2,
1562             3,
1563             3,
1564             4,
1565             4,
1566             4,
1567             4,
1568             4,
1569             4,
1570             5,
1571             5,
1572             3,
1573             3
1574         };
1575
1576         static const char* expected[] = {
1577                 "UTF-16BE",
1578                 "UTF-16LE",
1579                 "UTF-8",
1580                 "SCSU",
1581                 "UTF-32BE",
1582                 "UTF-32LE",
1583                 "UTF-16BE",
1584                 "UTF-16LE",
1585                 "UTF-8",
1586                 "SCSU",
1587                 "UTF-32BE",
1588                 "UTF-32LE",
1589                 "UTF-16BE",
1590                 "UTF-16LE",
1591                 "UTF-8",
1592                 "SCSU",
1593                 "UTF-32BE",
1594                 "UTF-32LE",
1595                 "BOCU-1",
1596                 NULL
1597         };
1598         static const int32_t expectedLength[] ={
1599             2,
1600             2,
1601             3,
1602             3,
1603             4,
1604             4,
1605             2,
1606             2,
1607             3,
1608             3,
1609             4,
1610             4,
1611             2,
1612             2,
1613             3,
1614             3,
1615             4,
1616             4,
1617             3,
1618             0
1619         };
1620         int i=0;
1621         UErrorCode err;
1622         int32_t signatureLength = -1;
1623         int32_t sourceLength=-1;
1624         const char* source = NULL;
1625         const char* enc = NULL;
1626         for( ; i<sizeof(data)/sizeof(char*); i++){
1627             err = U_ZERO_ERROR;
1628             source = data[i];
1629             sourceLength = len[i];
1630             enc = ucnv_detectUnicodeSignature(source, sourceLength , &signatureLength, &err);
1631             if(U_FAILURE(err)){
1632                 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err));
1633                 continue;
1634             }
1635             if(enc == NULL || strcmp(enc,expected[i]) !=0){
1636                 if(expected[i] !=NULL){
1637                  log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc);
1638                  continue;
1639                 }
1640             }
1641             if(signatureLength != expectedLength[i]){
1642                 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]);
1643             }
1644         }
1645     }
1646 }
1647
1648 void
1649 static TestUTF7() {
1650     /* test input */
1651     static const uint8_t in[]={
1652         /* H - +Jjo- - ! +- +2AHcAQ */
1653         0x48,
1654         0x2d,
1655         0x2b, 0x4a, 0x6a, 0x6f,
1656         0x2d, 0x2d,
1657         0x21,
1658         0x2b, 0x2d,
1659         0x2b, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51
1660     };
1661
1662     /* expected test results */
1663     static const uint32_t results[]={
1664         /* number of bytes read, code point */
1665         1, 0x48,
1666         1, 0x2d,
1667         4, 0x263a, /* <WHITE SMILING FACE> */
1668         2, 0x2d,
1669         1, 0x21,
1670         2, 0x2b,
1671         7, 0x10401
1672     };
1673
1674     const char *cnvName;
1675     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
1676     UErrorCode errorCode=U_ZERO_ERROR;
1677     UConverter *cnv=ucnv_open("UTF-7", &errorCode);
1678     if(U_FAILURE(errorCode)) {
1679         log_err("Unable to open a UTF-7 converter: %s\n", u_errorName(errorCode)); /* sholdn't be a data err */
1680         return;
1681     }
1682     TestNextUChar(cnv, source, limit, results, "UTF-7");
1683     /* Test the condition when source >= sourceLimit */
1684     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1685     cnvName = ucnv_getName(cnv, &errorCode);
1686     if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "UTF-7") != 0) {
1687         log_err("UTF-7 converter is called %s: %s\n", cnvName, u_errorName(errorCode));
1688     }
1689     ucnv_close(cnv);
1690 }
1691
1692 void
1693 static TestIMAP() {
1694     /* test input */
1695     static const uint8_t in[]={
1696         /* H - &Jjo- - ! &- &2AHcAQ- \ */
1697         0x48,
1698         0x2d,
1699         0x26, 0x4a, 0x6a, 0x6f,
1700         0x2d, 0x2d,
1701         0x21,
1702         0x26, 0x2d,
1703         0x26, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51, 0x2d
1704     };
1705
1706     /* expected test results */
1707     static const uint32_t results[]={
1708         /* number of bytes read, code point */
1709         1, 0x48,
1710         1, 0x2d,
1711         4, 0x263a, /* <WHITE SMILING FACE> */
1712         2, 0x2d,
1713         1, 0x21,
1714         2, 0x26,
1715         7, 0x10401
1716     };
1717
1718     const char *cnvName;
1719     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
1720     UErrorCode errorCode=U_ZERO_ERROR;
1721     UConverter *cnv=ucnv_open("IMAP-mailbox-name", &errorCode);
1722     if(U_FAILURE(errorCode)) {
1723         log_err("Unable to open a IMAP-mailbox-name converter: %s\n", u_errorName(errorCode)); /* sholdn't be a data err */
1724         return;
1725     }
1726     TestNextUChar(cnv, source, limit, results, "IMAP-mailbox-name");
1727     /* Test the condition when source >= sourceLimit */
1728     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1729     cnvName = ucnv_getName(cnv, &errorCode);
1730     if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "IMAP-mailbox-name") != 0) {
1731         log_err("IMAP-mailbox-name converter is called %s: %s\n", cnvName, u_errorName(errorCode));
1732     }
1733     ucnv_close(cnv);
1734 }
1735
1736 void
1737 static TestUTF8() {
1738     /* test input */
1739     static const uint8_t in[]={
1740         0x61,
1741         0xc2, 0x80,
1742         0xe0, 0xa0, 0x80,
1743         0xf0, 0x90, 0x80, 0x80,
1744         0xf4, 0x84, 0x8c, 0xa1,
1745         0xf0, 0x90, 0x90, 0x81
1746     };
1747
1748     /* expected test results */
1749     static const uint32_t results[]={
1750         /* number of bytes read, code point */
1751         1, 0x61,
1752         2, 0x80,
1753         3, 0x800,
1754         4, 0x10000,
1755         4, 0x104321,
1756         4, 0x10401
1757     };
1758
1759     /* error test input */
1760     static const uint8_t in2[]={
1761         0x61,
1762         0xc0, 0x80,                     /* illegal non-shortest form */
1763         0xe0, 0x80, 0x80,               /* illegal non-shortest form */
1764         0xf0, 0x80, 0x80, 0x80,         /* illegal non-shortest form */
1765         0xc0, 0xc0,                     /* illegal trail byte */
1766         0xf4, 0x90, 0x80, 0x80,         /* 0x110000 out of range */
1767         0xf8, 0x80, 0x80, 0x80, 0x80,   /* too long */
1768         0xfe,                           /* illegal byte altogether */
1769         0x62
1770     };
1771
1772     /* expected error test results */
1773     static const uint32_t results2[]={
1774         /* number of bytes read, code point */
1775         1, 0x61,
1776         22, 0x62
1777     };
1778
1779     UConverterToUCallback cb;
1780     const void *p;
1781
1782     const char *source=(const char *)in,*limit=(const char *)in+sizeof(in);
1783     UErrorCode errorCode=U_ZERO_ERROR;
1784     UConverter *cnv=ucnv_open("UTF-8", &errorCode);
1785     if(U_FAILURE(errorCode)) {
1786         log_err("Unable to open a UTF-8 converter: %s\n", u_errorName(errorCode));
1787         return;
1788     }
1789     TestNextUChar(cnv, source, limit, results, "UTF-8");
1790     /* Test the condition when source >= sourceLimit */
1791     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1792
1793     /* test error behavior with a skip callback */
1794     ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
1795     source=(const char *)in2;
1796     limit=(const char *)(in2+sizeof(in2));
1797     TestNextUChar(cnv, source, limit, results2, "UTF-8");
1798
1799     ucnv_close(cnv);
1800 }
1801
1802 void
1803 static TestCESU8() {
1804     /* test input */
1805     static const uint8_t in[]={
1806         0x61,
1807         0xc2, 0x80,
1808         0xe0, 0xa0, 0x80,
1809         0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80,
1810         0xed, 0xb0, 0x81, 0xed, 0xa0, 0x82,
1811         0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf,
1812         0xef, 0xbf, 0xbc
1813     };
1814
1815     /* expected test results */
1816     static const uint32_t results[]={
1817         /* number of bytes read, code point */
1818         1, 0x61,
1819         2, 0x80,
1820         3, 0x800,
1821         6, 0x10000,
1822         3, 0xdc01,
1823         3, 0xd802,
1824         6, 0x10ffff,
1825         3, 0xfffc
1826     };
1827
1828     /* error test input */
1829     static const uint8_t in2[]={
1830         0x61,
1831         0xc0, 0x80,                     /* illegal non-shortest form */
1832         0xe0, 0x80, 0x80,               /* illegal non-shortest form */
1833         0xf0, 0x80, 0x80, 0x80,         /* illegal non-shortest form */
1834         0xc0, 0xc0,                     /* illegal trail byte */
1835         0xf0, 0x90, 0x80, 0x80,         /* illegal 4-byte supplementary code point */
1836         0xf4, 0x84, 0x8c, 0xa1,         /* illegal 4-byte supplementary code point */
1837         0xf0, 0x90, 0x90, 0x81,         /* illegal 4-byte supplementary code point */
1838         0xf4, 0x90, 0x80, 0x80,         /* 0x110000 out of range */
1839         0xf8, 0x80, 0x80, 0x80, 0x80,   /* too long */
1840         0xfe,                           /* illegal byte altogether */
1841         0x62
1842     };
1843
1844     /* expected error test results */
1845     static const uint32_t results2[]={
1846         /* number of bytes read, code point */
1847         1, 0x61,
1848         34, 0x62
1849     };
1850
1851     UConverterToUCallback cb;
1852     const void *p;
1853
1854     const char *source=(const char *)in,*limit=(const char *)in+sizeof(in);
1855     UErrorCode errorCode=U_ZERO_ERROR;
1856     UConverter *cnv=ucnv_open("CESU-8", &errorCode);
1857     if(U_FAILURE(errorCode)) {
1858         log_err("Unable to open a CESU-8 converter: %s\n", u_errorName(errorCode));
1859         return;
1860     }
1861     TestNextUChar(cnv, source, limit, results, "CESU-8");
1862     /* Test the condition when source >= sourceLimit */
1863     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1864
1865     /* test error behavior with a skip callback */
1866     ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
1867     source=(const char *)in2;
1868     limit=(const char *)(in2+sizeof(in2));
1869     TestNextUChar(cnv, source, limit, results2, "CESU-8");
1870
1871     ucnv_close(cnv);
1872 }
1873
1874 void
1875 static TestUTF16() {
1876     /* test input */
1877     static const uint8_t in1[]={
1878         0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff
1879     };
1880     static const uint8_t in2[]={
1881         0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff
1882     };
1883     static const uint8_t in3[]={
1884         0xfe, 0xfe, 0x4e, 0x00, 0xfe, 0xff, 0xd8, 0x40, 0xdc, 0x01
1885     };
1886
1887     /* expected test results */
1888     static const uint32_t results1[]={
1889         /* number of bytes read, code point */
1890         4, 0x4e00,
1891         2, 0xfeff
1892     };
1893     static const uint32_t results2[]={
1894         /* number of bytes read, code point */
1895         4, 0x004e,
1896         2, 0xfffe
1897     };
1898     static const uint32_t results3[]={
1899         /* number of bytes read, code point */
1900         2, 0xfefe,
1901         2, 0x4e00,
1902         2, 0xfeff,
1903         4, 0x20001
1904     };
1905
1906     const char *source, *limit;
1907
1908     UErrorCode errorCode=U_ZERO_ERROR;
1909     UConverter *cnv=ucnv_open("UTF-16", &errorCode);
1910     if(U_FAILURE(errorCode)) {
1911         log_err("Unable to open a UTF-16 converter: %s\n", u_errorName(errorCode));
1912         return;
1913     }
1914
1915     source=(const char *)in1, limit=(const char *)in1+sizeof(in1);
1916     TestNextUChar(cnv, source, limit, results1, "UTF-16");
1917
1918     source=(const char *)in2, limit=(const char *)in2+sizeof(in2);
1919     ucnv_resetToUnicode(cnv);
1920     TestNextUChar(cnv, source, limit, results2, "UTF-16");
1921
1922     source=(const char *)in3, limit=(const char *)in3+sizeof(in3);
1923     ucnv_resetToUnicode(cnv);
1924     TestNextUChar(cnv, source, limit, results3, "UTF-16");
1925
1926     /* Test the condition when source >= sourceLimit */
1927     ucnv_resetToUnicode(cnv);
1928     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1929
1930     ucnv_close(cnv);
1931 }
1932
1933 void
1934 static TestUTF16BE() {
1935     /* test input */
1936     static const uint8_t in[]={
1937         0x00, 0x61,
1938         0x00, 0xc0,
1939         0x00, 0x31,
1940         0x00, 0xf4,
1941         0xce, 0xfe,
1942         0xd8, 0x01, 0xdc, 0x01
1943     };
1944
1945     /* expected test results */
1946     static const uint32_t results[]={
1947         /* number of bytes read, code point */
1948         2, 0x61,
1949         2, 0xc0,
1950         2, 0x31,
1951         2, 0xf4,
1952         2, 0xcefe,
1953         4, 0x10401
1954     };
1955
1956     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
1957     UErrorCode errorCode=U_ZERO_ERROR;
1958     UConverter *cnv=ucnv_open("utf-16be", &errorCode);
1959     if(U_FAILURE(errorCode)) {
1960         log_err("Unable to open a UTF16-BE converter: %s\n", u_errorName(errorCode));
1961         return;
1962     }
1963     TestNextUChar(cnv, source, limit, results, "UTF-16BE");
1964     /* Test the condition when source >= sourceLimit */
1965     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1966     /*Test for the condition where there is an invalid character*/
1967     {
1968         static const uint8_t source2[]={0x61};
1969         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
1970     }
1971     /*Test for the condition where there is a surrogate pair*/
1972     {
1973         const uint8_t source2[]={0xd8, 0x01};
1974         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
1975     }
1976     ucnv_close(cnv);
1977 }
1978
1979 static void
1980 TestUTF16LE() {
1981     /* test input */
1982     static const uint8_t in[]={
1983         0x61, 0x00,
1984         0x31, 0x00,
1985         0x4e, 0x2e,
1986         0x4e, 0x00,
1987         0x01, 0xd8, 0x01, 0xdc
1988     };
1989
1990     /* expected test results */
1991     static const uint32_t results[]={
1992         /* number of bytes read, code point */
1993         2, 0x61,
1994         2, 0x31,
1995         2, 0x2e4e,
1996         2, 0x4e,
1997         4, 0x10401
1998     };
1999
2000     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2001     UErrorCode errorCode=U_ZERO_ERROR;
2002     UConverter *cnv=ucnv_open("utf-16le", &errorCode);
2003     if(U_FAILURE(errorCode)) {
2004         log_err("Unable to open a UTF16-LE converter: %s\n", u_errorName(errorCode));
2005         return;
2006     }
2007     TestNextUChar(cnv, source, limit, results, "UTF-16LE");
2008     /* Test the condition when source >= sourceLimit */
2009     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2010     /*Test for the condition where there is an invalid character*/
2011     {
2012         static const uint8_t source2[]={0x61};
2013         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
2014     }
2015     /*Test for the condition where there is a surrogate character*/
2016     {
2017         static const uint8_t source2[]={0x01, 0xd8};
2018         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
2019     }
2020
2021     ucnv_close(cnv);
2022 }
2023
2024 void
2025 static TestUTF32() {
2026     /* test input */
2027     static const uint8_t in1[]={
2028         0x00, 0x00, 0xfe, 0xff,   0x00, 0x10, 0x0f, 0x00,   0x00, 0x00, 0xfe, 0xff
2029     };
2030     static const uint8_t in2[]={
2031         0xff, 0xfe, 0x00, 0x00,   0x00, 0x10, 0x0f, 0x00,   0xfe, 0xff, 0x00, 0x00
2032     };
2033     static const uint8_t in3[]={
2034         0x00, 0x00, 0xfe, 0xfe,   0x00, 0x10, 0x0f, 0x00,   0x00, 0x00, 0xd8, 0x40,   0x00, 0x00, 0xdc, 0x01
2035     };
2036
2037     /* expected test results */
2038     static const uint32_t results1[]={
2039         /* number of bytes read, code point */
2040         8, 0x100f00,
2041         4, 0xfeff
2042     };
2043     static const uint32_t results2[]={
2044         /* number of bytes read, code point */
2045         8, 0x0f1000,
2046         4, 0xfffe
2047     };
2048     static const uint32_t results3[]={
2049         /* number of bytes read, code point */
2050         4, 0xfefe,
2051         4, 0x100f00,
2052         4, 0xd840,
2053         4, 0xdc01
2054     };
2055
2056     const char *source, *limit;
2057
2058     UErrorCode errorCode=U_ZERO_ERROR;
2059     UConverter *cnv=ucnv_open("UTF-32", &errorCode);
2060     if(U_FAILURE(errorCode)) {
2061         log_err("Unable to open a UTF-32 converter: %s\n", u_errorName(errorCode));
2062         return;
2063     }
2064
2065     source=(const char *)in1, limit=(const char *)in1+sizeof(in1);
2066     TestNextUChar(cnv, source, limit, results1, "UTF-32");
2067
2068     source=(const char *)in2, limit=(const char *)in2+sizeof(in2);
2069     ucnv_resetToUnicode(cnv);
2070     TestNextUChar(cnv, source, limit, results2, "UTF-32");
2071
2072     source=(const char *)in3, limit=(const char *)in3+sizeof(in3);
2073     ucnv_resetToUnicode(cnv);
2074     TestNextUChar(cnv, source, limit, results3, "UTF-32");
2075
2076     /* Test the condition when source >= sourceLimit */
2077     ucnv_resetToUnicode(cnv);
2078     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2079
2080     ucnv_close(cnv);
2081 }
2082
2083 static void
2084 TestUTF32BE() {
2085     /* test input */
2086     static const uint8_t in[]={
2087         0x00, 0x00, 0x00, 0x61,
2088         0x00, 0x00, 0xdc, 0x00,
2089         0x00, 0x00, 0xd8, 0x00,
2090         0x00, 0x00, 0xdf, 0xff,
2091         0x00, 0x00, 0xff, 0xfd,
2092         0x00, 0x10, 0xab, 0xcd,
2093         0x00, 0x10, 0xff, 0xff
2094     };
2095
2096     /* expected test results */
2097     static const uint32_t results[]={
2098         /* number of bytes read, code point */
2099         4, 0x61,
2100         4, 0xdc00,
2101         4, 0xd800,
2102         4, 0xdfff,
2103         4, 0xfffd,
2104         4, 0x10abcd,
2105         4, 0x10ffff
2106     };
2107
2108     /* error test input */
2109     static const uint8_t in2[]={
2110         0x00, 0x00, 0x00, 0x61,
2111         0x00, 0x11, 0x00, 0x00,         /* 0x110000 out of range */
2112         0x00, 0x00, 0x00, 0x62,
2113         0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
2114         0x7f, 0xff, 0xff, 0xff,         /* 0x7fffffff out of range */
2115         0x00, 0x00, 0x01, 0x62,
2116         0x00, 0x00, 0x02, 0x62
2117     };
2118
2119     /* expected error test results */
2120     static const uint32_t results2[]={
2121         /* number of bytes read, code point */
2122         4,  0x61,
2123         8,  0x62,
2124         12, 0x162,
2125         4,  0x262
2126     };
2127
2128     UConverterToUCallback cb;
2129     const void *p;
2130
2131     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2132     UErrorCode errorCode=U_ZERO_ERROR;
2133     UConverter *cnv=ucnv_open("UTF-32BE", &errorCode);
2134     if(U_FAILURE(errorCode)) {
2135         log_err("Unable to open a UTF-32BE converter: %s\n", u_errorName(errorCode));
2136         return;
2137     }
2138     TestNextUChar(cnv, source, limit, results, "UTF-32BE");
2139
2140     /* Test the condition when source >= sourceLimit */
2141     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2142
2143     /* test error behavior with a skip callback */
2144     ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2145     source=(const char *)in2;
2146     limit=(const char *)(in2+sizeof(in2));
2147     TestNextUChar(cnv, source, limit, results2, "UTF-32BE");
2148
2149     ucnv_close(cnv);
2150 }
2151
2152 static void
2153 TestUTF32LE() {
2154     /* test input */
2155     static const uint8_t in[]={
2156         0x61, 0x00, 0x00, 0x00,
2157         0x00, 0xdc, 0x00, 0x00,
2158         0x00, 0xd8, 0x00, 0x00,
2159         0xff, 0xdf, 0x00, 0x00,
2160         0xfd, 0xff, 0x00, 0x00,
2161         0xcd, 0xab, 0x10, 0x00,
2162         0xff, 0xff, 0x10, 0x00
2163     };
2164
2165     /* expected test results */
2166     static const uint32_t results[]={
2167         /* number of bytes read, code point */
2168         4, 0x61,
2169         4, 0xdc00,
2170         4, 0xd800,
2171         4, 0xdfff,
2172         4, 0xfffd,
2173         4, 0x10abcd,
2174         4, 0x10ffff
2175     };
2176
2177     /* error test input */
2178     static const uint8_t in2[]={
2179         0x61, 0x00, 0x00, 0x00,
2180         0x00, 0x00, 0x11, 0x00,         /* 0x110000 out of range */
2181         0x62, 0x00, 0x00, 0x00,
2182         0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
2183         0xff, 0xff, 0xff, 0x7f,         /* 0x7fffffff out of range */
2184         0x62, 0x01, 0x00, 0x00,
2185         0x62, 0x02, 0x00, 0x00,
2186     };
2187
2188     /* expected error test results */
2189     static const uint32_t results2[]={
2190         /* number of bytes read, code point */
2191         4,  0x61,
2192         8,  0x62,
2193         12, 0x162,
2194         4,  0x262,
2195     };
2196
2197     UConverterToUCallback cb;
2198     const void *p;
2199
2200     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2201     UErrorCode errorCode=U_ZERO_ERROR;
2202     UConverter *cnv=ucnv_open("UTF-32LE", &errorCode);
2203     if(U_FAILURE(errorCode)) {
2204         log_err("Unable to open a UTF-32LE converter: %s\n", u_errorName(errorCode));
2205         return;
2206     }
2207     TestNextUChar(cnv, source, limit, results, "UTF-32LE");
2208
2209     /* Test the condition when source >= sourceLimit */
2210     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2211
2212     /* test error behavior with a skip callback */
2213     ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2214     source=(const char *)in2;
2215     limit=(const char *)(in2+sizeof(in2));
2216     TestNextUChar(cnv, source, limit, results2, "UTF-32LE");
2217
2218     ucnv_close(cnv);
2219 }
2220
2221 static void
2222 TestLATIN1() {
2223     /* test input */
2224     static const uint8_t in[]={
2225        0x61,
2226        0x31,
2227        0x32,
2228        0xc0,
2229        0xf0,
2230        0xf4,
2231     };
2232
2233     /* expected test results */
2234     static const uint32_t results[]={
2235         /* number of bytes read, code point */
2236         1, 0x61,
2237         1, 0x31,
2238         1, 0x32,
2239         1, 0xc0,
2240         1, 0xf0,
2241         1, 0xf4,
2242     };
2243     static const uint16_t in1[] = {
2244         0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2245         0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2246         0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2247         0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2248         0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2249         0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2250         0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2251         0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2252         0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2253         0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2254         0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2255         0xcb, 0x82
2256     };
2257     static const uint8_t out1[] = {
2258         0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2259         0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2260         0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2261         0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2262         0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2263         0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2264         0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2265         0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2266         0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2267         0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2268         0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2269         0xcb, 0x82
2270     };
2271     static const uint16_t in2[]={
2272         0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2273         0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2274         0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2275         0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2276         0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2277         0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2278         0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2279         0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2280         0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2281         0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2282         0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2283         0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2284         0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2285         0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2286         0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2287         0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2288         0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2289         0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2290         0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2291         0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2292         0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2293         0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2294         0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2295         0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2296         0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2297         0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2298         0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2299         0x37, 0x20, 0x2A, 0x2F,
2300     };
2301     static const unsigned char out2[]={
2302         0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2303         0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2304         0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2305         0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2306         0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2307         0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2308         0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2309         0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2310         0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2311         0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2312         0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2313         0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2314         0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2315         0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2316         0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2317         0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2318         0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2319         0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2320         0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2321         0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2322         0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2323         0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2324         0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2325         0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2326         0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2327         0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2328         0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2329         0x37, 0x20, 0x2A, 0x2F,
2330     };
2331     const char *source=(const char *)in;
2332     const char *limit=(const char *)in+sizeof(in);
2333
2334     UErrorCode errorCode=U_ZERO_ERROR;
2335     UConverter *cnv=ucnv_open("LATIN_1", &errorCode);
2336     if(U_FAILURE(errorCode)) {
2337         log_err("Unable to open a LATIN_1 converter: %s\n", u_errorName(errorCode));
2338         return;
2339     }
2340     TestNextUChar(cnv, source, limit, results, "LATIN_1");
2341     /* Test the condition when source >= sourceLimit */
2342     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2343     TestConv((uint16_t*)in1,sizeof(in1)/2,"LATIN_1","LATIN-1",(char*)out1,sizeof(out1));
2344     TestConv((uint16_t*)in2,sizeof(in2)/2,"ASCII","ASCII",(char*)out2,sizeof(out2));
2345
2346     ucnv_close(cnv);
2347 }
2348
2349 static void
2350 TestSBCS() {
2351     /* test input */
2352     static const uint8_t in[]={ 0x61, 0xc0, 0x80, 0xe0, 0xf0, 0xf4};
2353     /* expected test results */
2354     static const uint32_t results[]={
2355         /* number of bytes read, code point */
2356         1, 0x61,
2357         1, 0xbf,
2358         1, 0xc4,
2359         1, 0x2021,
2360         1, 0xf8ff,
2361         1, 0x00d9
2362     };
2363
2364     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2365     UErrorCode errorCode=U_ZERO_ERROR;
2366     UConverter *cnv=ucnv_open("ibm-1281", &errorCode);
2367     if(U_FAILURE(errorCode)) {
2368         log_data_err("Unable to open a SBCS(ibm-1281) converter: %s\n", u_errorName(errorCode));
2369         return;
2370     }
2371     TestNextUChar(cnv, source, limit, results, "SBCS(ibm-1281)");
2372     /* Test the condition when source >= sourceLimit */
2373     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2374     /*Test for Illegal character */ /*
2375     {
2376     static const uint8_t input1[]={ 0xA1 };
2377     const char* illegalsource=(const char*)input1;
2378     TestNextUCharError(cnv, illegalsource, illegalsource+sizeof(illegalsource), U_INVALID_CHAR_FOUND, "source has a illegal characte");
2379     }
2380    */
2381     ucnv_close(cnv);
2382 }
2383
2384 static void
2385 TestDBCS() {
2386     /* test input */
2387     static const uint8_t in[]={
2388         0x44, 0x6a,
2389         0xc4, 0x9c,
2390         0x7a, 0x74,
2391         0x46, 0xab,
2392         0x42, 0x5b,
2393
2394     };
2395
2396     /* expected test results */
2397     static const uint32_t results[]={
2398         /* number of bytes read, code point */
2399         2, 0x00a7,
2400         2, 0xe1d2,
2401         2, 0x6962,
2402         2, 0xf842,
2403         2, 0xffe5,
2404     };
2405
2406     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2407     UErrorCode errorCode=U_ZERO_ERROR;
2408
2409     UConverter *cnv=my_ucnv_open("@ibm9027", &errorCode);
2410     if(U_FAILURE(errorCode)) {
2411         log_data_err("Unable to open a DBCS(@ibm9027) converter: %s\n", u_errorName(errorCode));
2412         return;
2413     }
2414     TestNextUChar(cnv, source, limit, results, "DBCS(@ibm9027)");
2415     /* Test the condition when source >= sourceLimit */
2416     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2417     /*Test for the condition where we have a truncated char*/
2418     {
2419         static const uint8_t source1[]={0xc4};
2420         TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2421     }
2422     /*Test for the condition where there is an invalid character*/
2423     {
2424         static const uint8_t source2[]={0x1a, 0x1b};
2425         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
2426     }
2427     ucnv_close(cnv);
2428 }
2429
2430 static void
2431 TestMBCS() {
2432     /* test input */
2433     static const uint8_t in[]={
2434         0x01,
2435         0xa6, 0xa3,
2436         0x00,
2437         0xa6, 0xa1,
2438         0x08,
2439         0xc2, 0x76,
2440         0xc2, 0x78,
2441
2442     };
2443
2444     /* expected test results */
2445     static const uint32_t results[]={
2446         /* number of bytes read, code point */
2447         1, 0x0001,
2448         2, 0x250c,
2449         1, 0x0000,
2450         2, 0x2500,
2451         1, 0x0008,
2452         2, 0xd60c,
2453         2, 0xd60e,
2454     };
2455
2456     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2457     UErrorCode errorCode=U_ZERO_ERROR;
2458
2459     UConverter *cnv=ucnv_open("ibm-1363", &errorCode);
2460     if(U_FAILURE(errorCode)) {
2461         log_data_err("Unable to open a MBCS(ibm-1363) converter: %s\n", u_errorName(errorCode));
2462         return;
2463     }
2464     TestNextUChar(cnv, source, limit, results, "MBCS(ibm-1363)");
2465     /* Test the condition when source >= sourceLimit */
2466     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2467     /*Test for the condition where we have a truncated char*/
2468     {
2469         static const uint8_t source1[]={0xc4};
2470         TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2471     }
2472     /*Test for the condition where there is an invalid character*/
2473     {
2474         static const uint8_t source2[]={0xa1, 0x01};
2475         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
2476     }
2477     ucnv_close(cnv);
2478
2479 }
2480
2481 static void
2482 TestISO_2022() {
2483     /* test input */
2484     static const uint8_t in[]={
2485         0x1b, 0x25, 0x42, 0x31,
2486         0x32,
2487         0x61,
2488         0xc2, 0x80,
2489         0xe0, 0xa0, 0x80,
2490         0xf0, 0x90, 0x80, 0x80
2491     };
2492
2493
2494
2495     /* expected test results */
2496     static const uint32_t results[]={
2497         /* number of bytes read, code point */
2498         4, 0x0031,
2499         1, 0x0032,
2500         1, 0x61,
2501         2, 0x80,
2502         3, 0x800,
2503         4, 0x10000,
2504
2505     };
2506
2507     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2508     UErrorCode errorCode=U_ZERO_ERROR;
2509     UConverter *cnv;
2510
2511     cnv=ucnv_open("ISO_2022", &errorCode);
2512     if(U_FAILURE(errorCode)) {
2513         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
2514         return;
2515     }
2516     TestNextUChar(cnv, source, limit, results, "ISO_2022");
2517
2518     /* Test the condition when source >= sourceLimit */
2519     TestNextUCharError(cnv, source, source-1, U_ILLEGAL_ARGUMENT_ERROR, "sourceLimit < source");
2520     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2521     /*Test for the condition where we have a truncated char*/
2522     {
2523         static const uint8_t source1[]={0xc4};
2524         TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2525     }
2526     /*Test for the condition where there is an invalid character*/
2527     {
2528         static const uint8_t source2[]={0xa1, 0x01};
2529         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
2530     }
2531     ucnv_close(cnv);
2532 }
2533
2534 static void
2535 TestSmallTargetBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2536     const UChar* uSource;
2537     const UChar* uSourceLimit;
2538     const char* cSource;
2539     const char* cSourceLimit;
2540     UChar *uTargetLimit =NULL;
2541     UChar *uTarget;
2542     char *cTarget;
2543     const char *cTargetLimit;
2544     char *cBuf;
2545     UChar *uBuf,*test;
2546     int32_t uBufSize = 120;
2547     int len=0;
2548     int i=2;
2549     UErrorCode errorCode=U_ZERO_ERROR;
2550     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2551     cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
2552     ucnv_reset(cnv);
2553     for(;--i>0; ){
2554         uSource = (UChar*) source;
2555         uSourceLimit=(const UChar*)sourceLimit;
2556         cTarget = cBuf;
2557         uTarget = uBuf;
2558         cSource = cBuf;
2559         cTargetLimit = cBuf;
2560         uTargetLimit = uBuf;
2561
2562         do{
2563
2564             cTargetLimit = cTargetLimit+ i;
2565             ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode);
2566             if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2567                errorCode=U_ZERO_ERROR;
2568                 continue;
2569             }
2570
2571             if(U_FAILURE(errorCode)){
2572                 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2573                 return;
2574             }
2575
2576         }while (uSource<uSourceLimit);
2577
2578         cSourceLimit =cTarget;
2579         do{
2580             uTargetLimit=uTargetLimit+i;
2581             ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode);
2582             if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2583                errorCode=U_ZERO_ERROR;
2584                 continue;
2585             }
2586             if(U_FAILURE(errorCode)){
2587                    log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2588                     return;
2589             }
2590         }while(cSource<cSourceLimit);
2591
2592         uSource = source;
2593         test =uBuf;
2594         for(len=0;len<(int)(source - sourceLimit);len++){
2595             if(uBuf[len]!=uSource[len]){
2596                 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
2597             }
2598         }
2599     }
2600     free(uBuf);
2601     free(cBuf);
2602 }
2603 /* Test for Jitterbug 778 */
2604 static void TestToAndFromUChars(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2605     const UChar* uSource;
2606     const UChar* uSourceLimit;
2607     const char* cSource;
2608     UChar *uTargetLimit =NULL;
2609     UChar *uTarget;
2610     char *cTarget;
2611     const char *cTargetLimit;
2612     char *cBuf;
2613     UChar *uBuf,*test;
2614     int32_t uBufSize = 120;
2615     int numCharsInTarget=0;
2616     UErrorCode errorCode=U_ZERO_ERROR;
2617     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2618     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
2619     uSource = source;
2620     uSourceLimit=sourceLimit;
2621     cTarget = cBuf;
2622     cTargetLimit = cBuf +uBufSize*5;
2623     uTarget = uBuf;
2624     uTargetLimit = uBuf+ uBufSize*5;
2625     ucnv_reset(cnv);
2626     numCharsInTarget=ucnv_fromUChars( cnv , cTarget, (cTargetLimit-cTarget),uSource,(uSourceLimit-uSource), &errorCode);
2627     if(U_FAILURE(errorCode)){
2628         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
2629         return;
2630     }
2631     cSource = cBuf;
2632     test =uBuf;
2633     ucnv_toUChars(cnv,uTarget,(uTargetLimit-uTarget),cSource,numCharsInTarget,&errorCode);
2634     if(U_FAILURE(errorCode)){
2635         log_err("ucnv_toUChars conversion failed, reason %s\n", u_errorName(errorCode));
2636         return;
2637     }
2638     uSource = source;
2639     while(uSource<uSourceLimit){
2640         if(*test!=*uSource){
2641
2642             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
2643         }
2644         uSource++;
2645         test++;
2646     }
2647     free(uBuf);
2648     free(cBuf);
2649 }
2650
2651 static void TestSmallSourceBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2652     const UChar* uSource;
2653     const UChar* uSourceLimit;
2654     const char* cSource;
2655     const char* cSourceLimit;
2656     UChar *uTargetLimit =NULL;
2657     UChar *uTarget;
2658     char *cTarget;
2659     const char *cTargetLimit;
2660     char *cBuf;
2661     UChar *uBuf,*test;
2662     int32_t uBufSize = 120;
2663     int len=0;
2664     int i=2;
2665     const UChar *temp = sourceLimit;
2666     UErrorCode errorCode=U_ZERO_ERROR;
2667     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2668     cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
2669
2670     ucnv_reset(cnv);
2671     for(;--i>0;){
2672         uSource = (UChar*) source;
2673         cTarget = cBuf;
2674         uTarget = uBuf;
2675         cSource = cBuf;
2676         cTargetLimit = cBuf;
2677         uTargetLimit = uBuf+uBufSize*5;
2678         cTargetLimit = cTargetLimit+uBufSize*10;
2679         uSourceLimit=uSource;
2680         do{
2681
2682             if (uSourceLimit < sourceLimit) {
2683                 uSourceLimit = uSourceLimit+1;
2684             }
2685             ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode);
2686             if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2687                errorCode=U_ZERO_ERROR;
2688                 continue;
2689             }
2690
2691             if(U_FAILURE(errorCode)){
2692                 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2693                 return;
2694             }
2695
2696         }while (uSource<temp);
2697
2698         cSourceLimit =cBuf;
2699         do{
2700             if (cSourceLimit < cBuf + (cTarget - cBuf)) {
2701                 cSourceLimit = cSourceLimit+1;
2702             }
2703             ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode);
2704             if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2705                errorCode=U_ZERO_ERROR;
2706                 continue;
2707             }
2708             if(U_FAILURE(errorCode)){
2709                    log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2710                     return;
2711             }
2712         }while(cSource<cTarget);
2713
2714         uSource = source;
2715         test =uBuf;
2716         for(;len<(int)(source - sourceLimit);len++){
2717             if(uBuf[len]!=uSource[len]){
2718                 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
2719             }
2720         }
2721     }
2722     free(uBuf);
2723     free(cBuf);
2724 }
2725 static void
2726 TestGetNextUChar2022(UConverter* cnv, const char* source, const char* limit,
2727                      const uint16_t results[], const char* message){
2728      const char* s0;
2729      const char* s=(char*)source;
2730      const uint16_t *r=results;
2731      UErrorCode errorCode=U_ZERO_ERROR;
2732      uint32_t c,exC;
2733      ucnv_reset(cnv);
2734      while(s<limit) {
2735         s0=s;
2736         c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
2737         if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
2738             break; /* no more significant input */
2739         } else if(U_FAILURE(errorCode)) {
2740             log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
2741             break;
2742         } else {
2743             if(UTF_IS_FIRST_SURROGATE(*r)){
2744                 int i =0, len = 2;
2745                 UTF_NEXT_CHAR_SAFE(r, i, len, exC, FALSE);
2746                 r++;
2747             }else{
2748                 exC = *r;
2749             }
2750             if(c!=(uint32_t)(exC))
2751                 log_err("%s ucnv_getNextUChar() Expected:  \\u%04X Got:  \\u%04X \n",message,(uint32_t) (*r),c);
2752         }
2753         r++;
2754     }
2755 }
2756
2757 static int TestJitterbug930(const char* enc){
2758     UErrorCode err = U_ZERO_ERROR;
2759     UConverter*converter;
2760     char out[80];
2761     char*target = out;
2762     UChar in[4];
2763     const UChar*source = in;
2764     int32_t off[80];
2765     int32_t* offsets = off;
2766     int numOffWritten=0;
2767     UBool flush = 0;
2768     converter = my_ucnv_open(enc, &err);
2769
2770     in[0] = 0x41;     /* 0x4E00;*/
2771     in[1] = 0x4E01;
2772     in[2] = 0x4E02;
2773     in[3] = 0x4E03;
2774
2775     memset(off, '*', sizeof(off));
2776
2777     ucnv_fromUnicode (converter,
2778             &target,
2779             target+2,
2780             &source,
2781             source+3,
2782             offsets,
2783             flush,
2784             &err);
2785
2786         /* writes three bytes into the output buffer: 41 1B 24
2787         * but offsets contains 0 1 1
2788     */
2789     while(*offsets< off[10]){
2790         numOffWritten++;
2791         offsets++;
2792     }
2793     log_verbose("Testing Jitterbug 930 for encoding %s",enc);
2794     if(numOffWritten!= (int)(target-out)){
2795         log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc, (int)(target-out),numOffWritten);
2796     }
2797
2798     err = U_ZERO_ERROR;
2799
2800     memset(off,'*' , sizeof(off));
2801
2802     flush = 1;
2803     offsets=off;
2804     ucnv_fromUnicode (converter,
2805             &target,
2806             target+4,
2807             &source,
2808             source,
2809             offsets,
2810             flush,
2811             &err);
2812     numOffWritten=0;
2813     while(*offsets< off[10]){
2814         numOffWritten++;
2815         if(*offsets!= -1){
2816             log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc,-1,*offsets) ;
2817         }
2818         offsets++;
2819     }
2820
2821     /* writes 42 43 7A into output buffer,
2822      * offsets contains -1 -1 -1
2823      */
2824     ucnv_close(converter);
2825     return 0;
2826 }
2827
2828 static void
2829 TestHZ() {
2830     /* test input */
2831     static const uint16_t in[]={
2832             0x3000, 0x3001, 0x3002, 0x00B7, 0x02C9, 0x02C7, 0x00A8, 0x3003, 0x3005, 0x2014,
2833             0xFF5E, 0x2016, 0x2026, 0x007E, 0x997C, 0x70B3, 0x75C5, 0x5E76, 0x73BB, 0x83E0,
2834             0x64AD, 0x62E8, 0x94B5, 0x000A, 0x6CE2, 0x535A, 0x52C3, 0x640F, 0x94C2, 0x7B94,
2835             0x4F2F, 0x5E1B, 0x8236, 0x000A, 0x8116, 0x818A, 0x6E24, 0x6CCA, 0x9A73, 0x6355,
2836             0x535C, 0x54FA, 0x8865, 0x000A, 0x57E0, 0x4E0D, 0x5E03, 0x6B65, 0x7C3F, 0x90E8,
2837             0x6016, 0x248F, 0x2490, 0x000A, 0x2491, 0x2492, 0x2493, 0x2494, 0x2495, 0x2496,
2838             0x2497, 0x2498, 0x2499, 0x000A, 0x249A, 0x249B, 0x2474, 0x2475, 0x2476, 0x2477,
2839             0x2478, 0x2479, 0x247A, 0x000A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F, 0x2480,
2840             0x2481, 0x2482, 0x2483, 0x000A, 0x0041, 0x0043, 0x0044, 0x0045, 0x0046, 0x007E,
2841             0x0048, 0x0049, 0x004A, 0x000A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050,
2842             0x0051, 0x0052, 0x0053, 0x000A, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059,
2843             0x005A, 0x005B, 0x005C, 0x000A
2844       };
2845     const UChar* uSource;
2846     const UChar* uSourceLimit;
2847     const char* cSource;
2848     const char* cSourceLimit;
2849     UChar *uTargetLimit =NULL;
2850     UChar *uTarget;
2851     char *cTarget;
2852     const char *cTargetLimit;
2853     char *cBuf;
2854     UChar *uBuf,*test;
2855     int32_t uBufSize = 120;
2856     UErrorCode errorCode=U_ZERO_ERROR;
2857     UConverter *cnv;
2858     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
2859     int32_t* myOff= offsets;
2860     cnv=ucnv_open("HZ", &errorCode);
2861     if(U_FAILURE(errorCode)) {
2862         log_data_err("Unable to open HZ converter: %s\n", u_errorName(errorCode));
2863         return;
2864     }
2865
2866     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2867     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
2868     uSource = (const UChar*)&in[0];
2869     uSourceLimit=(const UChar*)&in[sizeof(in)/2];
2870     cTarget = cBuf;
2871     cTargetLimit = cBuf +uBufSize*5;
2872     uTarget = uBuf;
2873     uTargetLimit = uBuf+ uBufSize*5;
2874     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
2875     if(U_FAILURE(errorCode)){
2876         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
2877         return;
2878     }
2879     cSource = cBuf;
2880     cSourceLimit =cTarget;
2881     test =uBuf;
2882     myOff=offsets;
2883     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
2884     if(U_FAILURE(errorCode)){
2885         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2886         return;
2887     }
2888     uSource = (const UChar*)&in[0];
2889     while(uSource<uSourceLimit){
2890         if(*test!=*uSource){
2891
2892             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
2893         }
2894         uSource++;
2895         test++;
2896     }
2897     TestGetNextUChar2022(cnv, cBuf, cTarget, in, "HZ encoding");
2898     TestSmallTargetBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
2899     TestSmallSourceBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
2900     TestToAndFromUChars(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
2901     TestJitterbug930("csISO2022JP");
2902     ucnv_close(cnv);
2903     free(offsets);
2904     free(uBuf);
2905     free(cBuf);
2906 }
2907
/*
 * ISCII converter test. Holds two parallel tables -- Unicode text (in[])
 * and the corresponding byte sequence (byteArr[]) -- and passes them to
 * testConvertToU() with the converter name "x-iscii-de" and to TestConv()
 * with the converter name "ISCII,version=0". Both helpers are defined
 * elsewhere in this file.
 */
static void
TestISCII(){
        /* test input: Unicode side */
    static const uint16_t in[]={
        /* test full range of Devanagari */
        0x0901,0x0902,0x0903,0x0905,0x0906,0x0907,0x0908,0x0909,0x090A,
        0x090B,0x090E,0x090F,0x0910,0x090D,0x0912,0x0913,0x0914,0x0911,
        0x0915,0x0916,0x0917,0x0918,0x0919,0x091A,0x091B,0x091C,0x091D,
        0x091E,0x091F,0x0920,0x0921,0x0922,0x0923,0x0924,0x0925,0x0926,
        0x0927,0x0928,0x0929,0x092A,0x092B,0x092C,0x092D,0x092E,0x092F,
        0x095F,0x0930,0x0931,0x0932,0x0933,0x0934,0x0935,0x0936,0x0937,
        0x0938,0x0939,0x200D,0x093E,0x093F,0x0940,0x0941,0x0942,0x0943,
        0x0946,0x0947,0x0948,0x0945,0x094A,0x094B,0x094C,0x0949,0x094D,
        0x093d,0x0966,0x0967,0x0968,0x0969,0x096A,0x096B,0x096C,
        0x096D,0x096E,0x096F,
        /* test soft halant */
        0x0915,0x094d, 0x200D,
        /* test explicit halant */
        0x0915,0x094d, 0x200c,
        /* test double danda */
        0x965,
        /* test ASCII */
        0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
        0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
        0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
        /* tests from Lotus */
        0x0061,0x0915,0x000D,0x000A,0x0996,0x0043,
        0x0930,0x094D,0x200D,
        0x0901,0x000D,0x000A,0x0905,0x0985,0x0043,
        0x0915,0x0921,0x002B,0x095F,
        /* tamil range */
        0x0B86, 0xB87, 0xB88,
        /* telugu range */
        0x0C05, 0x0C02, 0x0C03,0x0c31,
        /* kannada range */
        0x0C85, 0xC82, 0x0C83,
        /* test Abbr sign and Anudatta */
        0x0970, 0x952,
       /* 0x0958,
        0x0959,
        0x095A,
        0x095B,
        0x095C,
        0x095D,
        0x095E,
        0x095F,*/
        0x0960 /* Vocalic RRI 0xAB, 0xE9 -- NOTE(review): byteArr below uses 0xAA, 0xE9 for this entry; confirm which is intended */,
        0x0944 /* Vowel Sign Vocalic RRI 0xDF, 0xE9 */,
        0x090C ,
        0x0962,
        0x0961 /* Vocalic LL 0xa6, 0xE9 -- NOTE(review): byteArr below uses 0xa7, 0xE9 for this entry; confirm */,
        0x0963 /* Vowel Sign Vocalic LL 0xdb, 0xE9 -- NOTE(review): byteArr below uses 0xdc, 0xE9 for this entry; confirm */,
        0x0950 /* OM Symbol 0xa1, 0xE9 */,
        0x093D /* Avagraha  0xEA, 0xE9 */,
        0x0958,
        0x0959,
        0x095A,
        0x095B,
        0x095C,
        0x095D,
        0x095E,
        0x0020, 0x094D, 0x0930, 0x0000, 0x00A0
      };
    /* test input: byte side, laid out in the same order as in[] */
    static const unsigned char byteArr[]={

        0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,
        0xaa,0xab,0xac,0xad,0xae,0xaf,0xb0,0xb1,0xb2,
        0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,
        0xbc,0xbd,0xbe,0xbf,0xc0,0xc1,0xc2,0xc3,0xc4,
        0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,
        0xce,0xcf,0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,
        0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
        0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,
        0xea,0xe9,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,
        0xf8,0xf9,0xfa,
        /* test soft halant */
        0xb3, 0xE8, 0xE9,
        /* test explicit halant */
        0xb3, 0xE8, 0xE8,
        /* test double danda */
        0xea, 0xea,
        /* test ASCII */
        0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
        0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
        0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
        /* test ATR code */

        /* tests from Lotus */
        0x61,0xEF,0x42,0xEF,0x30,0xB3,0x0D,0x0A,0xEF,0x43,0xB4,0x43,
        0xEF,0x42,0xCF,0xE8,0xD9,
        0xEF,0x42,0xA1,0x0D,0x0A,0xEF,0x42,0xA4,0xEF,0x43,0xA4,0x43,
        0xEF,0x42,0xB3,0xBF,0x2B,0xEF,0x42,0xCE,
        /* tamil range */
        0xEF, 0x44, 0xa5, 0xa6, 0xa7,
        /* telugu range */
        0xEF, 0x45,0xa4, 0xa2, 0xa3,0xd0,
        /* kannada range */
        0xEF, 0x48,0xa4, 0xa2, 0xa3,
        /* anudatta and abbreviation sign */
        0xEF, 0x42, 0xF0, 0xBF, 0xF0, 0xB8,


        0xAA, 0xE9,/* RI + NUKTA 0x0960*/

        0xDF, 0xE9,/* Vowel sign RI + NUKTA 0x0944*/

        0xa6, 0xE9,/* Vowel I + NUKTA 0x090C*/

        0xdb, 0xE9,/* Vowel sign I + Nukta 0x0962*/

        0xa7, 0xE9,/* Vowel II + NUKTA 0x0961*/

        0xdc, 0xE9,/* Vowel sign II + Nukta 0x0963*/

        0xa1, 0xE9,/* chandrabindu + Nukta 0x0950*/

        0xEA, 0xE9, /* Danda + Nukta 0x093D*/

        0xB3, 0xE9, /* Ka + NUKTA */

        0xB4, 0xE9, /* Kha + NUKTA */

        0xB5, 0xE9, /* Ga + NUKTA */

        0xBA, 0xE9,

        0xBF, 0xE9,

        0xC0, 0xE9,

        0xC9, 0xE9,
        /* INV halant RA    */
        0xD9, 0xE8, 0xCF,
        0x00, 0x00A0,
        /* just consume unhandled codepoints */
        0xEF, 0x30,

    };
    /* bytes -> Unicode: byteArr is the input and in[] the expected result */
    testConvertToU(byteArr,(sizeof(byteArr)),in,(sizeof(in)/U_SIZEOF_UCHAR),"x-iscii-de",NULL,TRUE);
    /* Unicode round trip through "ISCII,version=0"; "hindi" is the label TestConv uses in messages about byteArr */
    TestConv(in,(sizeof(in)/2),"ISCII,version=0","hindi", (char *)byteArr,sizeof(byteArr));

}
3050
3051 static void
3052 TestISO_2022_JP() {
3053     /* test input */
3054     static const uint16_t in[]={
3055         0x0041,/*0x00E9,*/0x3000, 0x3001, 0x3002, 0x0020, 0x000D, 0x000A,
3056         0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3057         0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3058         0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3059         0x301C, 0x2016, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3060         0x201D, 0x3014, 0x000D, 0x000A,
3061         0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3062         0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3063         };
3064     const UChar* uSource;
3065     const UChar* uSourceLimit;
3066     const char* cSource;
3067     const char* cSourceLimit;
3068     UChar *uTargetLimit =NULL;
3069     UChar *uTarget;
3070     char *cTarget;
3071     const char *cTargetLimit;
3072     char *cBuf;
3073     UChar *uBuf,*test;
3074     int32_t uBufSize = 120;
3075     UErrorCode errorCode=U_ZERO_ERROR;
3076     UConverter *cnv;
3077     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3078     int32_t* myOff= offsets;
3079     cnv=ucnv_open("ISO_2022_JP_1", &errorCode);
3080     if(U_FAILURE(errorCode)) {
3081         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3082         return;
3083     }
3084
3085     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3086     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3087     uSource = (const UChar*)&in[0];
3088     uSourceLimit=(const UChar*)&in[sizeof(in)/2];
3089     cTarget = cBuf;
3090     cTargetLimit = cBuf +uBufSize*5;
3091     uTarget = uBuf;
3092     uTargetLimit = uBuf+ uBufSize*5;
3093     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3094     if(U_FAILURE(errorCode)){
3095         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3096         return;
3097     }
3098     cSource = cBuf;
3099     cSourceLimit =cTarget;
3100     test =uBuf;
3101     myOff=offsets;
3102     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3103     if(U_FAILURE(errorCode)){
3104         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3105         return;
3106     }
3107
3108     uSource = (const UChar*)&in[0];
3109     while(uSource<uSourceLimit){
3110         if(*test!=*uSource){
3111
3112             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3113         }
3114         uSource++;
3115         test++;
3116     }
3117
3118     TestSmallTargetBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
3119     TestSmallSourceBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
3120     TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-JP encoding");
3121     TestToAndFromUChars(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
3122     TestJitterbug930("csISO2022JP");
3123     ucnv_close(cnv);
3124     free(uBuf);
3125     free(cBuf);
3126     free(offsets);
3127 }
3128
3129 static void TestConv(const uint16_t in[],int len, const char* conv, const char* lang, char byteArr[],int byteArrLen){
3130     const UChar* uSource;
3131     const UChar* uSourceLimit;
3132     const char* cSource;
3133     const char* cSourceLimit;
3134     UChar *uTargetLimit =NULL;
3135     UChar *uTarget;
3136     char *cTarget;
3137     const char *cTargetLimit;
3138     char *cBuf;
3139     UChar *uBuf,*test;
3140     int32_t uBufSize = 120*10;
3141     UErrorCode errorCode=U_ZERO_ERROR;
3142     UConverter *cnv;
3143     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) );
3144     int32_t* myOff= offsets;
3145     cnv=my_ucnv_open(conv, &errorCode);
3146     if(U_FAILURE(errorCode)) {
3147         log_data_err("Unable to open a %s converter: %s\n", conv, u_errorName(errorCode));
3148         return;
3149     }
3150
3151     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar));
3152     cBuf =(char*)malloc(uBufSize * sizeof(char));
3153     uSource = (const UChar*)&in[0];
3154     uSourceLimit=uSource+len;
3155     cTarget = cBuf;
3156     cTargetLimit = cBuf +uBufSize;
3157     uTarget = uBuf;
3158     uTargetLimit = uBuf+ uBufSize;
3159     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3160     if(U_FAILURE(errorCode)){
3161         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3162         return;
3163     }
3164     /*log_verbose("length of compressed string for language %s using %s:%i \n",conv,lang,(cTarget-cBuf));*/
3165     cSource = cBuf;
3166     cSourceLimit =cTarget;
3167     test =uBuf;
3168     myOff=offsets;
3169     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3170     if(U_FAILURE(errorCode)){
3171         log_err("ucnv_toUnicode conversion failed, reason: %s\n", u_errorName(errorCode));
3172         return;
3173     }
3174
3175     uSource = (const UChar*)&in[0];
3176     while(uSource<uSourceLimit){
3177         if(*test!=*uSource){
3178             log_err("for codepage %s : Expected : \\u%04X \t Got: \\u%04X\n",conv,*uSource,(int)*test) ;
3179         }
3180         uSource++;
3181         test++;
3182     }
3183     TestSmallTargetBuffer(&in[0],(const UChar*)&in[len],cnv);
3184     TestSmallSourceBuffer(&in[0],(const UChar*)&in[len],cnv);
3185     TestGetNextUChar2022(cnv, cBuf, cTarget, in, conv);
3186     if(byteArr && byteArrLen!=0){
3187         TestGetNextUChar2022(cnv, byteArr, (byteArr+byteArrLen), in, lang);
3188         TestToAndFromUChars(&in[0],(const UChar*)&in[len],cnv);
3189         {
3190             cSource = byteArr;
3191             cSourceLimit = cSource+byteArrLen;
3192             test=uBuf;
3193             myOff = offsets;
3194             ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3195             if(U_FAILURE(errorCode)){
3196                 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3197                 return;
3198             }
3199
3200             uSource = (const UChar*)&in[0];
3201             while(uSource<uSourceLimit){
3202                 if(*test!=*uSource){
3203                     log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3204                 }
3205                 uSource++;
3206                 test++;
3207             }
3208         }
3209     }
3210
3211     ucnv_close(cnv);
3212     free(uBuf);
3213     free(cBuf);
3214     free(offsets);
3215 }
3216 static UChar U_CALLCONV
3217 _charAt(int32_t offset, void *context) {
3218     return ((char*)context)[offset];
3219 }
3220
3221 static int32_t
3222 unescape(UChar* dst, int32_t dstLen,const char* src,int32_t srcLen,UErrorCode *status){
3223     int32_t srcIndex=0;
3224     int32_t dstIndex=0;
3225     if(U_FAILURE(*status)){
3226         return 0;
3227     }
3228     if((dst==NULL && dstLen>0) || (src==NULL ) || dstLen < -1 || srcLen <-1 ){
3229         *status = U_ILLEGAL_ARGUMENT_ERROR;
3230         return 0;
3231     }
3232     if(srcLen==-1){
3233         srcLen = uprv_strlen(src);
3234     }
3235
3236     for (; srcIndex<srcLen; ) {
3237         UChar32 c = src[srcIndex++];
3238         if (c == 0x005C /*'\\'*/) {
3239             c = u_unescapeAt(_charAt,&srcIndex,srcLen,(void*)src); /* advances i*/
3240             if (c == (UChar32)0xFFFFFFFF) {
3241                 *status=U_INVALID_CHAR_FOUND; /* return empty string */
3242                 break; /* invalid escape sequence */
3243             }
3244         }
3245         if(dstIndex < dstLen){
3246             if(c>0xFFFF){
3247                dst[dstIndex++] = UTF16_LEAD(c);
3248                if(dstIndex<dstLen){
3249                     dst[dstIndex]=UTF16_TRAIL(c);
3250                }else{
3251                    *status=U_BUFFER_OVERFLOW_ERROR;
3252                }
3253             }else{
3254                 dst[dstIndex]=(UChar)c;
3255             }
3256
3257         }else{
3258             *status = U_BUFFER_OVERFLOW_ERROR;
3259         }
3260         dstIndex++; /* for preflighting */
3261     }
3262     return dstIndex;
3263 }
3264
3265 static void
3266 TestFullRoundtrip(const char* cp){
3267     UChar usource[10] ={0};
3268     UChar nsrc[10] = {0};
3269     uint32_t i=1;
3270     int len=0, ulen;
3271     nsrc[0]=0x0061;
3272     /* Test codepoint 0 */
3273     TestConv(usource,1,cp,"",NULL,0);
3274     TestConv(usource,2,cp,"",NULL,0);
3275     nsrc[2]=0x5555;
3276     TestConv(nsrc,3,cp,"",NULL,0);
3277
3278     for(;i<=0x10FFFF;i++){
3279         if(i==0xD800){
3280             i=0xDFFF;
3281             continue;
3282         }
3283         if(i<=0xFFFF){
3284             usource[0] =(UChar) i;
3285             len=1;
3286         }else{
3287             usource[0]=UTF16_LEAD(i);
3288             usource[1]=UTF16_TRAIL(i);
3289             len=2;
3290         }
3291         ulen=len;
3292         if(i==0x80) {
3293             usource[2]=0;
3294         }
3295         /* Test only single code points */
3296         TestConv(usource,ulen,cp,"",NULL,0);
3297         /* Test codepoint repeated twice */
3298         usource[ulen]=usource[0];
3299         usource[ulen+1]=usource[1];
3300         ulen+=len;
3301         TestConv(usource,ulen,cp,"",NULL,0);
3302         /* Test codepoint repeated 3 times */
3303         usource[ulen]=usource[0];
3304         usource[ulen+1]=usource[1];
3305         ulen+=len;
3306         TestConv(usource,ulen,cp,"",NULL,0);
3307         /* Test codepoint in between 2 codepoints */
3308         nsrc[1]=usource[0];
3309         nsrc[2]=usource[1];
3310         nsrc[len+1]=0x5555;
3311         TestConv(nsrc,len+2,cp,"",NULL,0);
3312         uprv_memset(usource,0,sizeof(UChar)*10);
3313     }
3314 }
3315
3316 static void
3317 TestRoundTrippingAllUTF(void){
3318     if(!QUICK){
3319         log_verbose("Running exhaustive round trip test for BOCU-1\n");
3320         TestFullRoundtrip("BOCU-1");
3321         log_verbose("Running exhaustive round trip test for SCSU\n");
3322         TestFullRoundtrip("SCSU");
3323         log_verbose("Running exhaustive round trip test for UTF-8\n");
3324         TestFullRoundtrip("UTF-8");
3325         log_verbose("Running exhaustive round trip test for CESU-8\n");
3326         TestFullRoundtrip("CESU-8");
3327         log_verbose("Running exhaustive round trip test for UTF-16BE\n");
3328         TestFullRoundtrip("UTF-16BE");
3329         log_verbose("Running exhaustive round trip test for UTF-16LE\n");
3330         TestFullRoundtrip("UTF-16LE");
3331         log_verbose("Running exhaustive round trip test for UTF-16\n");
3332         TestFullRoundtrip("UTF-16");
3333         log_verbose("Running exhaustive round trip test for UTF-32BE\n");
3334         TestFullRoundtrip("UTF-32BE");
3335         log_verbose("Running exhaustive round trip test for UTF-32LE\n");
3336         TestFullRoundtrip("UTF-32LE");
3337         log_verbose("Running exhaustive round trip test for UTF-32\n");
3338         TestFullRoundtrip("UTF-32");
3339         log_verbose("Running exhaustive round trip test for UTF-7\n");
3340         TestFullRoundtrip("UTF-7");
3341         log_verbose("Running exhaustive round trip test for UTF-7\n");
3342         TestFullRoundtrip("UTF-7,version=1");
3343         log_verbose("Running exhaustive round trip test for IMAP-mailbox-name\n");
3344         TestFullRoundtrip("IMAP-mailbox-name");
3345         log_verbose("Running exhaustive round trip test for GB18030\n");
3346         TestFullRoundtrip("GB18030");
3347     }
3348 }
3349
3350 static void
3351 TestSCSU() {
3352
3353     static const uint16_t germanUTF16[]={
3354         0x00d6, 0x006c, 0x0020, 0x0066, 0x006c, 0x0069, 0x0065, 0x00df, 0x0074
3355     };
3356
3357     static const uint8_t germanSCSU[]={
3358         0xd6, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x65, 0xdf, 0x74
3359     };
3360
3361     static const uint16_t russianUTF16[]={
3362         0x041c, 0x043e, 0x0441, 0x043a, 0x0432, 0x0430
3363     };
3364
3365     static const uint8_t russianSCSU[]={
3366         0x12, 0x9c, 0xbe, 0xc1, 0xba, 0xb2, 0xb0
3367     };
3368
3369     static const uint16_t japaneseUTF16[]={
3370         0x3000, 0x266a, 0x30ea, 0x30f3, 0x30b4, 0x53ef, 0x611b,
3371         0x3044, 0x3084, 0x53ef, 0x611b, 0x3044, 0x3084, 0x30ea, 0x30f3,
3372         0x30b4, 0x3002, 0x534a, 0x4e16, 0x7d00, 0x3082, 0x524d, 0x306b,
3373         0x6d41, 0x884c, 0x3057, 0x305f, 0x300c, 0x30ea, 0x30f3, 0x30b4,
3374         0x306e, 0x6b4c, 0x300d, 0x304c, 0x3074, 0x3063, 0x305f, 0x308a,
3375         0x3059, 0x308b, 0x304b, 0x3082, 0x3057, 0x308c, 0x306a, 0x3044,
3376         0x3002, 0x7c73, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x30b3, 0x30f3,
3377         0x30d4, 0x30e5, 0x30fc, 0x30bf, 0x793e, 0x306e, 0x30d1, 0x30bd,
3378         0x30b3, 0x30f3, 0x300c, 0x30de, 0x30c3, 0x30af, 0xff08, 0x30de,
3379         0x30c3, 0x30ad, 0x30f3, 0x30c8, 0x30c3, 0x30b7, 0x30e5, 0xff09,
3380         0x300d, 0x3092, 0x3001, 0x3053, 0x3088, 0x306a, 0x304f, 0x611b,
3381         0x3059, 0x308b, 0x4eba, 0x305f, 0x3061, 0x306e, 0x3053, 0x3068,
3382         0x3060, 0x3002, 0x300c, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x4fe1,
3383         0x8005, 0x300d, 0x306a, 0x3093, 0x3066, 0x8a00, 0x3044, 0x65b9,
3384         0x307e, 0x3067, 0x3042, 0x308b, 0x3002
3385     };
3386
3387     /* SCSUEncoder produces a slightly longer result (179B vs. 178B) because of one different choice:
3388      it uses an SQn once where a longer look-ahead could have shown that SCn is more efficient */
3389     static const uint8_t japaneseSCSU[]={
3390         0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
3391         0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
3392         0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
3393         0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
3394         0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
3395         0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
3396         0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
3397         0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
3398         0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
3399         0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
3400         0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
3401         0xcb, 0x82
3402     };
3403
3404     static const uint16_t allFeaturesUTF16[]={
3405         0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 0x01df, 0xf000, 0xdbff,
3406         0xdfff, 0x000d, 0x000a, 0x0041, 0x00df, 0x0401, 0x015f, 0x00df,
3407         0x01df, 0xf000, 0xdbff, 0xdfff
3408     };
3409
3410     /* see comment at japaneseSCSU: the same kind of different choice yields a slightly shorter
3411      * result here (34B vs. 35B)
3412      */
3413     static const uint8_t allFeaturesSCSU[]={
3414         0x41, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x1b, 0x03,
3415         0xdf, 0x1c, 0x88, 0x80, 0x0b, 0xbf, 0xff, 0xff, 0x0d, 0x0a,
3416         0x41, 0x10, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x13,
3417         0xdf, 0x14, 0x80, 0x15, 0xff
3418     };
3419     static const uint16_t monkeyIn[]={
3420         0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3421         0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3422         0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3423         0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3424         0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3425         0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3426         0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3427         0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3428         0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3429         0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3430         0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3431         0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3432         0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3433         0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3434         0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3435         0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3436         0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3437         0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3438         0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A,
3439         /* test non-BMP code points */
3440         0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
3441         0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
3442         0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
3443         0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
3444         0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
3445         0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
3446         0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
3447         0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
3448         0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
3449         0xD869, 0xDED5, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xDBFF, 0xDFFF,
3450         0xDBFF, 0xDFFF, 0xDBFF, 0xDFFF,
3451
3452
3453         0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
3454         0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
3455         0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
3456         0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
3457         0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
3458     };
3459     static const char *fTestCases [] = {
3460           "\\ud800\\udc00", /* smallest surrogate*/
3461           "\\ud8ff\\udcff",
3462           "\\udBff\\udFff", /* largest surrogate pair*/
3463           "\\ud834\\udc00",
3464           "\\U0010FFFF",
3465           "Hello \\u9292 \\u9192 World!",
3466           "Hell\\u0429o \\u9292 \\u9192 W\\u00e4rld!",
3467           "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3468
3469           "\\u0648\\u06c8", /* catch missing reset*/
3470           "\\u0648\\u06c8",
3471
3472           "\\u4444\\uE001", /* lowest quotable*/
3473           "\\u4444\\uf2FF", /* highest quotable*/
3474           "\\u4444\\uf188\\u4444",
3475           "\\u4444\\uf188\\uf288",
3476           "\\u4444\\uf188abc\\u0429\\uf288",
3477           "\\u9292\\u2222",
3478           "Hell\\u0429\\u04230o \\u9292 \\u9292W\\u00e4\\u0192rld!",
3479           "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3480           "Hello World!123456",
3481           "Hello W\\u0081\\u011f\\u0082!", /* Latin 1 run*/
3482
3483           "abc\\u0301\\u0302",  /* uses SQn for u301 u302*/
3484           "abc\\u4411d",      /* uses SQU*/
3485           "abc\\u4411\\u4412d",/* uses SCU*/
3486           "abc\\u0401\\u0402\\u047f\\u00a5\\u0405", /* uses SQn for ua5*/
3487           "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", /* SJIS like data*/
3488           "\\u9292\\u2222",
3489           "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000",
3490           "\\u9999\\u3051\\u300c\\u9999\\u9999\\u3060\\u9999\\u3065\\u3065\\u3065\\u300c",
3491           "\\u3000\\u266a\\u30ea\\u30f3\\u30b4\\u53ef\\u611b\\u3044\\u3084\\u53ef\\u611b\\u3044\\u3084\\u30ea\\u30f3\\u30b4\\u3002",
3492
3493           "", /* empty input*/
3494           "\\u0000", /* smallest BMP character*/
3495           "\\uFFFF", /* largest BMP character*/
3496
3497           /* regression tests*/
3498           "\\u6441\\ub413\\ua733\\uf8fe\\ueedb\\u587f\\u195f\\u4899\\uf23d\\u49fd\\u0aac\\u5792\\ufc22\\ufc3c\\ufc46\\u00aa",
3499           "\\u00df\\u01df\\uf000\\udbff\\udfff\\u000d\n\\u0041\\u00df\\u0401\\u015f\\u00df\\u01df\\uf000\\udbff\\udfff",
3500           "\\u30f9\\u8321\\u05e5\\u181c\\ud72b\\u2019\\u99c9\\u2f2f\\uc10c\\u82e1\\u2c4d\\u1ebc\\u6013\\u66dc\\ubbde\\u94a5\\u4726\\u74af\\u3083\\u55b9\\u000c",
3501           "\\u0041\\u00df\\u0401\\u015f",
3502           "\\u9066\\u2123abc",
3503           "\\ud266\\u43d7\\u\\ue386\\uc9c0\\u4a6b\\u9222\\u901f\\u7410\\ua63f\\u539b\\u9596\\u482e\\u9d47\\ucfe4\\u7b71\\uc280\\uf26a\\u982f\\u862a\\u4edd\\uf513\\ufda6\\u869d\\u2ee0\\ua216\\u3ff6\\u3c70\\u89c0\\u9576\\ud5ec\\ubfda\\u6cca\\u5bb3\\ubcea\\u554c\\u914e\\ufa4a\\uede3\\u2990\\ud2f5\\u2729\\u5141\\u0f26\\uccd8\\u5413\\ud196\\ubbe2\\u51b9\\u9b48\\u0dc8\\u2195\\u21a2\\u21e9\\u00e4\\u9d92\\u0bc0\\u06c5",
3504           "\\uf95b\\u2458\\u2468\\u0e20\\uf51b\\ue36e\\ubfc1\\u0080\\u02dd\\uf1b5\\u0cf3\\u6059\\u7489",
3505     };
3506     int i=0;
3507     for(;i<sizeof(fTestCases)/sizeof(*fTestCases);i++){
3508         const char* cSrc = fTestCases[i];
3509         UErrorCode status = U_ZERO_ERROR;
3510         int32_t cSrcLen,srcLen;
3511         UChar* src;
3512         /* UConverter* cnv = ucnv_open("SCSU",&status); */
3513         cSrcLen= srcLen =  uprv_strlen(fTestCases[i]);
3514         src = (UChar*) malloc((sizeof(UChar) * srcLen) + sizeof(UChar));
3515         srcLen=unescape(src,srcLen,cSrc,cSrcLen,&status);
3516         log_verbose("Testing roundtrip for src: %s at index :%d\n",cSrc,i);
3517         TestConv(src,srcLen,"SCSU","Coverage",NULL,0);
3518         free(src);
3519     }
3520     TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features", (char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
3521     TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features",(char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
3522     TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
3523     TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU,locale=ja","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
3524     TestConv(germanUTF16,(sizeof(germanUTF16)/2),"SCSU","german",(char *)germanSCSU,sizeof(germanSCSU));
3525     TestConv(russianUTF16,(sizeof(russianUTF16)/2), "SCSU","russian",(char *)russianSCSU,sizeof(russianSCSU));
3526     TestConv(monkeyIn,(sizeof(monkeyIn)/2),"SCSU","monkey",NULL,0);
3527 }
3528 static void TestJitterbug2346(){
3529     char source[] = { 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a,
3530                       0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a};
3531     uint16_t expected[] = {0x91CD,0x000D,0x000A,0x91CD,0x000D,0x000A};
3532
3533     UChar uTarget[500]={'\0'};
3534     UChar* utarget=uTarget;
3535     UChar* utargetLimit=uTarget+sizeof(uTarget)/2;
3536
3537     char cTarget[500]={'\0'};
3538     char* ctarget=cTarget;
3539     char* ctargetLimit=cTarget+sizeof(cTarget);
3540     const char* csource=source;
3541     UChar* temp = expected;
3542     UErrorCode err=U_ZERO_ERROR;
3543
3544     UConverter* conv =ucnv_open("ISO_2022_JP",&err);
3545     if(U_FAILURE(err)) {
3546         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
3547         return;
3548     }
3549     ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(source),NULL,TRUE,&err);
3550     if(U_FAILURE(err)) {
3551         log_err("ISO_2022_JP to Unicode conversion failed: %s\n", u_errorName(err));
3552         return;
3553     }
3554     utargetLimit=utarget;
3555     utarget = uTarget;
3556     while(utarget<utargetLimit){
3557         if(*temp!=*utarget){
3558
3559             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*utarget,(int)*temp) ;
3560         }
3561         utarget++;
3562         temp++;
3563     }
3564     ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err);
3565     if(U_FAILURE(err)) {
3566         log_err("ISO_2022_JP from Unicode conversion failed: %s\n", u_errorName(err));
3567         return;
3568     }
3569     ctargetLimit=ctarget;
3570     ctarget =cTarget;
3571     ucnv_close(conv);
3572
3573
3574 }
3575 static void
3576 TestISO_2022_JP_1() {
3577     /* test input */
3578     static const uint16_t in[]={
3579         0x3000, 0x3001, 0x3002, 0x0020, 0xFF0E, 0x30FB, 0xFF1A, 0xFF1B, 0x000D, 0x000A,
3580         0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3581         0x52C8, 0x52CC, 0x52CF, 0x52D1, 0x52D4, 0x52D6, 0x52DB, 0x52DC, 0x000D, 0x000A,
3582         0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3583         0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3584         0x301C, 0x2016, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3585         0x201D, 0x000D, 0x000A,
3586         0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3587         0x4F94, 0x4F97, 0x52BA, 0x52BB, 0x52BD, 0x52C0, 0x52C4, 0x52C6, 0x000D, 0x000A,
3588         0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3589         0x4F78, 0x4F79, 0x4F7A, 0x4F7D, 0x4F7E, 0x4F81, 0x4F82, 0x4F84, 0x000D, 0x000A,
3590         0x4F85, 0x4F89, 0x4F8A, 0x4F8C, 0x4F8E, 0x4F90, 0x4F92, 0x4F93, 0x000D, 0x000A,
3591         0x52E1, 0x52E5, 0x52E8, 0x52E9, 0x000D, 0x000A
3592       };
3593     const UChar* uSource;
3594     const UChar* uSourceLimit;
3595     const char* cSource;
3596     const char* cSourceLimit;
3597     UChar *uTargetLimit =NULL;
3598     UChar *uTarget;
3599     char *cTarget;
3600     const char *cTargetLimit;
3601     char *cBuf;
3602     UChar *uBuf,*test;
3603     int32_t uBufSize = 120;
3604     UErrorCode errorCode=U_ZERO_ERROR;
3605     UConverter *cnv;
3606
3607     cnv=ucnv_open("ISO_2022_JP_1", &errorCode);
3608     if(U_FAILURE(errorCode)) {
3609         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3610         return;
3611     }
3612
3613     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3614     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3615     uSource = (const UChar*)&in[0];
3616     uSourceLimit=(const UChar*)&in[sizeof(in)/2];
3617     cTarget = cBuf;
3618     cTargetLimit = cBuf +uBufSize*5;
3619     uTarget = uBuf;
3620     uTargetLimit = uBuf+ uBufSize*5;
3621     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,TRUE, &errorCode);
3622     if(U_FAILURE(errorCode)){
3623         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3624         return;
3625     }
3626     cSource = cBuf;
3627     cSourceLimit =cTarget;
3628     test =uBuf;
3629     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,TRUE,&errorCode);
3630     if(U_FAILURE(errorCode)){
3631         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3632         return;
3633     }
3634     uSource = (const UChar*)&in[0];
3635     while(uSource<uSourceLimit){
3636         if(*test!=*uSource){
3637
3638             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3639         }
3640         uSource++;
3641         test++;
3642     }
3643     /*ucnv_close(cnv);
3644     cnv=ucnv_open("ISO_2022,locale=jp,version=1", &errorCode);*/
3645     /*Test for the condition where there is an invalid character*/
3646     ucnv_reset(cnv);
3647     {
3648         static const uint8_t source2[]={0x0e,0x24,0x053};
3649         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-1]");
3650     }
3651     TestSmallTargetBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
3652     TestSmallSourceBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
3653     ucnv_close(cnv);
3654     free(uBuf);
3655     free(cBuf);
3656 }
3657
3658 static void
3659 TestISO_2022_JP_2() {
3660     /* test input */
3661     static const uint16_t in[]={
3662         0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3663         0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3664         0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3665         0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3666         0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3667         0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3668         0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3669         0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3670         0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3671         0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3672         0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3673         0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3674         0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3675         0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3676         0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3677         0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3678         0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3679         0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3680         0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A
3681       };
3682     const UChar* uSource;
3683     const UChar* uSourceLimit;
3684     const char* cSource;
3685     const char* cSourceLimit;
3686     UChar *uTargetLimit =NULL;
3687     UChar *uTarget;
3688     char *cTarget;
3689     const char *cTargetLimit;
3690     char *cBuf;
3691     UChar *uBuf,*test;
3692     int32_t uBufSize = 120;
3693     UErrorCode errorCode=U_ZERO_ERROR;
3694     UConverter *cnv;
3695     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3696     int32_t* myOff= offsets;
3697     cnv=ucnv_open("ISO_2022_JP_2", &errorCode);
3698     if(U_FAILURE(errorCode)) {
3699         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3700         return;
3701     }
3702
3703     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3704     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3705     uSource = (const UChar*)&in[0];
3706     uSourceLimit=(const UChar*)&in[sizeof(in)/2];
3707     cTarget = cBuf;
3708     cTargetLimit = cBuf +uBufSize*5;
3709     uTarget = uBuf;
3710     uTargetLimit = uBuf+ uBufSize*5;
3711     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3712     if(U_FAILURE(errorCode)){
3713         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3714         return;
3715     }
3716     cSource = cBuf;
3717     cSourceLimit =cTarget;
3718     test =uBuf;
3719     myOff=offsets;
3720     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3721     if(U_FAILURE(errorCode)){
3722         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3723         return;
3724     }
3725     uSource = (const UChar*)&in[0];
3726     while(uSource<uSourceLimit){
3727         if(*test!=*uSource){
3728
3729             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3730         }
3731         uSource++;
3732         test++;
3733     }
3734     TestSmallTargetBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
3735     TestSmallSourceBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
3736     TestToAndFromUChars(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
3737     /*Test for the condition where there is an invalid character*/
3738     ucnv_reset(cnv);
3739     {
3740         static const uint8_t source2[]={0x0e,0x24,0x053};
3741         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-2]");
3742     }
3743     ucnv_close(cnv);
3744     free(uBuf);
3745     free(cBuf);
3746     free(offsets);
3747 }
3748
3749 static void
3750 TestISO_2022_KR() {
3751     /* test input */
3752     static const uint16_t in[]={
3753                     0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F66,0x9F67,0x9F6A,0x000A,0x000D
3754                    ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC02,0xAC04
3755                    ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
3756                    ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
3757                    ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53DF,0x53E1,0x53E2
3758                    ,0x53E3,0x53E4,0x000A,0x000D};
3759     const UChar* uSource;
3760     const UChar* uSourceLimit;
3761     const char* cSource;
3762     const char* cSourceLimit;
3763     UChar *uTargetLimit =NULL;
3764     UChar *uTarget;
3765     char *cTarget;
3766     const char *cTargetLimit;
3767     char *cBuf;
3768     UChar *uBuf,*test;
3769     int32_t uBufSize = 120;
3770     UErrorCode errorCode=U_ZERO_ERROR;
3771     UConverter *cnv;
3772     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3773     int32_t* myOff= offsets;
3774     cnv=ucnv_open("ISO_2022,locale=kr", &errorCode);
3775     if(U_FAILURE(errorCode)) {
3776         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3777         return;
3778     }
3779
3780     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3781     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3782     uSource = (const UChar*)&in[0];
3783     uSourceLimit=(const UChar*)&in[sizeof(in)/2];
3784     cTarget = cBuf;
3785     cTargetLimit = cBuf +uBufSize*5;
3786     uTarget = uBuf;
3787     uTargetLimit = uBuf+ uBufSize*5;
3788     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3789     if(U_FAILURE(errorCode)){
3790         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3791         return;
3792     }
3793     cSource = cBuf;
3794     cSourceLimit =cTarget;
3795     test =uBuf;
3796     myOff=offsets;
3797     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3798     if(U_FAILURE(errorCode)){
3799         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3800         return;
3801     }
3802     uSource = (const UChar*)&in[0];
3803     while(uSource<uSourceLimit){
3804         if(*test!=*uSource){
3805             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
3806         }
3807         uSource++;
3808         test++;
3809     }
3810     TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
3811     TestSmallTargetBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
3812     TestSmallSourceBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
3813     TestToAndFromUChars(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
3814     TestJitterbug930("csISO2022KR");
3815     /*Test for the condition where there is an invalid character*/
3816     ucnv_reset(cnv);
3817     {
3818         static const uint8_t source2[]={0x1b,0x24,0x053};
3819         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
3820     }
3821     ucnv_close(cnv);
3822     free(uBuf);
3823     free(cBuf);
3824     free(offsets);
3825 }
3826
3827 static void
3828 TestISO_2022_KR_1() {
3829     /* test input */
3830     static const uint16_t in[]={
3831                     0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
3832                    ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
3833                    ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
3834                    ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
3835                    ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
3836                    ,0x53E3,0x53E4,0x000A,0x000D};
3837     const UChar* uSource;
3838     const UChar* uSourceLimit;
3839     const char* cSource;
3840     const char* cSourceLimit;
3841     UChar *uTargetLimit =NULL;
3842     UChar *uTarget;
3843     char *cTarget;
3844     const char *cTargetLimit;
3845     char *cBuf;
3846     UChar *uBuf,*test;
3847     int32_t uBufSize = 120;
3848     UErrorCode errorCode=U_ZERO_ERROR;
3849     UConverter *cnv;
3850     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3851     int32_t* myOff= offsets;
3852     cnv=ucnv_open("ibm-25546", &errorCode);
3853     if(U_FAILURE(errorCode)) {
3854         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3855         return;
3856     }
3857
3858     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3859     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3860     uSource = (const UChar*)&in[0];
3861     uSourceLimit=(const UChar*)&in[sizeof(in)/2];
3862     cTarget = cBuf;
3863     cTargetLimit = cBuf +uBufSize*5;
3864     uTarget = uBuf;
3865     uTargetLimit = uBuf+ uBufSize*5;
3866     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3867     if(U_FAILURE(errorCode)){
3868         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3869         return;
3870     }
3871     cSource = cBuf;
3872     cSourceLimit =cTarget;
3873     test =uBuf;
3874     myOff=offsets;
3875     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3876     if(U_FAILURE(errorCode)){
3877         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3878         return;
3879     }
3880     uSource = (const UChar*)&in[0];
3881     while(uSource<uSourceLimit){
3882         if(*test!=*uSource){
3883             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
3884         }
3885         uSource++;
3886         test++;
3887     }
3888     ucnv_reset(cnv);
3889     TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
3890     TestSmallTargetBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
3891     TestSmallSourceBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
3892     ucnv_reset(cnv);
3893     TestToAndFromUChars(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
3894         /*Test for the condition where there is an invalid character*/
3895     ucnv_reset(cnv);
3896     {
3897         static const uint8_t source2[]={0x1b,0x24,0x053};
3898         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
3899     }
3900     ucnv_close(cnv);
3901     free(uBuf);
3902     free(cBuf);
3903     free(offsets);
3904 }
3905
3906 static void TestJitterbug2411(){
3907     const char* source = "\x1b\x24\x29\x43\x6b\x6b\x6e\x6e\x6a\x68\x70\x6f\x69\x75\x79\x71\x77\x65\x68\x67\x0A"
3908                          "\x1b\x24\x29\x43\x6a\x61\x73\x64\x66\x6a\x61\x73\x64\x66\x68\x6f\x69\x75\x79\x1b\x24\x29\x43";
3909     UConverter* kr=NULL, *kr1=NULL;
3910     UErrorCode errorCode = U_ZERO_ERROR;
3911     UChar tgt[100]={'\0'};
3912     UChar* target = tgt;
3913     UChar* targetLimit = target+100;
3914     kr=ucnv_open("iso-2022-kr", &errorCode);
3915     if(U_FAILURE(errorCode)) {
3916         log_data_err("Unable to open a iso-2022-kr converter: %s\n", u_errorName(errorCode));
3917         return;
3918     }
3919     ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode);
3920     if(U_FAILURE(errorCode)) {
3921         log_err("iso-2022-kr cannot handle multiple escape sequences : %s\n", u_errorName(errorCode));
3922         return;
3923     }
3924     kr1 = ucnv_open("ibm-25546", &errorCode);
3925     if(U_FAILURE(errorCode)) {
3926         log_data_err("Unable to open a iso-2022-kr_1 converter: %s\n", u_errorName(errorCode));
3927         return;
3928     }
3929     target = tgt;
3930     targetLimit = target+100;
3931     ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode);
3932
3933     if(U_FAILURE(errorCode)) {
3934         log_err("iso-2022-kr_1 cannot handle multiple escape sequences : %s\n", u_errorName(errorCode));
3935         return;
3936     }
3937
3938     ucnv_close(kr);
3939     ucnv_close(kr1);
3940
3941 }
3942
3943 static void
3944 TestJIS(){
3945     /* From Unicode */
3946     {
3947         /* JIS Encoding */
3948         UChar sampleTextJIS[] ={
3949             0xFF81, 0xFF82,
3950             0x30EC, 0x30ED,
3951             0x30EE, 0x30EF,
3952             0xFF93, 0xFF94,
3953             0xFF95, 0xFF96,
3954             0xFF97, 0xFF98
3955         };
3956         const uint8_t expectedISO2022JIS[] ={
3957                 0x1b, 0x24, 0x42,
3958                 0x25, 0x41, 0x25, 0x44,
3959                 0x25, 0x6c, 0x25, 0x6d,
3960                 0x25, 0x6e, 0x25, 0x6F,
3961                 0x25, 0x62, 0x25, 0x64,
3962                 0x25, 0x66, 0x25, 0x68,
3963                 0x25, 0x69, 0x25, 0x6a
3964
3965         };
3966         int32_t fmISO2022JISOffs[] ={
3967             0,0,0,
3968             0,0,1,1,
3969             2,2,3,3,
3970             4,4,5,5,
3971             6,6,7,7,
3972             8,8,9,9,
3973             10,10,11,11
3974
3975         };
3976
3977         /* JIS7 Encoding */
3978         const uint8_t expectedISO2022JIS7[] ={
3979                 0x1b, 0x28, 0x49,
3980                 0x41, 0x42,
3981                 0x1b, 0x24, 0x42,
3982                 0x25, 0x6c, 0x25, 0x6d,
3983                 0x25, 0x6e, 0x25, 0x6F,
3984                 0x1b, 0x28, 0x49,
3985                 0x53, 0x54,
3986                 0x55, 0x56,
3987                 0x57, 0x58
3988
3989         };
3990         int32_t fmISO2022JIS7Offs[] ={
3991             0,0,0,
3992             0,1,
3993             2,2,2,
3994             2,2,3,3,
3995             4,4,5,5,
3996             6,6,6,
3997             6,7,
3998             8,9,
3999             10,11
4000
4001         };
4002
4003         /* JIS8 Encoding */
4004         const uint8_t expectedISO2022JIS8[] ={
4005                 0x1b, 0x28, 0x4A,
4006                 0xC1, 0xC2,
4007                 0x1b, 0x24, 0x42,
4008                 0x25, 0x6c, 0x25, 0x6d,
4009                 0x25, 0x6e, 0x25, 0x6F,
4010                 0x1b, 0x28, 0x4A,
4011                 0xD3, 0xD4,
4012                 0xD5, 0xD6,
4013                 0xD7, 0xD8
4014
4015         };
4016         int32_t fmISO2022JIS8Offs[] ={
4017             0,0,0,
4018             0,1,
4019             2,2,2,
4020             2,2,3,3,
4021             4,4,5,5,
4022             6,6,6,
4023             6,7,
4024             8,9,
4025             10,11
4026
4027         };
4028         testConvertFromU(sampleTextJIS, sizeof(sampleTextJIS)/sizeof(sampleTextJIS[0]),
4029             expectedISO2022JIS, sizeof(expectedISO2022JIS), "JIS", fmISO2022JISOffs,TRUE );
4030         testConvertFromU(sampleTextJIS, sizeof(sampleTextJIS)/sizeof(sampleTextJIS[0]),
4031             expectedISO2022JIS7, sizeof(expectedISO2022JIS7), "JIS7", fmISO2022JIS7Offs,FALSE );
4032         testConvertFromU(sampleTextJIS, sizeof(sampleTextJIS)/sizeof(sampleTextJIS[0]),
4033             expectedISO2022JIS8, sizeof(expectedISO2022JIS8), "JIS8", fmISO2022JIS8Offs,FALSE );
4034
4035
4036     }
4037     /*To Unicode*/
4038     {
4039         const uint8_t sampleTextJIS[] = {
4040             0x1b,0x28,0x48,0x41,0x42, /*jis-Roman*/
4041             0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4042             0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4043         };
4044         const uint16_t expectedISO2022JIS[] = {
4045             0x0041, 0x0042,
4046             0xFF81, 0xFF82,
4047             0x3000
4048         };
4049         int32_t  toISO2022JISOffs[]={
4050             3,4,
4051             8,9,
4052             16
4053         };
4054
4055         const uint8_t sampleTextJIS7[] = {
4056             0x1b,0x28,0x48,0x41,0x42, /*JIS7-Roman*/
4057             0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4058             0x1b,0x24,0x42,0x21,0x21,
4059             0x0e,0x41,0x42,0x0f,      /*Test Katakana set with SI and SO */
4060             0x21,0x22,
4061             0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4062         };
4063         const uint16_t expectedISO2022JIS7[] = {
4064             0x0041, 0x0042,
4065             0xFF81, 0xFF82,
4066             0x3000,
4067             0xFF81, 0xFF82,
4068             0x3001,
4069             0x3000
4070         };
4071         int32_t  toISO2022JIS7Offs[]={
4072             3,4,
4073             8,9,
4074             13,16,
4075             17,
4076             19,27
4077         };
4078         const uint8_t sampleTextJIS8[] = {
4079             0x1b,0x28,0x48,0x41,0x42, /*JIS8-Roman*/
4080             0xa1,0xc8,0xd9,/*Katakana Set*/
4081             0x1b,0x28,0x42,
4082             0x41,0x42,
4083             0xb1,0xc3, /*Katakana Set*/
4084             0x1b,0x24,0x42,0x21,0x21
4085         };
4086         const uint16_t expectedISO2022JIS8[] = {
4087             0x0041, 0x0042,
4088             0xff61, 0xff88, 0xff99,
4089             0x0041, 0x0042,
4090             0xff71, 0xff83,
4091             0x3000
4092         };
4093         int32_t  toISO2022JIS8Offs[]={
4094             3, 4,  5,  6,
4095             7, 11, 12, 13,
4096             14, 18,
4097         };
4098
4099         testConvertToU(sampleTextJIS,sizeof(sampleTextJIS),expectedISO2022JIS,
4100             sizeof(expectedISO2022JIS)/sizeof(expectedISO2022JIS[0]),"JIS", toISO2022JISOffs,TRUE);
4101         testConvertToU(sampleTextJIS7,sizeof(sampleTextJIS7),expectedISO2022JIS7,
4102             sizeof(expectedISO2022JIS7)/sizeof(expectedISO2022JIS7[0]),"JIS7", toISO2022JIS7Offs,TRUE);
4103         testConvertToU(sampleTextJIS8,sizeof(sampleTextJIS8),expectedISO2022JIS8,
4104             sizeof(expectedISO2022JIS8)/sizeof(expectedISO2022JIS8[0]),"JIS8", toISO2022JIS8Offs,TRUE);
4105     }
4106
4107 }
4108
4109 static void TestJitterbug915(){
4110 /* tests for roundtripping of the below sequence
4111 \x1b$)G\x0E#!#"###$#%#&#'#(#)#*#+          / *plane 1 * /
4112 \x1b$*H\x1bN"!\x1bN""\x1bN"#\x1bN"$\x1bN"% / *plane 2 * /
4113 \x1b$+I\x1bO"D\x1bO"E\x1bO"F\x1bO"G\x1bO"H / *plane 3 * /
4114 \x1b$+J\x1bO!D\x1bO!E\x1bO"j\x1bO"k\x1bO"l / *plane 4 * /
4115 \x1b$+K\x1bO!t\x1bO"P\x1bO"Q\x1bO#7\x1bO"\ / *plane 5 * /
4116 \x1b$+L\x1bO!#\x1bO",\x1bO#N\x1bO!n\x1bO#q / *plane 6 * /
4117 \x1b$+M\x1bO"q\x1bO!N\x1bO!j\x1bO#:\x1bO#o / *plane 7 * /
4118 */
4119     static char cSource[]={
4120         0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
4121         0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
4122         0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
4123         0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
4124         0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
4125         0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
4126         0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
4127         0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
4128         0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
4129         0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
4130         0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
4131         0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
4132         0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
4133         0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
4134         0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4135         0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4136         0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
4137         0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
4138         0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4139         0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4140         0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
4141         0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
4142         0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
4143         0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
4144         0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
4145         0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
4146         0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
4147         0x37, 0x20, 0x2A, 0x2F,
4148     };
4149     UChar uTarget[500]={'\0'};
4150     UChar* utarget=uTarget;
4151     UChar* utargetLimit=uTarget+sizeof(uTarget)/2;
4152
4153     char cTarget[500]={'\0'};
4154     char* ctarget=cTarget;
4155     char* ctargetLimit=cTarget+sizeof(cTarget);
4156     const char* csource=cSource;
4157     char* tempSrc = cSource;
4158     UErrorCode err=U_ZERO_ERROR;
4159
4160     UConverter* conv =ucnv_open("ISO_2022_CN_EXT",&err);
4161     if(U_FAILURE(err)) {
4162         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
4163         return;
4164     }
4165     ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(cSource),NULL,TRUE,&err);
4166     if(U_FAILURE(err)) {
4167         log_err("iso-2022-CN to Unicode conversion failed: %s\n", u_errorName(err));
4168         return;
4169     }
4170     utargetLimit=utarget;
4171     utarget = uTarget;
4172     ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err);
4173     if(U_FAILURE(err)) {
4174         log_err("iso-2022-CN from Unicode conversion failed: %s\n", u_errorName(err));
4175         return;
4176     }
4177     ctargetLimit=ctarget;
4178     ctarget =cTarget;
4179     while(ctarget<ctargetLimit){
4180         if(*(ctarget++) != *(tempSrc++)){
4181             log_err("Expected : \\x%02X \t Got: \\x%02X\n",*ctarget,(int)*tempSrc) ;
4182         }
4183     }
4184
4185     ucnv_close(conv);
4186 }
4187
4188 static void
4189 TestISO_2022_CN_EXT() {
4190     /* test input */
4191     static const uint16_t in[]={
4192                 /* test Non-BMP code points */
4193          0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
4194          0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
4195          0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
4196          0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
4197          0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
4198          0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
4199          0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
4200          0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
4201          0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
4202          0xD869, 0xDED5,
4203
4204          0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
4205          0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
4206          0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
4207          0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4208          0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4209          0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4210          0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4211          0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4212          0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4213          0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4214          0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4215          0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4216          0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4217          0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x3443, 0x3444, 0x000D, 0x000A,
4218          0x3445, 0x3449, 0x344A, 0x344B, 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A,
4219          0x60F6, 0x60F7, 0x60F8, 0x60F9, 0x60FA, 0x60FB, 0x60FC, 0x60FD, 0x000D, 0x000A,
4220          0x60FE, 0x60FF, 0x6100, 0x6101, 0x6102, 0x0041, 0x0042, 0x0043, 0x000D, 0x000A,
4221          0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x000D, 0x000A,
4222
4223          0x33E7, 0x33E8, 0x33E9, 0x33EA, 0x000D, 0x000A
4224
4225       };
4226
4227     const UChar* uSource;
4228     const UChar* uSourceLimit;
4229     const char* cSource;
4230     const char* cSourceLimit;
4231     UChar *uTargetLimit =NULL;
4232     UChar *uTarget;
4233     char *cTarget;
4234     const char *cTargetLimit;
4235     char *cBuf;
4236     UChar *uBuf,*test;
4237     int32_t uBufSize = 180;
4238     UErrorCode errorCode=U_ZERO_ERROR;
4239     UConverter *cnv;
4240     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4241     int32_t* myOff= offsets;
4242     cnv=ucnv_open("ISO_2022,locale=cn,version=1", &errorCode);
4243     if(U_FAILURE(errorCode)) {
4244         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4245         return;
4246     }
4247
4248     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4249     cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
4250     uSource = (const UChar*)&in[0];
4251     uSourceLimit=(const UChar*)&in[sizeof(in)/2];
4252     cTarget = cBuf;
4253     cTargetLimit = cBuf +uBufSize*5;
4254     uTarget = uBuf;
4255     uTargetLimit = uBuf+ uBufSize*5;
4256     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4257     if(U_FAILURE(errorCode)){
4258         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4259         return;
4260     }
4261     cSource = cBuf;
4262     cSourceLimit =cTarget;
4263     test =uBuf;
4264     myOff=offsets;
4265     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4266     if(U_FAILURE(errorCode)){
4267         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4268         return;
4269     }
4270     uSource = (const UChar*)&in[0];
4271     while(uSource<uSourceLimit){
4272         if(*test!=*uSource){
4273             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
4274         }
4275         else{
4276             log_verbose("      Got: \\u%04X\n",(int)*test) ;
4277         }
4278         uSource++;
4279         test++;
4280     }
4281     TestSmallTargetBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
4282     TestSmallSourceBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
4283     /*Test for the condition where there is an invalid character*/
4284     ucnv_reset(cnv);
4285     {
4286         static const uint8_t source2[]={0x0e,0x24,0x053};
4287         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN-EXT]");
4288     }
4289     ucnv_close(cnv);
4290     free(uBuf);
4291     free(cBuf);
4292     free(offsets);
4293 }
4294
4295 static void
4296 TestISO_2022_CN() {
4297     /* test input */
4298     static const uint16_t in[]={
4299          /* jitterbug 951 */
4300          0xFF2D, 0xFF49, 0xFF58, 0xFF45, 0xFF44, 0x0020, 0xFF43, 0xFF48, 0xFF41, 0xFF52,
4301          0x0020, 0xFF06, 0x0020, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17,
4302          0xFF18, 0xFF19, 0xFF10, 0x0020, 0xFF4E, 0xFF55, 0xFF4D, 0xFF42, 0xFF45, 0xFF52,
4303          0x0020, 0xFF54, 0xFF45, 0xFF53, 0xFF54, 0x0020, 0xFF4C, 0xFF49, 0xFF4E, 0xFF45,
4304          0x0020, 0x0045, 0x004e, 0x0044,
4305          /**/
4306          0x4E00, 0x4E00, 0x4E01, 0x4E03, 0x60F6, 0x60F7, 0x60F8, 0x60FB, 0x000D, 0x000A,
4307          0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x60FB, 0x60FC, 0x000D, 0x000A,
4308          0x4E07, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x0042, 0x0043, 0x000D, 0x000A,
4309          0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4310          0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4311          0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4312          0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4313          0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4314          0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4315          0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4316          0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4317          0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4318          0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x60FE, 0x60FF, 0x000D, 0x000A,
4319          0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 0x60F9, 0x60FA, 0x000D, 0x000A,
4320          0x6100, 0x6101, 0x0041, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4321          0x247D, 0x247E, 0x247F, 0x2480, 0x2481, 0x2482, 0x2483, 0x2484, 0x2485, 0x2486,
4322          0x2487, 0x2460, 0x2461, 0xFF20, 0xFF21, 0xFF22, 0x0049, 0x004A, 0x000D, 0x000A,
4323
4324       };
4325     const UChar* uSource;
4326     const UChar* uSourceLimit;
4327     const char* cSource;
4328     const char* cSourceLimit;
4329     UChar *uTargetLimit =NULL;
4330     UChar *uTarget;
4331     char *cTarget;
4332     const char *cTargetLimit;
4333     char *cBuf;
4334     UChar *uBuf,*test;
4335     int32_t uBufSize = 180;
4336     UErrorCode errorCode=U_ZERO_ERROR;
4337     UConverter *cnv;
4338     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4339     int32_t* myOff= offsets;
4340     cnv=ucnv_open("ISO_2022,locale=cn,version=0", &errorCode);
4341     if(U_FAILURE(errorCode)) {
4342         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4343         return;
4344     }
4345
4346     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4347     cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
4348     uSource = (const UChar*)&in[0];
4349     uSourceLimit=(const UChar*)&in[sizeof(in)/2];
4350     cTarget = cBuf;
4351     cTargetLimit = cBuf +uBufSize*5;
4352     uTarget = uBuf;
4353     uTargetLimit = uBuf+ uBufSize*5;
4354     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4355     if(U_FAILURE(errorCode)){
4356         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4357         return;
4358     }
4359     cSource = cBuf;
4360     cSourceLimit =cTarget;
4361     test =uBuf;
4362     myOff=offsets;
4363     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4364     if(U_FAILURE(errorCode)){
4365         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4366         return;
4367     }
4368     uSource = (const UChar*)&in[0];
4369     while(uSource<uSourceLimit){
4370         if(*test!=*uSource){
4371             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
4372         }
4373         else{
4374             log_verbose("      Got: \\u%04X\n",(int)*test) ;
4375         }
4376         uSource++;
4377         test++;
4378     }
4379     TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-CN encoding");
4380     TestSmallTargetBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
4381     TestSmallSourceBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
4382     TestToAndFromUChars(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
4383     TestJitterbug930("csISO2022CN");
4384     /*Test for the condition where there is an invalid character*/
4385     ucnv_reset(cnv);
4386     {
4387         static const uint8_t source2[]={0x0e,0x24,0x053};
4388         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN]");
4389     }
4390
4391     ucnv_close(cnv);
4392     free(uBuf);
4393     free(cBuf);
4394     free(offsets);
4395 }
4396
4397 static void
4398 TestEBCDIC_STATEFUL() {
4399     /* test input */
4400     static const uint8_t in[]={
4401         0x61,
4402         0x1a,
4403         0x0f, 0x4b,
4404         0x42,
4405         0x40,
4406         0x36,
4407     };
4408
4409     /* expected test results */
4410     static const uint32_t results[]={
4411         /* number of bytes read, code point */
4412         1, 0x002f,
4413         1, 0x0092,
4414         2, 0x002e,
4415         1, 0xff62,
4416         1, 0x0020,
4417         1, 0x0096,
4418
4419     };
4420     static const uint8_t in2[]={
4421         0x0f,
4422         0xa1,
4423         0x01
4424     };
4425
4426     /* expected test results */
4427     static const uint32_t results2[]={
4428         /* number of bytes read, code point */
4429         2, 0x203E,
4430         1, 0x0001,
4431     };
4432
4433     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
4434     UErrorCode errorCode=U_ZERO_ERROR;
4435     UConverter *cnv=ucnv_open("ibm-930", &errorCode);
4436     if(U_FAILURE(errorCode)) {
4437         log_data_err("Unable to open a EBCDIC_STATEFUL(ibm-930) converter: %s\n", u_errorName(errorCode));
4438         return;
4439     }
4440     TestNextUChar(cnv, source, limit, results, "EBCDIC_STATEFUL(ibm-930)");
4441     ucnv_reset(cnv);
4442      /* Test the condition when source >= sourceLimit */
4443     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
4444     ucnv_reset(cnv);
4445     /*Test for the condition where source > sourcelimit after consuming the shift chracter */
4446     {
4447         static const uint8_t source1[]={0x0f};
4448         TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_INDEX_OUTOFBOUNDS_ERROR, "a character is truncated");
4449     }
4450     /*Test for the condition where there is an invalid character*/
4451     ucnv_reset(cnv);
4452     {
4453         static const uint8_t source2[]={0x0e, 0x7F, 0xFF};
4454         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [EBCDIC STATEFUL]");
4455     }
4456     ucnv_reset(cnv);
4457     source=(const char*)in2;
4458     limit=(const char*)in2+sizeof(in2);
4459     TestNextUChar(cnv,source,limit,results2,"EBCDIC_STATEFUL(ibm-930),seq#2");
4460     ucnv_close(cnv);
4461
4462 }
4463
4464 static void
4465 TestGB18030() {
4466     /* test input */
4467     static const uint8_t in[]={
4468         0x24,
4469         0x7f,
4470         0x81, 0x30, 0x81, 0x30,
4471         0xa8, 0xbf,
4472         0xa2, 0xe3,
4473         0xd2, 0xbb,
4474         0x82, 0x35, 0x8f, 0x33,
4475         0x84, 0x31, 0xa4, 0x39,
4476         0x90, 0x30, 0x81, 0x30,
4477         0xe3, 0x32, 0x9a, 0x35
4478 #if 0
4479         /*
4480          * Feature removed   markus 2000-oct-26
4481          * Only some codepages must match surrogate pairs into supplementary code points -
4482          * see javadoc for ucnv_getNextUChar() and implementation notes in ucnvmbcs.c .
4483          * GB 18030 provides direct encodings for supplementary code points, therefore
4484          * it must not combine two single-encoded surrogates into one code point.
4485          */
4486         0x83, 0x36, 0xc8, 0x30, 0x83, 0x37, 0xb0, 0x34 /* separately encoded surrogates */
4487 #endif
4488     };
4489
4490     /* expected test results */
4491     static const uint32_t results[]={
4492         /* number of bytes read, code point */
4493         1, 0x24,
4494         1, 0x7f,
4495         4, 0x80,
4496         2, 0x1f9,
4497         2, 0x20ac,
4498         2, 0x4e00,
4499         4, 0x9fa6,
4500         4, 0xffff,
4501         4, 0x10000,
4502         4, 0x10ffff
4503 #if 0
4504         /* Feature removed. See comment above. */
4505         8, 0x10000
4506 #endif
4507     };
4508
4509 /*    const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); */
4510     UErrorCode errorCode=U_ZERO_ERROR;
4511     UConverter *cnv=ucnv_open("gb18030", &errorCode);
4512     if(U_FAILURE(errorCode)) {
4513         log_data_err("Unable to open a gb18030 converter: %s\n", u_errorName(errorCode));
4514         return;
4515     }
4516     TestNextUChar(cnv, (const char *)in, (const char *)in+sizeof(in), results, "gb18030");
4517     ucnv_close(cnv);
4518 }
4519
4520 static void
4521 TestLMBCS() {
4522     /* LMBCS-1 string */
4523     static const uint8_t pszLMBCS[]={
4524         0x61,
4525         0x01, 0x29,
4526         0x81,
4527         0xA0,
4528         0x0F, 0x27,
4529         0x0F, 0x91,
4530         0x14, 0x0a, 0x74,
4531         0x14, 0xF6, 0x02,
4532         0x14, 0xd8, 0x4d, 0x14, 0xdc, 0x56, /* UTF-16 surrogate pair */
4533         0x10, 0x88, 0xA0,
4534     };
4535
4536     /* Unicode UChar32 equivalents */
4537     static const UChar32 pszUnicode32[]={
4538         /* code point */
4539         0x00000061,
4540         0x00002013,
4541         0x000000FC,
4542         0x000000E1,
4543         0x00000007,
4544         0x00000091,
4545         0x00000a74,
4546         0x00000200,
4547         0x00023456, /* code point for surrogate pair */
4548         0x00005516
4549     };
4550
4551 /* Unicode UChar equivalents */
4552     static const UChar pszUnicode[]={
4553         /* code point */
4554         0x0061,
4555         0x2013,
4556         0x00FC,
4557         0x00E1,
4558         0x0007,
4559         0x0091,
4560         0x0a74,
4561         0x0200,
4562         0xD84D, /* low surrogate */
4563         0xDC56, /* high surrogate */
4564         0x5516
4565     };
4566
4567 /* expected test results */
4568     static const int offsets32[]={
4569         /* number of bytes read, code point */
4570         0,
4571         1,
4572         3,
4573         4,
4574         5,
4575         7,
4576         9,
4577         12,
4578         15,
4579         21,
4580         24
4581     };
4582
4583 /* expected test results */
4584     static const int offsets[]={
4585         /* number of bytes read, code point */
4586         0,
4587         1,
4588         3,
4589         4,
4590         5,
4591         7,
4592         9,
4593         12,
4594         15,
4595         18,
4596         21,
4597         24
4598     };
4599
4600
4601     UConverter *cnv;
4602
4603 #define NAME_LMBCS_1 "LMBCS-1"
4604 #define NAME_LMBCS_2 "LMBCS-2"
4605
4606
4607    /* Some basic open/close/property tests on some LMBCS converters */
4608     {
4609
4610       char expected_subchars[] = {0x3F};   /* ANSI Question Mark */
4611       char new_subchars [] = {0x7F};       /* subst char used by SmartSuite..*/
4612       char get_subchars [1];
4613       const char * get_name;
4614       UConverter *cnv1;
4615       UConverter *cnv2;
4616
4617       int8_t len = sizeof(get_subchars);
4618
4619       UErrorCode errorCode=U_ZERO_ERROR;
4620
4621       /* Open */
4622       cnv1=ucnv_open(NAME_LMBCS_1, &errorCode);
4623       if(U_FAILURE(errorCode)) {
4624          log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
4625          return;
4626       }
4627       cnv2=ucnv_open(NAME_LMBCS_2, &errorCode);
4628       if(U_FAILURE(errorCode)) {
4629          log_data_err("Unable to open a LMBCS-2 converter: %s\n", u_errorName(errorCode));
4630          return;
4631       }
4632
4633       /* Name */
4634       get_name = ucnv_getName (cnv1, &errorCode);
4635       if (strcmp(NAME_LMBCS_1,get_name)){
4636          log_err("Unexpected converter name: %s\n", get_name);
4637       }
4638       get_name = ucnv_getName (cnv2, &errorCode);
4639       if (strcmp(NAME_LMBCS_2,get_name)){
4640          log_err("Unexpected converter name: %s\n", get_name);
4641       }
4642
4643       /* substitution chars */
4644       ucnv_getSubstChars (cnv1, get_subchars, &len, &errorCode);
4645       if(U_FAILURE(errorCode)) {
4646          log_err("Failure on get subst chars: %s\n", u_errorName(errorCode));
4647       }
4648       if (len!=1){
4649          log_err("Unexpected length of sub chars\n");
4650       }
4651       if (get_subchars[0] != expected_subchars[0]){
4652            log_err("Unexpected value of sub chars\n");
4653       }
4654       ucnv_setSubstChars (cnv2,new_subchars, len, &errorCode);
4655       if(U_FAILURE(errorCode)) {
4656          log_err("Failure on set subst chars: %s\n", u_errorName(errorCode));
4657       }
4658       ucnv_getSubstChars (cnv2, get_subchars, &len, &errorCode);
4659       if(U_FAILURE(errorCode)) {
4660          log_err("Failure on get subst chars: %s\n", u_errorName(errorCode));
4661       }
4662       if (len!=1){
4663          log_err("Unexpected length of sub chars\n");
4664       }
4665       if (get_subchars[0] != new_subchars[0]){
4666            log_err("Unexpected value of sub chars\n");
4667       }
4668       ucnv_close(cnv1);
4669       ucnv_close(cnv2);
4670
4671     }
4672
4673     /* LMBCS to Unicode - offsets */
4674     {
4675        UErrorCode errorCode=U_ZERO_ERROR;
4676
4677        const uint8_t * pSource = pszLMBCS;
4678        const uint8_t * sourceLimit = pszLMBCS + sizeof(pszLMBCS);
4679
4680        UChar Out [sizeof(pszUnicode) + 1];
4681        UChar * pOut = Out;
4682        UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar);
4683
4684        int32_t off [sizeof(offsets)];
4685
4686       /* last 'offset' in expected results is just the final size.
4687          (Makes other tests easier). Compensate here: */
4688
4689        off[(sizeof(offsets)/sizeof(offsets[0]))-1] = sizeof(pszLMBCS);
4690
4691
4692
4693       cnv=ucnv_open("lmbcs", &errorCode); /* use generic name for LMBCS-1 */
4694       if(U_FAILURE(errorCode)) {
4695            log_data_err("Unable to open a LMBCS converter: %s\n", u_errorName(errorCode));
4696            return;
4697       }
4698
4699
4700
4701       ucnv_toUnicode (cnv,
4702                       &pOut,
4703                       OutLimit,
4704                       (const char **)&pSource,
4705                       (const char *)sourceLimit,
4706                       off,
4707                       TRUE,
4708                       &errorCode);
4709
4710
4711        if (memcmp(off,offsets,sizeof(offsets)))
4712        {
4713          log_err("LMBCS->Uni: Calculated offsets do not match expected results\n");
4714        }
4715        if (memcmp(Out,pszUnicode,sizeof(pszUnicode)))
4716        {
4717          log_err("LMBCS->Uni: Calculated codepoints do not match expected results\n");
4718        }
4719        ucnv_close(cnv);
4720     }
4721     {
4722    /* LMBCS to Unicode - getNextUChar */
4723       const char * sourceStart;
4724       const char *source=(const char *)pszLMBCS;
4725       const char *limit=(const char *)pszLMBCS+sizeof(pszLMBCS);
4726       const UChar32 *results= pszUnicode32;
4727       const int *off = offsets32;
4728
4729       UErrorCode errorCode=U_ZERO_ERROR;
4730       UChar32 uniChar;
4731
4732       cnv=ucnv_open("LMBCS-1", &errorCode);
4733       if(U_FAILURE(errorCode)) {
4734            log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
4735            return;
4736       }
4737       else
4738       {
4739
4740          while(source<limit) {
4741             sourceStart=source;
4742             uniChar=ucnv_getNextUChar(cnv, &source, source + (off[1] - off[0]), &errorCode);
4743             if(U_FAILURE(errorCode)) {
4744                   log_err("LMBCS-1 ucnv_getNextUChar() failed: %s\n", u_errorName(errorCode));
4745                   break;
4746             } else if(source-sourceStart != off[1] - off[0] || uniChar != *results) {
4747                log_err("LMBCS-1 ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
4748                    uniChar, (source-sourceStart), *results, *off);
4749                break;
4750             }
4751             results++;
4752             off++;
4753          }
4754        }
4755        ucnv_close(cnv);
4756     }
4757     { /* test locale & optimization group operations: Unicode to LMBCS */
4758
4759       UErrorCode errorCode=U_ZERO_ERROR;
4760       UConverter *cnv16he = ucnv_open("LMBCS-16,locale=he", &errorCode);
4761       UConverter *cnv16jp = ucnv_open("LMBCS-16,locale=ja_JP", &errorCode);
4762       UConverter *cnv01us = ucnv_open("LMBCS-1,locale=us_EN", &errorCode);
4763       UChar uniString [] = {0x0192}; /* Latin Small letter f with hook */
4764       const UChar * pUniOut = uniString;
4765       UChar * pUniIn = uniString;
4766       uint8_t lmbcsString [4];
4767       const uint8_t * pLMBCSOut = lmbcsString;
4768       uint8_t * pLMBCSIn = lmbcsString;
4769
4770       /* 0192 (hook) converts to both group 3 & group 1. input locale should differentiate */
4771       ucnv_fromUnicode (cnv16he,
4772                         (char **)&pLMBCSIn, (const char *)(pLMBCSIn + sizeof(lmbcsString)/sizeof(lmbcsString[0])),
4773                         &pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0]),
4774                         NULL, 1, &errorCode);
4775
4776       if (lmbcsString[0] != 0x3 || lmbcsString[1] != 0x83)
4777       {
4778          log_err("LMBCS-16,locale=he gives unexpected translation\n");
4779       }
4780
4781       pLMBCSIn=lmbcsString;
4782       pUniOut = uniString;
4783       ucnv_fromUnicode (cnv01us,
4784                         (char **)&pLMBCSIn, (const char *)(lmbcsString + sizeof(lmbcsString)/sizeof(lmbcsString[0])),
4785                         &pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0]),
4786                         NULL, 1, &errorCode);
4787
4788       if (lmbcsString[0] != 0x9F)
4789       {
4790          log_err("LMBCS-1,locale=US gives unexpected translation\n");
4791       }
4792
4793       /* single byte char from mbcs char set */
4794       lmbcsString[0] = 0xAE;  /* 1/2 width katakana letter small Yo */
4795       pLMBCSOut = lmbcsString;
4796       pUniIn = uniString;
4797       ucnv_toUnicode (cnv16jp,
4798                         &pUniIn, pUniIn + 1,
4799                         (const char **)&pLMBCSOut, (const char *)(pLMBCSOut + 1),
4800                         NULL, 1, &errorCode);
4801       if (U_FAILURE(errorCode) || pLMBCSOut != lmbcsString+1 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
4802       {
4803            log_err("Unexpected results from LMBCS-16 single byte char\n");
4804       }
4805       /* convert to group 1: should be 3 bytes */
4806       pLMBCSIn = lmbcsString;
4807       pUniOut = uniString;
4808       ucnv_fromUnicode (cnv01us,
4809                         (char **)&pLMBCSIn, (const char *)(pLMBCSIn + 3),
4810                         &pUniOut, pUniOut + 1,
4811                         NULL, 1, &errorCode);
4812       if (U_FAILURE(errorCode) || pLMBCSIn != lmbcsString+3 || pUniOut != uniString+1
4813          || lmbcsString[0] != 0x10 || lmbcsString[1] != 0x10 || lmbcsString[2] != 0xAE)
4814       {
4815            log_err("Unexpected results to LMBCS-1 single byte mbcs char\n");
4816       }
4817       pLMBCSOut = lmbcsString;
4818       pUniIn = uniString;
4819       ucnv_toUnicode (cnv01us,
4820                         &pUniIn, pUniIn + 1,
4821                         (const char **)&pLMBCSOut, (const char *)(pLMBCSOut + 3),
4822                         NULL, 1, &errorCode);
4823       if (U_FAILURE(errorCode) || pLMBCSOut != lmbcsString+3 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
4824       {
4825            log_err("Unexpected results from LMBCS-1 single byte mbcs char\n");
4826       }
4827       pLMBCSIn = lmbcsString;
4828       pUniOut = uniString;
4829       ucnv_fromUnicode (cnv16jp,
4830                         (char **)&pLMBCSIn, (const char *)(pLMBCSIn + 1),
4831                         &pUniOut, pUniOut + 1,
4832                         NULL, 1, &errorCode);
4833       if (U_FAILURE(errorCode) || pLMBCSIn != lmbcsString+1 || pUniOut != uniString+1 || lmbcsString[0] != 0xAE)
4834       {
4835            log_err("Unexpected results to LMBCS-16 single byte mbcs char\n");
4836       }
4837       ucnv_close(cnv16he);
4838       ucnv_close(cnv16jp);
4839       ucnv_close(cnv01us);
4840     }
4841     {
4842        /* Small source buffer testing, LMBCS -> Unicode */
4843
4844        UErrorCode errorCode=U_ZERO_ERROR;
4845
4846        const uint8_t * pSource = pszLMBCS;
4847        const uint8_t * sourceLimit = pszLMBCS + sizeof(pszLMBCS);
4848        int codepointCount = 0;
4849
4850        UChar Out [sizeof(pszUnicode) + 1];
4851        UChar * pOut = Out;
4852        UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar);
4853
4854
4855        cnv = ucnv_open(NAME_LMBCS_1, &errorCode);
4856        if(U_FAILURE(errorCode)) {
4857            log_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
4858            return;
4859        }
4860
4861
4862        while ((pSource < sourceLimit) && U_SUCCESS (errorCode))
4863        {
4864            ucnv_toUnicode (cnv,
4865                &pOut,
4866                OutLimit,
4867                (const char **)&pSource,
4868                (const char *)(pSource+1), /* claim that this is a 1- byte buffer */
4869                NULL,
4870                FALSE,    /* FALSE means there might be more chars in the next buffer */
4871                &errorCode);
4872
4873            if (U_SUCCESS (errorCode))
4874            {
4875                if ((pSource - (const uint8_t *)pszLMBCS) == offsets [codepointCount+1])
4876                {
4877                    /* we are on to the next code point: check value */
4878
4879                    if (Out[0] != pszUnicode[codepointCount]){
4880                        log_err("LMBCS->Uni result %lx should have been %lx \n",
4881                            Out[0], pszUnicode[codepointCount]);
4882                    }
4883
4884                    pOut = Out; /* reset for accumulating next code point */
4885                    codepointCount++;
4886                }
4887            }
4888            else
4889            {
4890                log_err("Unexpected Error on toUnicode: %s\n", u_errorName(errorCode));
4891            }
4892        }
4893        {
4894          /* limits & surrogate error testing */
4895          uint8_t LIn [sizeof(pszLMBCS)];
4896          const uint8_t * pLIn = LIn;
4897
4898          char LOut [sizeof(pszLMBCS)];
4899          char * pLOut = LOut;
4900
4901          UChar UOut [sizeof(pszUnicode)];
4902          UChar * pUOut = UOut;
4903
4904          UChar UIn [sizeof(pszUnicode)];
4905          const UChar * pUIn = UIn;
4906
4907          int32_t off [sizeof(offsets)];
4908          UChar32 uniChar;
4909
4910          errorCode=U_ZERO_ERROR;
4911
4912          /* negative source request should always return U_ILLEGAL_ARGUMENT_ERROR */
4913          ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn-1,off,FALSE, &errorCode);
4914          if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
4915          {
4916             log_err("Unexpected Error on negative source request to ucnv_fromUnicode: %s\n", u_errorName(errorCode));
4917          }
4918          errorCode=U_ZERO_ERROR;
4919          ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)(pLIn-1),off,FALSE, &errorCode);
4920          if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
4921          {
4922             log_err("Unexpected Error on negative source request to ucnv_toUnicode: %s\n", u_errorName(errorCode));
4923          }
4924          errorCode=U_ZERO_ERROR;
4925
4926          uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)(pLIn-1), &errorCode);
4927          if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
4928          {
4929             log_err("Unexpected Error on negative source request to ucnv_getNextUChar: %s\n", u_errorName(errorCode));
4930          }
4931          errorCode=U_ZERO_ERROR;
4932
4933          /* 0 byte source request - no error, no pointer movement */
4934          ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)pLIn,off,FALSE, &errorCode);
4935          ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn,off,FALSE, &errorCode);
4936          if(U_FAILURE(errorCode)) {
4937             log_err("0 byte source request: unexpected error: %s\n", u_errorName(errorCode));
4938          }
4939          if ((pUOut != UOut) || (pUIn != UIn) || (pLOut != LOut) || (pLIn != LIn))
4940          {
4941               log_err("Unexpected pointer move in 0 byte source request \n");
4942          }
4943          /*0 byte source request - GetNextUChar : error & value == fffe or ffff */
4944          uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)pLIn, &errorCode);
4945          if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
4946          {
4947             log_err("Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s\n", u_errorName(errorCode));
4948          }
4949          if (((uint32_t)uniChar - 0xfffe) > 1) /* not 0xfffe<=uniChar<=0xffff */
4950          {
4951             log_err("Unexpected value on 0-byte source request to ucnv_getnextUChar \n");
4952          }
4953          errorCode = U_ZERO_ERROR;
4954
4955          /* running out of target room : U_BUFFER_OVERFLOW_ERROR */
4956
4957          pUIn = pszUnicode;
4958          ucnv_fromUnicode(cnv, &pLOut,pLOut+offsets[4],&pUIn,pUIn+sizeof(pszUnicode)/sizeof(UChar),off,FALSE, &errorCode);
4959          if (errorCode != U_BUFFER_OVERFLOW_ERROR || pLOut != LOut + offsets[4] || pUIn != pszUnicode+4 )
4960          {
4961             log_err("Unexpected results on out of target room to ucnv_fromUnicode\n");
4962          }
4963
4964          errorCode = U_ZERO_ERROR;
4965
4966          pLIn = pszLMBCS;
4967          ucnv_toUnicode(cnv, &pUOut,pUOut+4,(const char **)&pLIn,(const char *)(pLIn+sizeof(pszLMBCS)),off,FALSE, &errorCode);
4968          if (errorCode != U_BUFFER_OVERFLOW_ERROR || pUOut != UOut + 4 || pLIn != (const uint8_t *)pszLMBCS+offsets[4])
4969          {
4970             log_err("Unexpected results on out of target room to ucnv_toUnicode\n");
4971          }
4972
4973          /* unpaired or chopped LMBCS surrogates */
4974
4975          /* OK high surrogate, Low surrogate is chopped */
4976          LIn [0] = 0x14;
4977          LIn [1] = 0xD8;
4978          LIn [2] = 0x01;
4979          LIn [3] = 0x14;
4980          LIn [4] = 0xDC;
4981          pLIn = LIn;
4982          errorCode = U_ZERO_ERROR;
4983          pUOut = UOut;
4984
4985          ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
4986          if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
4987          {
4988             log_err("Unexpected results on chopped low surrogate\n");
4989          }
4990
4991          /* chopped at surrogate boundary */
4992          LIn [0] = 0x14;
4993          LIn [1] = 0xD8;
4994          LIn [2] = 0x01;
4995          pLIn = LIn;
4996          errorCode = U_ZERO_ERROR;
4997          pUOut = UOut;
4998
4999          ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+3),off,TRUE, &errorCode);
5000          if (UOut[0] != 0xD801 || U_FAILURE(errorCode) || pUOut != UOut + 1 || pLIn != LIn + 3)
5001          {
5002             log_err("Unexpected results on chopped at surrogate boundary \n");
5003          }
5004
5005          /* unpaired surrogate plus valid Unichar */
5006          LIn [0] = 0x14;
5007          LIn [1] = 0xD8;
5008          LIn [2] = 0x01;
5009          LIn [3] = 0x14;
5010          LIn [4] = 0xC9;
5011          LIn [5] = 0xD0;
5012          pLIn = LIn;
5013          errorCode = U_ZERO_ERROR;
5014          pUOut = UOut;
5015
5016          ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+6),off,TRUE, &errorCode);
5017          if (UOut[0] != 0xD801 || UOut[1] != 0xC9D0 || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 6)
5018          {
5019             log_err("Unexpected results after unpaired surrogate plus valid Unichar \n");
5020          }
5021
5022       /* unpaired surrogate plus chopped Unichar */
5023          LIn [0] = 0x14;
5024          LIn [1] = 0xD8;
5025          LIn [2] = 0x01;
5026          LIn [3] = 0x14;
5027          LIn [4] = 0xC9;
5028
5029          pLIn = LIn;
5030          errorCode = U_ZERO_ERROR;
5031          pUOut = UOut;
5032
5033          ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5034          if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
5035          {
5036             log_err("Unexpected results after unpaired surrogate plus chopped Unichar \n");
5037          }
5038
5039          /* unpaired surrogate plus valid non-Unichar */
5040          LIn [0] = 0x14;
5041          LIn [1] = 0xD8;
5042          LIn [2] = 0x01;
5043          LIn [3] = 0x0F;
5044          LIn [4] = 0x3B;
5045
5046          pLIn = LIn;
5047          errorCode = U_ZERO_ERROR;
5048          pUOut = UOut;
5049
5050          ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5051          if (UOut[0] != 0xD801 || UOut[1] != 0x1B || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 5)
5052          {
5053             log_err("Unexpected results after unpaired surrogate plus valid non-Unichar\n");
5054          }
5055
5056          /* unpaired surrogate plus chopped non-Unichar */
5057          LIn [0] = 0x14;
5058          LIn [1] = 0xD8;
5059          LIn [2] = 0x01;
5060          LIn [3] = 0x0F;
5061
5062          pLIn = LIn;
5063          errorCode = U_ZERO_ERROR;
5064          pUOut = UOut;
5065
5066          ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+4),off,TRUE, &errorCode);
5067
5068          if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 4)
5069          {
5070             log_err("Unexpected results after unpaired surrogate plus chopped non-Unichar\n");
5071          }
5072        }
5073     }
5074    ucnv_close(cnv);  /* final cleanup */
5075 }
5076
5077
5078 static void TestJitterbug255()
5079 {
5080     const uint8_t testBytes[] = { 0x95, 0xcf, 0x8a, 0xb7, 0x0d, 0x0a, 0x00 };
5081     const uint8_t *testBuffer = testBytes;
5082     const uint8_t *testEnd = testBytes + sizeof(testBytes);
5083     UErrorCode status = U_ZERO_ERROR;
5084     UChar32 result;
5085     UConverter *cnv = 0;
5086
5087     cnv = ucnv_open("shift-jis", &status);
5088     if (U_FAILURE(status) || cnv == 0) {
5089         log_data_err("Failed to open the converter for SJIS.\n");
5090                 return;
5091     }
5092     while (testBuffer != testEnd)
5093     {
5094         result = ucnv_getNextUChar (cnv, (const char **)&testBuffer, (const char *)testEnd , &status);
5095         if (U_FAILURE(status))
5096         {
5097             log_err("Failed to convert the next UChar for SJIS.\n");
5098             break;
5099         }
5100     }
5101     ucnv_close(cnv);
5102 }
5103
5104 static void TestEBCDICUS4XML()
5105 {
5106     UChar unicodes_x[] = {0x0000, 0x0000, 0x0000, 0x0000};
5107     static const UChar toUnicodeMaps_x[] = {0x000A, 0x000A, 0x000D, 0x0000};
5108     static const char fromUnicodeMaps_x[] = {0x25, 0x25, 0x0D, 0x00};
5109     static const char newLines_x[] = {0x25, 0x15, 0x0D, 0x00};
5110     char target_x[] = {0x00, 0x00, 0x00, 0x00};
5111     UChar *unicodes = unicodes_x;
5112     const UChar *toUnicodeMaps = toUnicodeMaps_x;
5113     char *target = target_x;
5114     const char* fromUnicodeMaps = fromUnicodeMaps_x, *newLines = newLines_x;
5115     UErrorCode status = U_ZERO_ERROR;
5116     UConverter *cnv = 0;
5117
5118     cnv = ucnv_open("ebcdic-xml-us", &status);
5119     if (U_FAILURE(status) || cnv == 0) {
5120         log_data_err("Failed to open the converter for EBCDIC-XML-US.\n");
5121         return;
5122     }
5123     ucnv_toUnicode(cnv, &unicodes, unicodes+3, (const char**)&newLines, newLines+3, NULL, TRUE, &status);
5124     if (U_FAILURE(status) || memcmp(unicodes_x, toUnicodeMaps, sizeof(UChar)*3) != 0) {
5125         log_err("To Unicode conversion failed in EBCDICUS4XML test. %s\n",
5126             u_errorName(status));
5127         printUSeqErr(unicodes_x, 3);
5128         printUSeqErr(toUnicodeMaps, 3);
5129     }
5130     status = U_ZERO_ERROR;
5131     ucnv_fromUnicode(cnv, &target, target+3, (const UChar**)&toUnicodeMaps, toUnicodeMaps+3, NULL, TRUE, &status);
5132     if (U_FAILURE(status) || memcmp(target_x, fromUnicodeMaps, sizeof(char)*3) != 0) {
5133         log_err("From Unicode conversion failed in EBCDICUS4XML test. %s\n",
5134             u_errorName(status));
5135         printSeqErr((const unsigned char*)target_x, 3);
5136         printSeqErr((const unsigned char*)fromUnicodeMaps, 3);
5137     }
5138     ucnv_close(cnv);
5139 }
5140
5141 #if !UCONFIG_NO_COLLATION
5142
5143 static void TestJitterbug981(){
5144   const UChar* rules;
5145   int32_t rules_length, target_cap, bytes_needed;
5146   UErrorCode status = U_ZERO_ERROR;
5147   UConverter *utf8cnv;
5148   UCollator* myCollator;
5149   char buff[50000];
5150   int numNeeded=0;
5151   utf8cnv = ucnv_open ("utf8", &status);
5152   if(U_FAILURE(status)){
5153       log_err("Could not open UTF-8 converter. Error: %s", u_errorName(status));
5154       return;
5155   }
5156   myCollator = ucol_open("zh", &status);
5157   if(U_FAILURE(status)){
5158       log_err("Could not open collator for zh locale. Error: %s", u_errorName(status));
5159       return;
5160   }
5161
5162   rules = ucol_getRules(myCollator, &rules_length);
5163
5164   target_cap = 0;
5165   do {
5166       ucnv_reset(utf8cnv);
5167       status = U_ZERO_ERROR;
5168       bytes_needed = ucnv_fromUChars(utf8cnv, buff, target_cap,
5169                                      rules, rules_length, &status);
5170       target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1;
5171       if(numNeeded!=0 && numNeeded!= bytes_needed){
5172           log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5173       }
5174       numNeeded = bytes_needed;
5175   } while (status == U_BUFFER_OVERFLOW_ERROR);
5176   ucol_close(myCollator);
5177   ucnv_close(utf8cnv);
5178 }
5179
5180 #endif
5181
5182 static void TestJitterbug1293(){
5183     UChar src[] = {0x30DE, 0x30A4, 0x5E83, 0x544A, 0x30BF, 0x30A4, 0x30D7,0x000};
5184     char target[256];
5185     UErrorCode status = U_ZERO_ERROR;
5186     UConverter* conv=NULL;
5187     int32_t target_cap, bytes_needed, numNeeded = 0;
5188     conv = ucnv_open("shift-jis",&status);
5189     if(U_FAILURE(status)){
5190       log_data_err("Could not open Shift-Jis converter. Error: %s", u_errorName(status));
5191       return;
5192     }
5193
5194     do{
5195         target_cap =0;
5196         bytes_needed = ucnv_fromUChars(conv,target,256,src,u_strlen(src),&status);
5197         target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1;
5198         if(numNeeded!=0 && numNeeded!= bytes_needed){
5199           log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5200         }
5201         numNeeded = bytes_needed;
5202     } while (status == U_BUFFER_OVERFLOW_ERROR);
5203     if(U_FAILURE(status)){
5204       log_err("An error occured in ucnv_fromUChars. Error: %s", u_errorName(status));
5205       return;
5206     }
5207     ucnv_close(conv);
5208 }
5209
5210 #endif