icuSources/test/cintltst/nucnvtst.c

   1 /********************************************************************
   2  * COPYRIGHT:
   3  * Copyright (c) 1997-2004, International Business Machines Corporation and
   4  * others. All Rights Reserved.
   5  ********************************************************************/
   6 /********************************************************************************
   7 *
   8 * File CCONVTST.C
   9 *
  10 * Modification History:
  11 *        Name                     Description
  12 *    Steven R. Loomis     7/8/1999      Adding input buffer test
  13 *********************************************************************************
  14 */
  15 #include <stdio.h>
  16 #include "cstring.h"
  17 #include "unicode/uloc.h"
  18 #include "unicode/ucnv.h"
  19 #include "unicode/ucnv_err.h"
  20 #include "cintltst.h"
  21 #include "unicode/utypes.h"
  22 #include "unicode/ustring.h"
  23 #include "unicode/ucol.h"
  24 #include "cmemory.h"
  25
  26 static void TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message);
  27 static void TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message);
  28 #if !UCONFIG_NO_COLLATION
  29 static void TestJitterbug981(void);
  30 #endif
  31 static void TestJitterbug1293(void);
  32 static void TestNewConvertWithBufferSizes(int32_t osize, int32_t isize) ;
  33 static void TestConverterTypesAndStarters(void);
  34 static void TestAmbiguous(void);
  35 static void TestSignatureDetection(void);
  36 static void TestUTF7(void);
  37 static void TestIMAP(void);
  38 static void TestUTF8(void);
  39 static void TestCESU8(void);
  40 static void TestUTF16(void);
  41 static void TestUTF16BE(void);
  42 static void TestUTF16LE(void);
  43 static void TestUTF32(void);
  44 static void TestUTF32BE(void);
  45 static void TestUTF32LE(void);
  46 static void TestLATIN1(void);
  47 static void TestSBCS(void);
  48 static void TestDBCS(void);
  49 static void TestMBCS(void);
  50 #ifdef U_ENABLE_GENERIC_ISO_2022
  51 static void TestISO_2022(void);
  52 #endif
  53 static void TestISO_2022_JP(void);
  54 static void TestISO_2022_JP_1(void);
  55 static void TestISO_2022_JP_2(void);
  56 static void TestISO_2022_KR(void);
  57 static void TestISO_2022_KR_1(void);
  58 static void TestISO_2022_CN(void);
  59 static void TestISO_2022_CN_EXT(void);
  60 static void TestJIS(void);
  61 static void TestHZ(void);
  62 static void TestSCSU(void);
  63 static void TestEBCDIC_STATEFUL(void);
  64 static void TestGB18030(void);
  65 static void TestLMBCS(void);
  66 static void TestJitterbug255(void);
  67 static void TestEBCDICUS4XML(void);
  68 static void TestJitterbug915(void);
  69 static void TestISCII(void);
  70 static void TestConv(const uint16_t in[],
  71                      int len,
  72                      const char* conv,
  73                      const char* lang,
  74                      char byteArr[],
  75                      int byteArrLen);
  76 static void TestRoundTrippingAllUTF(void);
  77 static void TestCoverageMBCS(void);
  78 static void TestJitterbug2346(void);
  79 static void TestJitterbug2411(void);
  80 void addTestNewConvert(TestNode** root);
  81
  82 /* open a converter, using test data if it begins with '@' */
  83 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err);
  84
  85
  86 #define NEW_MAX_BUFFER 999
  87
  88 static int32_t  gInBufferSize = NEW_MAX_BUFFER;
  89 static int32_t  gOutBufferSize = NEW_MAX_BUFFER;
  90 static char     gNuConvTestName[1024];
  91
  92 #define nct_min(x,y)  ((x<y) ? x : y)
  93
  94 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err)
  95 {
  96   if(cnv && cnv[0] == '@') {
  97     return ucnv_openPackage(loadTestData(err), cnv+1, err);
  98   } else {
  99     return ucnv_open(cnv, err);
 100   }
 101 }
 102
 103 static void printSeq(const unsigned char* a, int len)
 104 {
 105     int i=0;
 106     log_verbose("{");
 107     while (i<len)
 108         log_verbose("0x%02x ", a[i++]);
 109     log_verbose("}\n");
 110 }
 111
 112 static void printUSeq(const UChar* a, int len)
 113 {
 114     int i=0;
 115     log_verbose("{U+");
 116     while (i<len) log_verbose("0x%04x ", a[i++]);
 117     log_verbose("}\n");
 118 }
 119
 120 static void printSeqErr(const unsigned char* a, int len)
 121 {
 122     int i=0;
 123     fprintf(stderr, "{");
 124     while (i<len)
 125         fprintf(stderr, "0x%02x ", a[i++]);
 126     fprintf(stderr, "}\n");
 127 }
 128
 129 static void printUSeqErr(const UChar* a, int len)
 130 {
 131     int i=0;
 132     fprintf(stderr, "{U+");
 133     while (i<len)
 134         fprintf(stderr, "0x%04x ", a[i++]);
 135     fprintf(stderr,"}\n");
 136 }
 137
 138 static void
 139 TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message)
 140 {
 141      const char* s0;
 142      const char* s=(char*)source;
 143      const int32_t *r=results;
 144      UErrorCode errorCode=U_ZERO_ERROR;
 145      UChar32 c;
 146
 147      while(s<limit) {
 148         s0=s;
 149         c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
 150         if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
 151             break; /* no more significant input */
 152         } else if(U_FAILURE(errorCode)) {
 153             log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
 154             break;
 155         } else if(
 156             /* test the expected number of input bytes only if >=0 */
 157             (*r>=0 && (int32_t)(s-s0)!=*r) ||
 158             c!=*(r+1)
 159         ) {
 160             log_err("%s ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
 161                 message, c, (s-s0), *(r+1), *r);
 162             break;
 163         }
 164         r+=2;
 165     }
 166 }
 167
 168 static void
 169 TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message)
 170 {
 171      const char* s=(char*)source;
 172      UErrorCode errorCode=U_ZERO_ERROR;
 173      uint32_t c;
 174      c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
 175      if(errorCode != expected){
 176         log_err("FAIL: Expected:%s when %s-----Got:%s\n", myErrorName(expected), message, myErrorName(errorCode));
 177      }
 178      if(c != 0xFFFD && c != 0xffff){
 179         log_err("FAIL: Expected return value of 0xfffd or 0xffff when %s-----Got 0x%lx\n", message, c);
 180      }
 181
 182 }
 183
 184 static void TestInBufSizes(void)
 185 {
 186   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,1);
 187 #if 1
 188   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,2);
 189   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,3);
 190   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,4);
 191   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,5);
 192   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,6);
 193   TestNewConvertWithBufferSizes(1,1);
 194   TestNewConvertWithBufferSizes(2,3);
 195   TestNewConvertWithBufferSizes(3,2);
 196 #endif
 197 }
 198
 199 static void TestOutBufSizes(void)
 200 {
 201 #if 1
 202   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,NEW_MAX_BUFFER);
 203   TestNewConvertWithBufferSizes(1,NEW_MAX_BUFFER);
 204   TestNewConvertWithBufferSizes(2,NEW_MAX_BUFFER);
 205   TestNewConvertWithBufferSizes(3,NEW_MAX_BUFFER);
 206   TestNewConvertWithBufferSizes(4,NEW_MAX_BUFFER);
 207   TestNewConvertWithBufferSizes(5,NEW_MAX_BUFFER);
 208
 209 #endif
 210 }
 211
 212
 213 void addTestNewConvert(TestNode** root)
 214 {
 215    addTest(root, &TestInBufSizes, "tsconv/nucnvtst/TestInBufSizes");
 216    addTest(root, &TestOutBufSizes, "tsconv/nucnvtst/TestOutBufSizes");
 217    addTest(root, &TestConverterTypesAndStarters, "tsconv/nucnvtst/TestConverterTypesAndStarters");
 218    addTest(root, &TestAmbiguous, "tsconv/nucnvtst/TestAmbiguous");
 219    addTest(root, &TestSignatureDetection, "tsconv/nucnvtst/TestSignatureDetection");
 220    addTest(root, &TestUTF7, "tsconv/nucnvtst/TestUTF7");
 221    addTest(root, &TestIMAP, "tsconv/nucnvtst/TestIMAP");
 222    addTest(root, &TestUTF8, "tsconv/nucnvtst/TestUTF8");
 223
 224    /* test ucnv_getNextUChar() for charsets that encode single surrogates with complete byte sequences */
 225    addTest(root, &TestCESU8, "tsconv/nucnvtst/TestCESU8");
 226    addTest(root, &TestUTF16, "tsconv/nucnvtst/TestUTF16");
 227    addTest(root, &TestUTF16BE, "tsconv/nucnvtst/TestUTF16BE");
 228    addTest(root, &TestUTF16LE, "tsconv/nucnvtst/TestUTF16LE");
 229    addTest(root, &TestUTF32, "tsconv/nucnvtst/TestUTF32");
 230    addTest(root, &TestUTF32BE, "tsconv/nucnvtst/TestUTF32BE");
 231    addTest(root, &TestUTF32LE, "tsconv/nucnvtst/TestUTF32LE");
 232    addTest(root, &TestLMBCS, "tsconv/nucnvtst/TestLMBCS");
 233
 234    addTest(root, &TestLATIN1, "tsconv/nucnvtst/TestLATIN1");
 235    addTest(root, &TestSBCS, "tsconv/nucnvtst/TestSBCS");
 236    addTest(root, &TestDBCS, "tsconv/nucnvtst/TestDBCS");
 237    addTest(root, &TestMBCS, "tsconv/nucnvtst/TestMBCS");
 238 #ifdef U_ENABLE_GENERIC_ISO_2022
 239    addTest(root, &TestISO_2022, "tsconv/nucnvtst/TestISO_2022");
 240 #endif
 241    addTest(root, &TestISO_2022_JP, "tsconv/nucnvtst/TestISO_2022_JP");
 242    addTest(root, &TestJIS, "tsconv/nucnvtst/TestJIS");
 243    addTest(root, &TestISO_2022_JP_1, "tsconv/nucnvtst/TestISO_2022_JP_1");
 244    addTest(root, &TestISO_2022_JP_2, "tsconv/nucnvtst/TestISO_2022_JP_2");
 245    addTest(root, &TestISO_2022_KR, "tsconv/nucnvtst/TestISO_2022_KR");
 246    addTest(root, &TestISO_2022_KR_1, "tsconv/nucnvtst/TestISO_2022_KR_1");
 247    addTest(root, &TestISO_2022_CN, "tsconv/nucnvtst/TestISO_2022_CN");
 248    addTest(root, &TestISO_2022_CN_EXT, "tsconv/nucnvtst/TestISO_2022_CN_EXT");
 249    addTest(root, &TestJitterbug915, "tsconv/nucnvtst/TestJitterbug915");
 250    addTest(root, &TestHZ, "tsconv/nucnvtst/TestHZ");
 251    addTest(root, &TestSCSU, "tsconv/nucnvtst/TestSCSU");
 252    addTest(root, &TestEBCDIC_STATEFUL, "tsconv/nucnvtst/TestEBCDIC_STATEFUL");
 253    addTest(root, &TestGB18030, "tsconv/nucnvtst/TestGB18030");
 254    addTest(root, &TestJitterbug255, "tsconv/nucnvtst/TestJitterbug255");
 255    addTest(root, &TestEBCDICUS4XML, "tsconv/nucnvtst/TestEBCDICUS4XML");
 256    addTest(root, &TestISCII, "tsconv/nucnvtst/TestISCII");
 257 #if !UCONFIG_NO_COLLATION
 258    addTest(root, &TestJitterbug981, "tsconv/nucnvtst/TestJitterbug981");
 259 #endif
 260    addTest(root, &TestJitterbug1293, "tsconv/nucnvtst/TestJitterbug1293");
 261    addTest(root, &TestCoverageMBCS, "tsconv/nucnvtst/TestCoverageMBCS");
 262    addTest(root, &TestRoundTrippingAllUTF, "tsconv/nucnvtst/TestRoundTrippingAllUTF");
 263    addTest(root, &TestJitterbug2346, "tsconv/nucnvtst/TestJitterbug2346");
 264    addTest(root, &TestJitterbug2411, "tsconv/nucnvtst/TestJitterbug2411");
 265
 266 }
 267
 268
 269 /* Note that this test already makes use of statics, so it's not really
 270    multithread safe.
 271    This convenience function lets us make the error messages actually useful.
 272 */
 273
 274 static void setNuConvTestName(const char *codepage, const char *direction)
 275 {
 276     sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
 277         codepage,
 278         direction,
 279         (int)gInBufferSize,
 280         (int)gOutBufferSize);
 281 }
 282
 283 typedef enum
 284 {
 285   TC_OK       = 0,  /* test was OK */
 286   TC_MISMATCH = 1,  /* Match failed - err was printed */
 287   TC_FAIL     = 2   /* Test failed, don't print an err because it was already printed. */
 288 } ETestConvertResult;
 289
 290 /* Note: This function uses global variables and it will not do offset
 291 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
 292 static ETestConvertResult testConvertFromU( const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
 293                 const char *codepage, const int32_t *expectOffsets , UBool useFallback)
 294 {
 295     UErrorCode status = U_ZERO_ERROR;
 296     UConverter *conv = 0;
 297     uint8_t    junkout[NEW_MAX_BUFFER]; /* FIX */
 298     int32_t    junokout[NEW_MAX_BUFFER]; /* FIX */
 299     uint8_t *p;
 300     const UChar *src;
 301     uint8_t *end;
 302     uint8_t *targ;
 303     int32_t *offs;
 304     int i;
 305     int32_t   realBufferSize;
 306     uint8_t *realBufferEnd;
 307     const UChar *realSourceEnd;
 308     const UChar *sourceLimit;
 309     UBool checkOffsets = TRUE;
 310     UBool doFlush;
 311
 312     for(i=0;i<NEW_MAX_BUFFER;i++)
 313         junkout[i] = 0xF0;
 314     for(i=0;i<NEW_MAX_BUFFER;i++)
 315         junokout[i] = 0xFF;
 316
 317     setNuConvTestName(codepage, "FROM");
 318
 319     log_verbose("\n=========  %s\n", gNuConvTestName);
 320
 321     conv = my_ucnv_open(codepage, &status);
 322
 323     if(U_FAILURE(status))
 324     {
 325         log_data_err("Couldn't open converter %s\n",codepage);
 326         return TC_FAIL;
 327     }
 328     if(useFallback){
 329         ucnv_setFallback(conv,useFallback);
 330     }
 331
 332     log_verbose("Converter opened..\n");
 333
 334     src = source;
 335     targ = junkout;
 336     offs = junokout;
 337
 338     realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
 339     realBufferEnd = junkout + realBufferSize;
 340     realSourceEnd = source + sourceLen;
 341
 342     if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER )
 343         checkOffsets = FALSE;
 344
 345     do
 346     {
 347       end = nct_min(targ + gOutBufferSize, realBufferEnd);
 348       sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
 349
 350       doFlush = (UBool)(sourceLimit == realSourceEnd);
 351
 352       if(targ == realBufferEnd) {
 353         log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
 354         return TC_FAIL;
 355       }
 356       log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx  TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE");
 357
 358
 359       status = U_ZERO_ERROR;
 360
 361       ucnv_fromUnicode (conv,
 362                         (char **)&targ,
 363                         (const char*)end,
 364                         &src,
 365                         sourceLimit,
 366                         checkOffsets ? offs : NULL,
 367                         doFlush, /* flush if we're at the end of the input data */
 368                         &status);
 369     } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourceLimit < realSourceEnd) );
 370
 371     if(U_FAILURE(status)) {
 372       log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
 373       return TC_FAIL;
 374     }
 375
 376     log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
 377                 sourceLen, targ-junkout);
 378
 379     if(VERBOSITY)
 380     {
 381       char junk[9999];
 382       char offset_str[9999];
 383       uint8_t *ptr;
 384
 385       junk[0] = 0;
 386       offset_str[0] = 0;
 387       for(ptr = junkout;ptr<targ;ptr++) {
 388         sprintf(junk + strlen(junk), "0x%02x, ", (int)(0xFF & *ptr));
 389         sprintf(offset_str + strlen(offset_str), "0x%02x, ", (int)(0xFF & junokout[ptr-junkout]));
 390       }
 391
 392       log_verbose(junk);
 393       printSeq((const uint8_t *)expect, expectLen);
 394       if ( checkOffsets ) {
 395         log_verbose("\nOffsets:");
 396         log_verbose(offset_str);
 397       }
 398       log_verbose("\n");
 399     }
 400     ucnv_close(conv);
 401
 402     if(expectLen != targ-junkout) {
 403       log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
 404       log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
 405       printf("\nGot:");
 406       printSeqErr((const unsigned char*)junkout, targ-junkout);
 407       printf("\nExpected:");
 408       printSeqErr((const unsigned char*)expect, expectLen);
 409       return TC_MISMATCH;
 410     }
 411
 412     if (checkOffsets && (expectOffsets != 0) ) {
 413       log_verbose("comparing %d offsets..\n", targ-junkout);
 414       if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
 415         log_err("did not get the expected offsets. %s\n", gNuConvTestName);
 416         printSeqErr((const unsigned char*)junkout, targ-junkout);
 417         log_err("\n");
 418         log_err("Got  :     ");
 419         for(p=junkout;p<targ;p++) {
 420           log_err("%d,", junokout[p-junkout]);
 421         }
 422         log_err("\n");
 423         log_err("Expected:  ");
 424         for(i=0; i<(targ-junkout); i++) {
 425           log_err("%d,", expectOffsets[i]);
 426         }
 427         log_err("\n");
 428       }
 429     }
 430
 431     log_verbose("comparing..\n");
 432     if(!memcmp(junkout, expect, expectLen)) {
 433       log_verbose("Matches!\n");
 434       return TC_OK;
 435     } else {
 436       log_err("String does not match u->%s\n", gNuConvTestName);
 437       printUSeqErr(source, sourceLen);
 438       printf("\nGot:");
 439       printSeqErr((const unsigned char *)junkout, expectLen);
 440       printf("\nExpected:");
 441       printSeqErr((const unsigned char *)expect, expectLen);
 442
 443       return TC_MISMATCH;
 444     }
 445 }
 446
 447 /* Note: This function uses global variables and it will not do offset
 448 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
 449 static ETestConvertResult testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
 450                                           const char *codepage, const int32_t *expectOffsets, UBool useFallback)
 451 {
 452     UErrorCode status = U_ZERO_ERROR;
 453     UConverter *conv = 0;
 454     UChar    junkout[NEW_MAX_BUFFER]; /* FIX */
 455     int32_t    junokout[NEW_MAX_BUFFER]; /* FIX */
 456     const uint8_t *src;
 457     const uint8_t *realSourceEnd;
 458     const uint8_t *srcLimit;
 459     UChar *p;
 460     UChar *targ;
 461     UChar *end;
 462     int32_t *offs;
 463     int i;
 464     UBool   checkOffsets = TRUE;
 465
 466     int32_t   realBufferSize;
 467     UChar *realBufferEnd;
 468
 469
 470     for(i=0;i<NEW_MAX_BUFFER;i++)
 471         junkout[i] = 0xFFFE;
 472
 473     for(i=0;i<NEW_MAX_BUFFER;i++)
 474         junokout[i] = -1;
 475
 476     setNuConvTestName(codepage, "TO");
 477
 478     log_verbose("\n=========  %s\n", gNuConvTestName);
 479
 480     conv = my_ucnv_open(codepage, &status);
 481
 482     if(U_FAILURE(status))
 483     {
 484         log_data_err("Couldn't open converter %s\n",gNuConvTestName);
 485         return TC_FAIL;
 486     }
 487     if(useFallback){
 488         ucnv_setFallback(conv,useFallback);
 489     }
 490     log_verbose("Converter opened..\n");
 491
 492     src = source;
 493     targ = junkout;
 494     offs = junokout;
 495
 496     realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
 497     realBufferEnd = junkout + realBufferSize;
 498     realSourceEnd = src + sourcelen;
 499
 500     if ( gOutBufferSize != realBufferSize ||  gInBufferSize != NEW_MAX_BUFFER )
 501         checkOffsets = FALSE;
 502
 503     do
 504     {
 505         end = nct_min( targ + gOutBufferSize, realBufferEnd);
 506         srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
 507
 508         if(targ == realBufferEnd)
 509         {
 510             log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjet=%08lx %s",targ,gNuConvTestName);
 511             return TC_FAIL;
 512         }
 513         log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
 514
 515         /* oldTarg = targ; */
 516
 517         status = U_ZERO_ERROR;
 518
 519         ucnv_toUnicode (conv,
 520                 &targ,
 521                 end,
 522                 (const char **)&src,
 523                 (const char *)srcLimit,
 524                 checkOffsets ? offs : NULL,
 525                 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of hte source data */
 526                 &status);
 527
 528         /*        offs += (targ-oldTarg); */
 529
 530       } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
 531
 532     if(U_FAILURE(status))
 533     {
 534         log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
 535         return TC_FAIL;
 536     }
 537
 538     log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
 539         sourcelen, targ-junkout);
 540     if(VERBOSITY)
 541     {
 542         char junk[9999];
 543         char offset_str[9999];
 544         UChar *ptr;
 545
 546         junk[0] = 0;
 547         offset_str[0] = 0;
 548
 549         for(ptr = junkout;ptr<targ;ptr++)
 550         {
 551             sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr);
 552             sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[ptr-junkout]);
 553         }
 554
 555         log_verbose(junk);
 556         printUSeq(expect, expectlen);
 557         if ( checkOffsets )
 558           {
 559             log_verbose("\nOffsets:");
 560             log_verbose(offset_str);
 561           }
 562         log_verbose("\n");
 563     }
 564     ucnv_close(conv);
 565
 566     log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
 567
 568     if (checkOffsets && (expectOffsets != 0))
 569     {
 570         if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){
 571             log_err("did not get the expected offsets. %s\n",gNuConvTestName);
 572             log_err("Got:      ");
 573             for(p=junkout;p<targ;p++) {
 574                 log_err("%d,", junokout[p-junkout]);
 575             }
 576             log_err("\n");
 577             log_err("Expected: ");
 578             for(i=0; i<(targ-junkout); i++) {
 579                 log_err("%d,", expectOffsets[i]);
 580             }
 581             log_err("\n");
 582             log_err("output:   ");
 583             for(i=0; i<(targ-junkout); i++) {
 584                 log_err("%X,", junkout[i]);
 585             }
 586             log_err("\n");
 587             log_err("input:    ");
 588             for(i=0; i<(src-source); i++) {
 589                 log_err("%X,", (unsigned char)source[i]);
 590             }
 591             log_err("\n");
 592         }
 593     }
 594
 595     if(!memcmp(junkout, expect, expectlen*2))
 596     {
 597         log_verbose("Matches!\n");
 598         return TC_OK;
 599     }
 600     else
 601     {
 602         log_err("String does not match. %s\n", gNuConvTestName);
 603         log_verbose("String does not match. %s\n", gNuConvTestName);
 604         printf("\nGot:");
 605         printUSeqErr(junkout, expectlen);
 606         printf("\nExpected:");
 607         printUSeqErr(expect, expectlen);
 608         return TC_MISMATCH;
 609     }
 610 }
 611
 612
 613 static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize )
 614 {
 615 /** test chars #1 */
 616     /*  1 2 3  1Han 2Han 3Han .  */
 617     UChar    sampleText[] =
 618      { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09,  0x002E  };
 619
 620
 621     const uint8_t expectedUTF8[] =
 622      { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
 623     int32_t  toUTF8Offs[] =
 624      { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07};
 625     int32_t fmUTF8Offs[] =
 626      { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d };
 627
 628 #ifdef U_ENABLE_GENERIC_ISO_2022
 629     /* Same as UTF8, but with ^[%B preceeding */
 630     const uint8_t expectedISO2022[] =
 631      { 0x1b, 0x25, 0x42, 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
 632     int32_t  toISO2022Offs[]     =
 633      { -1, -1, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x04,
 634        0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07 }; /* right? */
 635     int32_t fmISO2022Offs[] =
 636      { 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x000a, 0x000d, 0x0010 }; /* is this right? */
 637 #endif
 638
 639     /*  1 2 3 0, <SO> h1 h2 h3 <SI> . EBCDIC_STATEFUL */
 640     const uint8_t expectedIBM930[] =
 641      { 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0x4B };
 642     int32_t  toIBM930Offs[] =
 643      { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, };
 644     int32_t fmIBM930Offs[] =
 645      { 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c};
 646
 647     /* 1 2 3 0 h1 h2 h3 . MBCS*/
 648     const uint8_t expectedIBM943[] =
 649      {  0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e };
 650     int32_t  toIBM943Offs    [] =
 651      {  0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07 };
 652     int32_t fmIBM943Offs[] =
 653      { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a};
 654
 655     /* 1 2 3 0 h1 h2 h3 . DBCS*/
 656     const uint8_t expectedIBM9027[] =
 657      {  0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48, 0x4c, 0x55, 0xfe, 0xfe};
 658     int32_t  toIBM9027Offs    [] =
 659      {  0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07};
 660
 661      /* 1 2 3 0 <?> <?> <?> . SBCS*/
 662     const uint8_t expectedIBM920[] =
 663      {  0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e };
 664     int32_t  toIBM920Offs    [] =
 665      {  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 };
 666
 667     /* 1 2 3 0 <?> <?> <?> . SBCS*/
 668     const uint8_t expectedISO88593[] =
 669      { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E };
 670     int32_t  toISO88593Offs[]     =
 671      { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07};
 672
 673     /* 1 2 3 0 <?> <?> <?> . LATIN_1*/
 674     const uint8_t expectedLATIN1[] =
 675      { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E };
 676     int32_t  toLATIN1Offs[]     =
 677      { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07};
 678
 679
 680     /*  etc */
 681     const uint8_t expectedUTF16BE[] =
 682      { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x00, 0x2e };
 683     int32_t      toUTF16BEOffs[]=
 684      { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07};
 685     int32_t fmUTF16BEOffs[] =
 686      { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c,  0x000e };
 687
 688     const uint8_t expectedUTF16LE[] =
 689      { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x4e, 0x2e, 0x00 };
 690     int32_t      toUTF16LEOffs[]=
 691      { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06,  0x07, 0x07};
 692     int32_t fmUTF16LEOffs[] =
 693      { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c,  0x000e };
 694
 695     const uint8_t expectedUTF32BE[] =
 696      { 0x00, 0x00, 0x00, 0x31,
 697        0x00, 0x00, 0x00, 0x32,
 698        0x00, 0x00, 0x00, 0x33,
 699        0x00, 0x00, 0x00, 0x00,
 700        0x00, 0x00, 0x4e, 0x00,
 701        0x00, 0x00, 0x4e, 0x8c,
 702        0x00, 0x00, 0x4e, 0x09,
 703        0x00, 0x00, 0x00, 0x2e };
 704     int32_t      toUTF32BEOffs[]=
 705      { 0x00, 0x00, 0x00, 0x00,
 706        0x01, 0x01, 0x01, 0x01,
 707        0x02, 0x02, 0x02, 0x02,
 708        0x03, 0x03, 0x03, 0x03,
 709        0x04, 0x04, 0x04, 0x04,
 710        0x05, 0x05, 0x05, 0x05,
 711        0x06, 0x06, 0x06, 0x06,
 712        0x07, 0x07, 0x07, 0x07,
 713        0x08, 0x08, 0x08, 0x08 };
 714     int32_t fmUTF32BEOffs[] =
 715      { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018,  0x001c };
 716
 717     const uint8_t expectedUTF32LE[] =
 718      { 0x31, 0x00, 0x00, 0x00,
 719        0x32, 0x00, 0x00, 0x00,
 720        0x33, 0x00, 0x00, 0x00,
 721        0x00, 0x00, 0x00, 0x00,
 722        0x00, 0x4e, 0x00, 0x00,
 723        0x8c, 0x4e, 0x00, 0x00,
 724        0x09, 0x4e, 0x00, 0x00,
 725        0x2e, 0x00, 0x00, 0x00 };
 726     int32_t      toUTF32LEOffs[]=
 727      { 0x00, 0x00, 0x00, 0x00,
 728        0x01, 0x01, 0x01, 0x01,
 729        0x02, 0x02, 0x02, 0x02,
 730        0x03, 0x03, 0x03, 0x03,
 731        0x04, 0x04, 0x04, 0x04,
 732        0x05, 0x05, 0x05, 0x05,
 733        0x06, 0x06, 0x06, 0x06,
 734        0x07, 0x07, 0x07, 0x07,
 735        0x08, 0x08, 0x08, 0x08 };
 736     int32_t fmUTF32LEOffs[] =
 737      { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018,  0x001c };
 738
 739
 740
 741
 742 /** Test chars #2 **/
 743
 744     /* Sahha [health],  slashed h's */
 745     const UChar malteseUChars[] = { 0x0053, 0x0061, 0x0127, 0x0127, 0x0061 };
 746     const uint8_t expectedMaltese913[] = { 0x53, 0x61, 0xB1, 0xB1, 0x61 };
 747
 748     /* LMBCS */
 749     const UChar LMBCSUChars[]  = { 0x0027, 0x010A, 0x0000, 0x0127, 0x2666, 0x0220 };
 750     const uint8_t expectedLMBCS[] = { 0x27, 0x06, 0x04, 0x00, 0x01, 0x73, 0x01, 0x04, 0x14, 0x02, 0x20 };
 751     int32_t toLMBCSOffs[]   = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x04 , 0x05, 0x05, 0x05 };
 752     int32_t fmLMBCSOffs[]   = { 0x0000, 0x0001, 0x0003, 0x0004, 0x0006, 0x0008};
 753     /*********************************** START OF CODE finally *************/
 754
 755   gInBufferSize = insize;
 756   gOutBufferSize = outsize;
 757
 758   log_verbose("\n\n\nTesting conversions with InputBufferSize = %d, OutputBufferSize = %d\n", gInBufferSize, gOutBufferSize);
 759
 760
 761 #if 1
 762     /*UTF-8*/
 763     testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
 764         expectedUTF8, sizeof(expectedUTF8), "UTF8", toUTF8Offs,FALSE );
 765
 766     log_verbose("Test surrogate behaviour for UTF8\n");
 767     {
 768         const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01 };
 769         const uint8_t expectedUTF8test2[]= { 0xe2, 0x82, 0xac,
 770                            0xf0, 0x90, 0x90, 0x81,
 771                            0xef, 0xbf, 0xbd
 772         };
 773         int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3 };
 774         testConvertFromU(testinput, sizeof(testinput)/sizeof(testinput[0]),
 775                          expectedUTF8test2, sizeof(expectedUTF8test2), "UTF8", offsets,FALSE );
 776
 777
 778     }
 779 #ifdef U_ENABLE_GENERIC_ISO_2022
 780     /*ISO-2022*/
 781     testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
 782         expectedISO2022, sizeof(expectedISO2022), "ISO_2022", toISO2022Offs,FALSE );
 783 #endif
 784     /*UTF16 LE*/
 785     testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
 786         expectedUTF16LE, sizeof(expectedUTF16LE), "utf-16le", toUTF16LEOffs,FALSE );
 787     /*UTF16 BE*/
 788     testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
 789         expectedUTF16BE, sizeof(expectedUTF16BE), "utf-16be", toUTF16BEOffs,FALSE );
 790     /*UTF32 LE*/
 791     testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
 792         expectedUTF32LE, sizeof(expectedUTF32LE), "utf-32le", toUTF32LEOffs,FALSE );
 793     /*UTF32 BE*/
 794     testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
 795         expectedUTF32BE, sizeof(expectedUTF32BE), "utf-32be", toUTF32BEOffs,FALSE );
 796     /*LATIN_1*/
 797     testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
 798         expectedLATIN1, sizeof(expectedLATIN1), "LATIN_1", toLATIN1Offs,FALSE );
 799     /*EBCDIC_STATEFUL*/
 800     testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
 801         expectedIBM930, sizeof(expectedIBM930), "ibm-930", toIBM930Offs,FALSE );
 802
 803     testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
 804         expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE );
 805
 806     /*MBCS*/
 807
 808     testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
 809         expectedIBM943, sizeof(expectedIBM943), "ibm-943", toIBM943Offs,FALSE );
 810     /*DBCS*/
 811     testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
 812         expectedIBM9027, sizeof(expectedIBM9027), "@ibm9027", toIBM9027Offs,FALSE );
 813     /*SBCS*/
 814     testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
 815         expectedIBM920, sizeof(expectedIBM920), "ibm-920", toIBM920Offs,FALSE );
 816     /*SBCS*/
 817     testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
 818         expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE );
 819
 820
 821 /****/
 822 #endif
 823
 824 #if 1
 825     /*UTF-8*/
 826     testConvertToU(expectedUTF8, sizeof(expectedUTF8),
 827         sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf8", fmUTF8Offs,FALSE);
 828 #ifdef U_ENABLE_GENERIC_ISO_2022
 829     /*ISO-2022*/
 830     testConvertToU(expectedISO2022, sizeof(expectedISO2022),
 831         sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ISO_2022", fmISO2022Offs,FALSE);
 832 #endif
 833     /*UTF16 LE*/
 834     testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
 835         sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE);
 836     /*UTF16 BE*/
 837     testConvertToU(expectedUTF16BE, sizeof(expectedUTF16BE),
 838         sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16be", fmUTF16BEOffs,FALSE);
 839     /*UTF32 LE*/
 840     testConvertToU(expectedUTF32LE, sizeof(expectedUTF32LE),
 841         sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32le", fmUTF32LEOffs,FALSE);
 842     /*UTF32 BE*/
 843     testConvertToU(expectedUTF32BE, sizeof(expectedUTF32BE),
 844         sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32be", fmUTF32BEOffs,FALSE);
 845     /*EBCDIC_STATEFUL*/
 846     testConvertToU(expectedIBM930, sizeof(expectedIBM930),
 847         sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ibm-930", fmIBM930Offs,FALSE);
 848     /*MBCS*/
 849     testConvertToU(expectedIBM943, sizeof(expectedIBM943),
 850         sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ibm-943", fmIBM943Offs,FALSE);
 851
 852     /* Try it again to make sure it still works */
 853     testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
 854         sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE);
 855
 856     testConvertToU(expectedMaltese913, sizeof(expectedMaltese913),
 857         malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]), "latin3", NULL,FALSE);
 858
 859     testConvertFromU(malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]),
 860         expectedMaltese913, sizeof(expectedMaltese913), "iso-8859-3", NULL,FALSE );
 861
 862     /*LMBCS*/
 863     testConvertFromU(LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]),
 864         expectedLMBCS, sizeof(expectedLMBCS), "LMBCS-1", toLMBCSOffs,FALSE );
 865     testConvertToU(expectedLMBCS, sizeof(expectedLMBCS),
 866         LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]), "LMBCS-1", fmLMBCSOffs,FALSE);
 867
 868     /* UTF-7 examples are mostly from http://www.imc.org/rfc2152 */
 869     {
 870         /* encode directly set D and set O */
 871         static const uint8_t utf7[] = {
 872             /*
 873                 Hi Mom -+Jjo--!
 874                 A+ImIDkQ.
 875                 +-
 876                 +ZeVnLIqe
 877             */
 878             0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
 879             0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
 880             0x2b, 0x2d,
 881             0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65
 882         };
 883         static const UChar unicode[] = {
 884             /*
 885                 Hi Mom -<WHITE SMILING FACE>-!
 886                 A<NOT IDENTICAL TO><ALPHA>.
 887                 +
 888                 [Japanese word "nihongo"]
 889             */
 890             0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
 891             0x41, 0x2262, 0x0391, 0x2e,
 892             0x2b,
 893             0x65e5, 0x672c, 0x8a9e
 894         };
 895         static const int32_t toUnicodeOffsets[] = {
 896             0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
 897             15, 17, 19, 23,
 898             24,
 899             27, 29, 32
 900         };
 901         static const int32_t fromUnicodeOffsets[] = {
 902             0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
 903             11, 12, 12, 12, 13, 13, 13, 13, 14,
 904             15, 15,
 905             16, 16, 16, 17, 17, 17, 18, 18, 18
 906         };
 907
 908         /* same but escaping set O (the exclamation mark) */
 909         static const uint8_t utf7Restricted[] = {
 910             /*
 911                 Hi Mom -+Jjo--+ACE-
 912                 A+ImIDkQ.
 913                 +-
 914                 +ZeVnLIqe
 915             */
 916             0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x2b, 0x41, 0x43, 0x45, 0x2d,
 917             0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
 918             0x2b, 0x2d,
 919             0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65
 920         };
 921         static const int32_t toUnicodeOffsetsR[] = {
 922             0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 15,
 923             19, 21, 23, 27,
 924             28,
 925             31, 33, 36
 926         };
 927         static const int32_t fromUnicodeOffsetsR[] = {
 928             0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 10, 10, 10, 10,
 929             11, 12, 12, 12, 13, 13, 13, 13, 14,
 930             15, 15,
 931             16, 16, 16, 17, 17, 17, 18, 18, 18
 932         };
 933
 934         testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7, sizeof(utf7), "UTF-7", fromUnicodeOffsets,FALSE);
 935
 936         testConvertToU(utf7, sizeof(utf7), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7", toUnicodeOffsets,FALSE);
 937
 938         testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7Restricted, sizeof(utf7Restricted), "UTF-7,version=1", fromUnicodeOffsetsR,FALSE);
 939
 940         testConvertToU(utf7Restricted, sizeof(utf7Restricted), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7,version=1", toUnicodeOffsetsR,FALSE);
 941     }
 942
 943     /*
 944      * IMAP-mailbox-name examples are mostly from http://www.imc.org/rfc2152,
 945      * modified according to RFC 2060,
 946      * and supplemented with the one example in RFC 2060 itself.
 947      */
 948     {
 949         static const uint8_t imap[] = {
 950             /*  Hi Mom -&Jjo--!
 951                 A&ImIDkQ-.
 952                 &-
 953                 &ZeVnLIqe-
 954                 \
 955                 ~peter
 956                 /mail
 957                 /&ZeVnLIqe-
 958                 /&U,BTFw-
 959             */
 960             0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x26, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
 961             0x41, 0x26, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2d, 0x2e,
 962             0x26, 0x2d,
 963             0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
 964             0x5c,
 965             0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
 966             0x2f, 0x6d, 0x61, 0x69, 0x6c,
 967             0x2f, 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
 968             0x2f, 0x26, 0x55, 0x2c, 0x42, 0x54, 0x46, 0x77, 0x2d
 969         };
 970         static const UChar unicode[] = {
 971             /*  Hi Mom -<WHITE SMILING FACE>-!
 972                 A<NOT IDENTICAL TO><ALPHA>.
 973                 &
 974                 [Japanese word "nihongo"]
 975                 \
 976                 ~peter
 977                 /mail
 978                 /<65e5, 672c, 8a9e>
 979                 /<53f0, 5317>
 980             */
 981             0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
 982             0x41, 0x2262, 0x0391, 0x2e,
 983             0x26,
 984             0x65e5, 0x672c, 0x8a9e,
 985             0x5c,
 986             0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
 987             0x2f, 0x6d, 0x61, 0x69, 0x6c,
 988             0x2f, 0x65e5, 0x672c, 0x8a9e,
 989             0x2f, 0x53f0, 0x5317
 990         };
 991         static const int32_t toUnicodeOffsets[] = {
 992             0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
 993             15, 17, 19, 24,
 994             25,
 995             28, 30, 33,
 996             37,
 997             38, 39, 40, 41, 42, 43,
 998             44, 45, 46, 47, 48,
 999             49, 51, 53, 56,
1000             60, 62, 64
1001         };
1002         static const int32_t fromUnicodeOffsets[] = {
1003             0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
1004             11, 12, 12, 12, 13, 13, 13, 13, 13, 14,
1005             15, 15,
1006             16, 16, 16, 17, 17, 17, 18, 18, 18, 18,
1007             19,
1008             20, 21, 22, 23, 24, 25,
1009             26, 27, 28, 29, 30,
1010             31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34,
1011             35, 36, 36, 36, 37, 37, 37, 37, 37
1012         };
1013
1014         testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, imap, sizeof(imap), "IMAP-mailbox-name", fromUnicodeOffsets,FALSE);
1015
1016         testConvertToU(imap, sizeof(imap), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "IMAP-mailbox-name", toUnicodeOffsets,FALSE);
1017     }
1018
1019     /* Test UTF-8 bad data handling*/
1020     {
1021         static const uint8_t utf8[]={
1022             0x61,
1023             0xf7, 0xbf, 0xbf, 0xbf,         /* > 10FFFF */
1024             0x00,
1025             0x62,
1026             0xfb, 0xbf, 0xbf, 0xbf, 0xbf,   /* > 10FFFF */
1027             0xfb, 0xbf, 0xbf, 0xbf, 0xbf,   /* > 10FFFF */
1028             0xf4, 0x8f, 0xbf, 0xbf,         /* 10FFFF */
1029             0xdf, 0xbf,                     /* 7ff */
1030             0xbf,                           /* truncated tail */
1031             0xf4, 0x90, 0x80, 0x80,         /* 11FFFF */
1032             0x02
1033         };
1034
1035         static const uint16_t utf8Expected[]={
1036             0x0061,
1037             0xfffd,
1038             0x0000,
1039             0x0062,
1040             0xfffd,
1041             0xfffd,
1042             0xdbff, 0xdfff,
1043             0x07ff,
1044             0xfffd,
1045             0xfffd,
1046             0x0002
1047         };
1048
1049         static const int32_t utf8Offsets[]={
1050             0, 1, 5, 6, 7, 12, 17, 17, 21, 23, 24, 28
1051         };
1052         testConvertToU(utf8, sizeof(utf8),
1053                        utf8Expected, sizeof(utf8Expected)/sizeof(utf8Expected[0]), "utf-8", utf8Offsets ,FALSE);
1054
1055     }
1056
1057     /* Test UTF-32BE bad data handling*/
1058     {
1059         static const uint8_t utf32[]={
1060             0x00, 0x00, 0x00, 0x61,
1061             0x00, 0x11, 0x00, 0x00,         /* 0x110000 out of range */
1062             0x00, 0x10, 0xff, 0xff,         /* 0x10FFFF in range */
1063             0x00, 0x00, 0x00, 0x62,
1064             0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
1065             0x7f, 0xff, 0xff, 0xff,         /* 0x7fffffff out of range */
1066             0x00, 0x00, 0x01, 0x62,
1067             0x00, 0x00, 0x02, 0x62
1068         };
1069
1070         static const uint16_t utf32Expected[]={
1071             0x0061,
1072             0xfffd,         /* 0x110000 out of range */
1073             0xDBFF,         /* 0x10FFFF in range */
1074             0xDFFF,
1075             0x0062,
1076             0xfffd,         /* 0xffffffff out of range */
1077             0xfffd,         /* 0x7fffffff out of range */
1078             0x0162,
1079             0x0262
1080         };
1081
1082         static const int32_t utf32Offsets[]={
1083             0, 4, 8, 8, 12, 16, 20, 24, 28
1084         };
1085         testConvertToU(utf32, sizeof(utf32),
1086                        utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32be", utf32Offsets ,FALSE);
1087
1088     }
1089
1090     /* Test UTF-32LE bad data handling*/
1091     {
1092         static const uint8_t utf32[]={
1093             0x61, 0x00, 0x00, 0x00,
1094             0x00, 0x00, 0x11, 0x00,         /* 0x110000 out of range */
1095             0xff, 0xff, 0x10, 0x00,         /* 0x10FFFF in range */
1096             0x62, 0x00, 0x00, 0x00,
1097             0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
1098             0xff, 0xff, 0xff, 0x7f,         /* 0x7fffffff out of range */
1099             0x62, 0x01, 0x00, 0x00,
1100             0x62, 0x02, 0x00, 0x00,
1101         };
1102
1103         static const uint16_t utf32Expected[]={
1104             0x0061,
1105             0xfffd,         /* 0x110000 out of range */
1106             0xDBFF,         /* 0x10FFFF in range */
1107             0xDFFF,
1108             0x0062,
1109             0xfffd,         /* 0xffffffff out of range */
1110             0xfffd,         /* 0x7fffffff out of range */
1111             0x0162,
1112             0x0262
1113         };
1114
1115         static const int32_t utf32Offsets[]={
1116             0, 4, 8, 8, 12, 16, 20, 24, 28
1117         };
1118         testConvertToU(utf32, sizeof(utf32),
1119             utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32le", utf32Offsets,FALSE );
1120
1121     }
1122 }
1123
1124 static void TestCoverageMBCS(){
1125 #if 0
1126     UErrorCode status = U_ZERO_ERROR;
1127     const char *directory = loadTestData(&status);
1128     char* tdpath = NULL;
1129     char* saveDirectory = (char*)malloc(sizeof(char) *(strlen(u_getDataDirectory())+1));
1130     int len = strlen(directory);
1131     char* index=NULL;
1132
1133     tdpath = (char*) malloc(sizeof(char) * (len * 2));
1134     uprv_strcpy(saveDirectory,u_getDataDirectory());
1135     log_verbose("Retrieved data directory %s \n",saveDirectory);
1136     uprv_strcpy(tdpath,directory);
1137     index=strrchr(tdpath,(char)U_FILE_SEP_CHAR);
1138
1139     if((unsigned int)(index-tdpath) != (strlen(tdpath)-1)){
1140             *(index+1)=0;
1141     }
1142     u_setDataDirectory(tdpath);
1143     log_verbose("ICU data directory is set to: %s \n" ,tdpath);
1144 #endif
1145
1146     /*some more test to increase the code coverage in MBCS.  Create an test converter from test1.ucm
1147       which is test file for MBCS conversion with single-byte codepage data.*/
1148     {
1149
1150         /* MBCS with single byte codepage data test1.ucm*/
1151         const UChar unicodeInput[]    = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0x0003};
1152         const uint8_t expectedtest1[] = { 0x00, 0x05, 0xff, 0x07, 0xff,};
1153         int32_t  totest1Offs[]        = { 0, 1, 2, 3, 5, };
1154
1155         /*from Unicode*/
1156         testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]),
1157             expectedtest1, sizeof(expectedtest1), "@test1", totest1Offs,FALSE );
1158     }
1159
1160     /*some more test to increase the code coverage in MBCS.  Create an test converter from test3.ucm
1161       which is test file for MBCS conversion with three-byte codepage data.*/
1162     {
1163
1164         /* MBCS with three byte codepage data test3.ucm*/
1165         const UChar unicodeInput[]    = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1166         const uint8_t expectedtest3[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x0b,  0x07,  0x01, 0x02, 0x0a,  0xff,};
1167         int32_t  totest3Offs[]        = { 0, 1, 2, 3, 3, 3, 4, 6, 6, 6, 8};
1168
1169         const uint8_t test3input[]    = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b,  0x07,  0x01, 0x02, 0x0a, 0x01, 0x02, 0x0c,};
1170         const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1171         int32_t fromtest3Offs[]       = { 0, 1, 2, 3, 6, 6, 7, 7, 10 };
1172
1173         /*from Unicode*/
1174         testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]),
1175             expectedtest3, sizeof(expectedtest3), "@test3", totest3Offs,FALSE );
1176
1177         /*to Unicode*/
1178         testConvertToU(test3input, sizeof(test3input),
1179             expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test3", fromtest3Offs ,FALSE);
1180
1181     }
1182
1183     /*some more test to increase the code coverage in MBCS.  Create an test converter from test4.ucm
1184       which is test file for MBCS conversion with four-byte codepage data.*/
1185     {
1186
1187         /* MBCS with three byte codepage data test4.ucm*/
1188         static const UChar unicodeInput[]    = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1189         static const uint8_t expectedtest4[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x03, 0x0b,  0x07,  0x01, 0x02, 0x03, 0x0a,  0xff,};
1190         static const int32_t totest4Offs[]   = { 0, 1, 2, 3, 3, 3, 3, 4, 6, 6, 6, 6, 8,};
1191
1192         static const uint8_t test4input[]    = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x03, 0x0b,  0x07,  0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0c,};
1193         static const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1194         static const int32_t fromtest4Offs[] = { 0, 1, 2, 3, 7, 7, 8, 8, 12,};
1195
1196         /*from Unicode*/
1197         testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]),
1198             expectedtest4, sizeof(expectedtest4), "@test4", totest4Offs,FALSE );
1199
1200         /*to Unicode*/
1201         testConvertToU(test4input, sizeof(test4input),
1202             expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test4", fromtest4Offs,FALSE );
1203
1204     }
1205 #if 0
1206     free(tdpath);
1207     /* restore the original data directory */
1208     log_verbose("Setting the data directory to %s \n", saveDirectory);
1209     u_setDataDirectory(saveDirectory);
1210     free(saveDirectory);
1211 #endif
1212
1213 }
1214
1215 static void TestConverterType(const char *convName, UConverterType convType) {
1216     UConverter* myConverter;
1217     UErrorCode err = U_ZERO_ERROR;
1218
1219     myConverter = my_ucnv_open(convName, &err);
1220
1221     if (U_FAILURE(err)) {
1222         log_data_err("Failed to create an %s converter\n", convName);
1223         return;
1224     }
1225     else
1226     {
1227         if (ucnv_getType(myConverter)!=convType) {
1228             log_err("ucnv_getType Failed for %s. Got enum value 0x%X\n",
1229                 convName, convType);
1230         }
1231         else {
1232             log_verbose("ucnv_getType %s ok\n", convName);
1233         }
1234     }
1235     ucnv_close(myConverter);
1236 }
1237
1238 static void TestConverterTypesAndStarters()
1239 {
1240     UConverter* myConverter;
1241     UErrorCode err = U_ZERO_ERROR;
1242     UBool mystarters[256];
1243
1244 /*    const UBool expectedKSCstarters[256] = {
1245         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1246         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1247         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1248         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1249         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1250         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1251         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1252         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1253         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1254         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1255         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1256         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1257         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1258         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1259         FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1260         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1261         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1262         TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE,
1263         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1264         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1265         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1266         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1267         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1268         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1269         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1270         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE};*/
1271
1272
1273     log_verbose("Testing KSC, ibm-930, ibm-878  for starters and their conversion types.");
1274
1275     myConverter = ucnv_open("ksc", &err);
1276     if (U_FAILURE(err)) {
1277       log_data_err("Failed to create an ibm-ksc converter\n");
1278       return;
1279     }
1280     else
1281     {
1282         if (ucnv_getType(myConverter)!=UCNV_MBCS)
1283             log_err("ucnv_getType Failed for ibm-949\n");
1284         else
1285             log_verbose("ucnv_getType ibm-949 ok\n");
1286
1287         if(myConverter!=NULL)
1288             ucnv_getStarters(myConverter, mystarters, &err);
1289
1290         /*if (memcmp(expectedKSCstarters, mystarters, sizeof(expectedKSCstarters)))
1291           log_err("Failed ucnv_getStarters for ksc\n");
1292           else
1293           log_verbose("ucnv_getStarters ok\n");*/
1294
1295     }
1296     ucnv_close(myConverter);
1297
1298     TestConverterType("ibm-930", UCNV_EBCDIC_STATEFUL);
1299     TestConverterType("ibm-878", UCNV_SBCS);
1300     TestConverterType("iso-8859-1", UCNV_LATIN_1);
1301     TestConverterType("ibm-1208", UCNV_UTF8);
1302     TestConverterType("utf-8", UCNV_UTF8);
1303     TestConverterType("UTF-16BE", UCNV_UTF16_BigEndian);
1304     TestConverterType("UTF-16LE", UCNV_UTF16_LittleEndian);
1305     TestConverterType("UTF-32BE", UCNV_UTF32_BigEndian);
1306     TestConverterType("UTF-32LE", UCNV_UTF32_LittleEndian);
1307 #ifdef U_ENABLE_GENERIC_ISO_2022
1308     TestConverterType("iso-2022", UCNV_ISO_2022);
1309 #endif
1310     TestConverterType("hz", UCNV_HZ);
1311     TestConverterType("scsu", UCNV_SCSU);
1312     TestConverterType("x-iscii-de", UCNV_ISCII);
1313     TestConverterType("ascii", UCNV_US_ASCII);
1314     TestConverterType("utf-7", UCNV_UTF7);
1315     TestConverterType("IMAP-mailbox-name", UCNV_IMAP_MAILBOX);
1316     TestConverterType("bocu-1", UCNV_BOCU1);
1317 }
1318
1319 static void
1320 TestAmbiguousConverter(UConverter *cnv) {
1321     static const char inBytes[2]={ 0x61, 0x5c };
1322     UChar outUnicode[20]={ 0, 0, 0, 0 };
1323
1324     const char *s;
1325     UChar *u;
1326     UErrorCode errorCode;
1327     UBool isAmbiguous;
1328
1329     /* try to convert an 'a' and a US-ASCII backslash */
1330     errorCode=U_ZERO_ERROR;
1331     s=inBytes;
1332     u=outUnicode;
1333     ucnv_toUnicode(cnv, &u, u+20, &s, s+2, NULL, TRUE, &errorCode);
1334     if(U_FAILURE(errorCode)) {
1335         /* we do not care about general failures in this test; the input may just not be mappable */
1336         return;
1337     }
1338
1339     if(outUnicode[0]!=0x61 || outUnicode[1]==0xfffd) {
1340         /* not an ASCII-family encoding, or 0x5c is unassigned/illegal: this test is not applicable */
1341         return;
1342     }
1343
1344     isAmbiguous=ucnv_isAmbiguous(cnv);
1345
1346     /* check that outUnicode[1]!=0x5c is exactly the same as ucnv_isAmbiguous() */
1347     if((outUnicode[1]!=0x5c)!=isAmbiguous) {
1348         log_err("error: converter \"%s\" needs a backslash fix: %d but ucnv_isAmbiguous()==%d\n",
1349             ucnv_getName(cnv, &errorCode), outUnicode[1]!=0x5c, isAmbiguous);
1350         return;
1351     }
1352
1353     if(outUnicode[1]!=0x5c) {
1354         /* needs fixup, fix it */
1355         ucnv_fixFileSeparator(cnv, outUnicode, (int32_t)(u-outUnicode));
1356         if(outUnicode[1]!=0x5c) {
1357             /* the fix failed */
1358             log_err("error: ucnv_fixFileSeparator(%s) failed\n", ucnv_getName(cnv, &errorCode));
1359             return;
1360         }
1361     }
1362 }
1363
1364 static void TestAmbiguous()
1365 {
1366     UErrorCode status = U_ZERO_ERROR;
1367     UConverter *ascii_cnv = 0, *sjis_cnv = 0, *cnv;
1368     const char target[] = {
1369         /* "\\usr\\local\\share\\data\\icutest.txt" */
1370         0x5c, 0x75, 0x73, 0x72,
1371         0x5c, 0x6c, 0x6f, 0x63, 0x61, 0x6c,
1372         0x5c, 0x73, 0x68, 0x61, 0x72, 0x65,
1373         0x5c, 0x64, 0x61, 0x74, 0x61,
1374         0x5c, 0x69, 0x63, 0x75, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, 0x78, 0x74,
1375         0
1376     };
1377     UChar asciiResult[200], sjisResult[200];
1378     int32_t asciiLength = 0, sjisLength = 0, i;
1379     const char *name;
1380
1381     /* enumerate all converters */
1382     status=U_ZERO_ERROR;
1383     for(i=0; (name=ucnv_getAvailableName(i))!=NULL; ++i) {
1384         cnv=ucnv_open(name, &status);
1385         if(U_SUCCESS(status)) {
1386             TestAmbiguousConverter(cnv);
1387             ucnv_close(cnv);
1388         } else {
1389             log_err("error: unable to open available converter \"%s\"\n", name);
1390             status=U_ZERO_ERROR;
1391         }
1392     }
1393
1394     sjis_cnv = ucnv_open("ibm-943", &status);
1395     if (U_FAILURE(status))
1396     {
1397         log_data_err("Failed to create a SJIS converter\n");
1398         return;
1399     }
1400     ascii_cnv = ucnv_open("LATIN-1", &status);
1401     if (U_FAILURE(status))
1402     {
1403         log_data_err("Failed to create a LATIN-1 converter\n");
1404         ucnv_close(sjis_cnv);
1405         return;
1406     }
1407     /* convert target from SJIS to Unicode */
1408     sjisLength = ucnv_toUChars(sjis_cnv, sjisResult, sizeof(sjisResult)/U_SIZEOF_UCHAR, target, strlen(target), &status);
1409     if (U_FAILURE(status))
1410     {
1411         log_err("Failed to convert the SJIS string.\n");
1412         ucnv_close(sjis_cnv);
1413         ucnv_close(ascii_cnv);
1414         return;
1415     }
1416     /* convert target from Latin-1 to Unicode */
1417     asciiLength = ucnv_toUChars(ascii_cnv, asciiResult, sizeof(asciiResult)/U_SIZEOF_UCHAR, target, strlen(target), &status);
1418     if (U_FAILURE(status))
1419     {
1420         log_err("Failed to convert the Latin-1 string.\n");
1421         free(sjisResult);
1422         ucnv_close(sjis_cnv);
1423         ucnv_close(ascii_cnv);
1424         return;
1425     }
1426     if (!ucnv_isAmbiguous(sjis_cnv))
1427     {
1428         log_err("SJIS converter should contain ambiguous character mappings.\n");
1429         free(sjisResult);
1430         free(asciiResult);
1431         ucnv_close(sjis_cnv);
1432         ucnv_close(ascii_cnv);
1433         return;
1434     }
1435     if (u_strcmp(sjisResult, asciiResult) == 0)
1436     {
1437         log_err("File separators for SJIS don't need to be fixed.\n");
1438     }
1439     ucnv_fixFileSeparator(sjis_cnv, sjisResult, sjisLength);
1440     if (u_strcmp(sjisResult, asciiResult) != 0)
1441     {
1442         log_err("Fixing file separator for SJIS failed.\n");
1443     }
1444     ucnv_close(sjis_cnv);
1445     ucnv_close(ascii_cnv);
1446 }
1447
1448 static void
1449 TestSignatureDetection(){
1450     /* with null terminated strings */
1451     {
1452         static const char* data[] = {
1453                 "\xFE\xFF\x00\x00",     /* UTF-16BE */
1454                 "\xFF\xFE\x00\x00",     /* UTF-16LE */
1455                 "\xEF\xBB\xBF\x00",     /* UTF-8    */
1456                 "\x0E\xFE\xFF\x00",     /* SCSU     */
1457
1458                 "\xFE\xFF",             /* UTF-16BE */
1459                 "\xFF\xFE",             /* UTF-16LE */
1460                 "\xEF\xBB\xBF",         /* UTF-8    */
1461                 "\x0E\xFE\xFF",         /* SCSU     */
1462
1463                 "\xFE\xFF\x41\x42",     /* UTF-16BE */
1464                 "\xFF\xFE\x41\x41",     /* UTF-16LE */
1465                 "\xEF\xBB\xBF\x41",     /* UTF-8    */
1466                 "\x0E\xFE\xFF\x41",     /* SCSU     */
1467
1468                 "\x2B\x2F\x76\x38\x2D", /* UTF-7    */
1469                 "\x2B\x2F\x76\x38\x41", /* UTF-7    */
1470                 "\x2B\x2F\x76\x39\x41", /* UTF-7    */
1471                 "\x2B\x2F\x76\x2B\x41", /* UTF-7    */
1472                 "\x2B\x2F\x76\x2F\x41",  /* UTF-7    */
1473
1474                 "\xDD\x73\x66\x73"      /* UTF-EBCDIC */
1475         };
1476         static const char* expected[] = {
1477                 "UTF-16BE",
1478                 "UTF-16LE",
1479                 "UTF-8",
1480                 "SCSU",
1481
1482                 "UTF-16BE",
1483                 "UTF-16LE",
1484                 "UTF-8",
1485                 "SCSU",
1486
1487                 "UTF-16BE",
1488                 "UTF-16LE",
1489                 "UTF-8",
1490                 "SCSU",
1491
1492                 "UTF-7",
1493                 "UTF-7",
1494                 "UTF-7",
1495                 "UTF-7",
1496                 "UTF-7",
1497                 "UTF-EBCDIC"
1498         };
1499         static const int32_t expectedLength[] ={
1500             2,
1501             2,
1502             3,
1503             3,
1504
1505             2,
1506             2,
1507             3,
1508             3,
1509
1510             2,
1511             2,
1512             3,
1513             3,
1514
1515             5,
1516             4,
1517             4,
1518             4,
1519             4,
1520             4
1521         };
1522         int i=0;
1523         UErrorCode err;
1524         int32_t signatureLength = -1;
1525         const char* source = NULL;
1526         const char* enc = NULL;
1527         for( ; i<sizeof(data)/sizeof(char*); i++){
1528             err = U_ZERO_ERROR;
1529             source = data[i];
1530             enc = ucnv_detectUnicodeSignature(source, -1 , &signatureLength, &err);
1531             if(U_FAILURE(err)){
1532                 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err));
1533                 continue;
1534             }
1535             if(enc == NULL || strcmp(enc,expected[i]) !=0){
1536                 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc);
1537                 continue;
1538             }
1539             if(signatureLength != expectedLength[i]){
1540                 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]);
1541             }
1542         }
1543     }
1544     {
1545         static const char* data[] = {
1546                 "\xFE\xFF\x00",         /* UTF-16BE */
1547                 "\xFF\xFE\x00",         /* UTF-16LE */
1548                 "\xEF\xBB\xBF\x00",     /* UTF-8    */
1549                 "\x0E\xFE\xFF\x00",     /* SCSU     */
1550                 "\x00\x00\xFE\xFF",     /* UTF-32BE */
1551                 "\xFF\xFE\x00\x00",     /* UTF-32LE */
1552                 "\xFE\xFF",             /* UTF-16BE */
1553                 "\xFF\xFE",             /* UTF-16LE */
1554                 "\xEF\xBB\xBF",         /* UTF-8    */
1555                 "\x0E\xFE\xFF",         /* SCSU     */
1556                 "\x00\x00\xFE\xFF",     /* UTF-32BE */
1557                 "\xFF\xFE\x00\x00",     /* UTF-32LE */
1558                 "\xFE\xFF\x41\x42",     /* UTF-16BE */
1559                 "\xFF\xFE\x41\x41",     /* UTF-16LE */
1560                 "\xEF\xBB\xBF\x41",     /* UTF-8    */
1561                 "\x0E\xFE\xFF\x41",     /* SCSU     */
1562                 "\x00\x00\xFE\xFF\x41", /* UTF-32BE */
1563                 "\xFF\xFE\x00\x00\x42", /* UTF-32LE */
1564                 "\xFB\xEE\x28",         /* BOCU-1   */
1565                 "\xFF\x41\x42"          /* NULL     */
1566         };
1567         static const int len[] = {
1568             3,
1569             3,
1570             4,
1571             4,
1572             4,
1573             4,
1574             2,
1575             2,
1576             3,
1577             3,
1578             4,
1579             4,
1580             4,
1581             4,
1582             4,
1583             4,
1584             5,
1585             5,
1586             3,
1587             3
1588         };
1589
1590         static const char* expected[] = {
1591                 "UTF-16BE",
1592                 "UTF-16LE",
1593                 "UTF-8",
1594                 "SCSU",
1595                 "UTF-32BE",
1596                 "UTF-32LE",
1597                 "UTF-16BE",
1598                 "UTF-16LE",
1599                 "UTF-8",
1600                 "SCSU",
1601                 "UTF-32BE",
1602                 "UTF-32LE",
1603                 "UTF-16BE",
1604                 "UTF-16LE",
1605                 "UTF-8",
1606                 "SCSU",
1607                 "UTF-32BE",
1608                 "UTF-32LE",
1609                 "BOCU-1",
1610                 NULL
1611         };
1612         static const int32_t expectedLength[] ={
1613             2,
1614             2,
1615             3,
1616             3,
1617             4,
1618             4,
1619             2,
1620             2,
1621             3,
1622             3,
1623             4,
1624             4,
1625             2,
1626             2,
1627             3,
1628             3,
1629             4,
1630             4,
1631             3,
1632             0
1633         };
1634         int i=0;
1635         UErrorCode err;
1636         int32_t signatureLength = -1;
1637         int32_t sourceLength=-1;
1638         const char* source = NULL;
1639         const char* enc = NULL;
1640         for( ; i<sizeof(data)/sizeof(char*); i++){
1641             err = U_ZERO_ERROR;
1642             source = data[i];
1643             sourceLength = len[i];
1644             enc = ucnv_detectUnicodeSignature(source, sourceLength , &signatureLength, &err);
1645             if(U_FAILURE(err)){
1646                 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err));
1647                 continue;
1648             }
1649             if(enc == NULL || strcmp(enc,expected[i]) !=0){
1650                 if(expected[i] !=NULL){
1651                  log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc);
1652                  continue;
1653                 }
1654             }
1655             if(signatureLength != expectedLength[i]){
1656                 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]);
1657             }
1658         }
1659     }
1660 }
1661
1662 void
1663 static TestUTF7() {
1664     /* test input */
1665     static const uint8_t in[]={
1666         /* H - +Jjo- - ! +- +2AHcAQ */
1667         0x48,
1668         0x2d,
1669         0x2b, 0x4a, 0x6a, 0x6f,
1670         0x2d, 0x2d,
1671         0x21,
1672         0x2b, 0x2d,
1673         0x2b, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51
1674     };
1675
1676     /* expected test results */
1677     static const int32_t results[]={
1678         /* number of bytes read, code point */
1679         1, 0x48,
1680         1, 0x2d,
1681         4, 0x263a, /* <WHITE SMILING FACE> */
1682         2, 0x2d,
1683         1, 0x21,
1684         2, 0x2b,
1685         7, 0x10401
1686     };
1687
1688     const char *cnvName;
1689     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
1690     UErrorCode errorCode=U_ZERO_ERROR;
1691     UConverter *cnv=ucnv_open("UTF-7", &errorCode);
1692     if(U_FAILURE(errorCode)) {
1693         log_err("Unable to open a UTF-7 converter: %s\n", u_errorName(errorCode)); /* sholdn't be a data err */
1694         return;
1695     }
1696     TestNextUChar(cnv, source, limit, results, "UTF-7");
1697     /* Test the condition when source >= sourceLimit */
1698     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1699     cnvName = ucnv_getName(cnv, &errorCode);
1700     if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "UTF-7") != 0) {
1701         log_err("UTF-7 converter is called %s: %s\n", cnvName, u_errorName(errorCode));
1702     }
1703     ucnv_close(cnv);
1704 }
1705
1706 void
1707 static TestIMAP() {
1708     /* test input */
1709     static const uint8_t in[]={
1710         /* H - &Jjo- - ! &- &2AHcAQ- \ */
1711         0x48,
1712         0x2d,
1713         0x26, 0x4a, 0x6a, 0x6f,
1714         0x2d, 0x2d,
1715         0x21,
1716         0x26, 0x2d,
1717         0x26, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51, 0x2d
1718     };
1719
1720     /* expected test results */
1721     static const int32_t results[]={
1722         /* number of bytes read, code point */
1723         1, 0x48,
1724         1, 0x2d,
1725         4, 0x263a, /* <WHITE SMILING FACE> */
1726         2, 0x2d,
1727         1, 0x21,
1728         2, 0x26,
1729         7, 0x10401
1730     };
1731
1732     const char *cnvName;
1733     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
1734     UErrorCode errorCode=U_ZERO_ERROR;
1735     UConverter *cnv=ucnv_open("IMAP-mailbox-name", &errorCode);
1736     if(U_FAILURE(errorCode)) {
1737         log_err("Unable to open a IMAP-mailbox-name converter: %s\n", u_errorName(errorCode)); /* sholdn't be a data err */
1738         return;
1739     }
1740     TestNextUChar(cnv, source, limit, results, "IMAP-mailbox-name");
1741     /* Test the condition when source >= sourceLimit */
1742     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1743     cnvName = ucnv_getName(cnv, &errorCode);
1744     if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "IMAP-mailbox-name") != 0) {
1745         log_err("IMAP-mailbox-name converter is called %s: %s\n", cnvName, u_errorName(errorCode));
1746     }
1747     ucnv_close(cnv);
1748 }
1749
1750 void
1751 static TestUTF8() {
1752     /* test input */
1753     static const uint8_t in[]={
1754         0x61,
1755         0xc2, 0x80,
1756         0xe0, 0xa0, 0x80,
1757         0xf0, 0x90, 0x80, 0x80,
1758         0xf4, 0x84, 0x8c, 0xa1,
1759         0xf0, 0x90, 0x90, 0x81
1760     };
1761
1762     /* expected test results */
1763     static const int32_t results[]={
1764         /* number of bytes read, code point */
1765         1, 0x61,
1766         2, 0x80,
1767         3, 0x800,
1768         4, 0x10000,
1769         4, 0x104321,
1770         4, 0x10401
1771     };
1772
1773     /* error test input */
1774     static const uint8_t in2[]={
1775         0x61,
1776         0xc0, 0x80,                     /* illegal non-shortest form */
1777         0xe0, 0x80, 0x80,               /* illegal non-shortest form */
1778         0xf0, 0x80, 0x80, 0x80,         /* illegal non-shortest form */
1779         0xc0, 0xc0,                     /* illegal trail byte */
1780         0xf4, 0x90, 0x80, 0x80,         /* 0x110000 out of range */
1781         0xf8, 0x80, 0x80, 0x80, 0x80,   /* too long */
1782         0xfe,                           /* illegal byte altogether */
1783         0x62
1784     };
1785
1786     /* expected error test results */
1787     static const int32_t results2[]={
1788         /* number of bytes read, code point */
1789         1, 0x61,
1790         22, 0x62
1791     };
1792
1793     UConverterToUCallback cb;
1794     const void *p;
1795
1796     const char *source=(const char *)in,*limit=(const char *)in+sizeof(in);
1797     UErrorCode errorCode=U_ZERO_ERROR;
1798     UConverter *cnv=ucnv_open("UTF-8", &errorCode);
1799     if(U_FAILURE(errorCode)) {
1800         log_err("Unable to open a UTF-8 converter: %s\n", u_errorName(errorCode));
1801         return;
1802     }
1803     TestNextUChar(cnv, source, limit, results, "UTF-8");
1804     /* Test the condition when source >= sourceLimit */
1805     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1806
1807     /* test error behavior with a skip callback */
1808     ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
1809     source=(const char *)in2;
1810     limit=(const char *)(in2+sizeof(in2));
1811     TestNextUChar(cnv, source, limit, results2, "UTF-8");
1812
1813     ucnv_close(cnv);
1814 }
1815
1816 void
1817 static TestCESU8() {
1818     /* test input */
1819     static const uint8_t in[]={
1820         0x61,
1821         0xc2, 0x80,
1822         0xe0, 0xa0, 0x80,
1823         0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80,
1824         0xed, 0xb0, 0x81, 0xed, 0xa0, 0x82,
1825         0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf,
1826         0xef, 0xbf, 0xbc
1827     };
1828
1829     /* expected test results */
1830     static const int32_t results[]={
1831         /* number of bytes read, code point */
1832         1, 0x61,
1833         2, 0x80,
1834         3, 0x800,
1835         6, 0x10000,
1836         3, 0xdc01,
1837         -1,0xd802,  /* may read 3 or 6 bytes */
1838         -1,0x10ffff,/* may read 0 or 3 bytes */
1839         3, 0xfffc
1840     };
1841
1842     /* error test input */
1843     static const uint8_t in2[]={
1844         0x61,
1845         0xc0, 0x80,                     /* illegal non-shortest form */
1846         0xe0, 0x80, 0x80,               /* illegal non-shortest form */
1847         0xf0, 0x80, 0x80, 0x80,         /* illegal non-shortest form */
1848         0xc0, 0xc0,                     /* illegal trail byte */
1849         0xf0, 0x90, 0x80, 0x80,         /* illegal 4-byte supplementary code point */
1850         0xf4, 0x84, 0x8c, 0xa1,         /* illegal 4-byte supplementary code point */
1851         0xf0, 0x90, 0x90, 0x81,         /* illegal 4-byte supplementary code point */
1852         0xf4, 0x90, 0x80, 0x80,         /* 0x110000 out of range */
1853         0xf8, 0x80, 0x80, 0x80, 0x80,   /* too long */
1854         0xfe,                           /* illegal byte altogether */
1855         0x62
1856     };
1857
1858     /* expected error test results */
1859     static const int32_t results2[]={
1860         /* number of bytes read, code point */
1861         1, 0x61,
1862         34, 0x62
1863     };
1864
1865     UConverterToUCallback cb;
1866     const void *p;
1867
1868     const char *source=(const char *)in,*limit=(const char *)in+sizeof(in);
1869     UErrorCode errorCode=U_ZERO_ERROR;
1870     UConverter *cnv=ucnv_open("CESU-8", &errorCode);
1871     if(U_FAILURE(errorCode)) {
1872         log_err("Unable to open a CESU-8 converter: %s\n", u_errorName(errorCode));
1873         return;
1874     }
1875     TestNextUChar(cnv, source, limit, results, "CESU-8");
1876     /* Test the condition when source >= sourceLimit */
1877     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1878
1879     /* test error behavior with a skip callback */
1880     ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
1881     source=(const char *)in2;
1882     limit=(const char *)(in2+sizeof(in2));
1883     TestNextUChar(cnv, source, limit, results2, "CESU-8");
1884
1885     ucnv_close(cnv);
1886 }
1887
1888 void
1889 static TestUTF16() {
1890     /* test input */
1891     static const uint8_t in1[]={
1892         0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff
1893     };
1894     static const uint8_t in2[]={
1895         0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff
1896     };
1897     static const uint8_t in3[]={
1898         0xfe, 0xfe, 0x4e, 0x00, 0xfe, 0xff, 0xd8, 0x40, 0xdc, 0x01
1899     };
1900
1901     /* expected test results */
1902     static const int32_t results1[]={
1903         /* number of bytes read, code point */
1904         4, 0x4e00,
1905         2, 0xfeff
1906     };
1907     static const int32_t results2[]={
1908         /* number of bytes read, code point */
1909         4, 0x004e,
1910         2, 0xfffe
1911     };
1912     static const int32_t results3[]={
1913         /* number of bytes read, code point */
1914         2, 0xfefe,
1915         2, 0x4e00,
1916         2, 0xfeff,
1917         4, 0x20001
1918     };
1919
1920     const char *source, *limit;
1921
1922     UErrorCode errorCode=U_ZERO_ERROR;
1923     UConverter *cnv=ucnv_open("UTF-16", &errorCode);
1924     if(U_FAILURE(errorCode)) {
1925         log_err("Unable to open a UTF-16 converter: %s\n", u_errorName(errorCode));
1926         return;
1927     }
1928
1929     source=(const char *)in1, limit=(const char *)in1+sizeof(in1);
1930     TestNextUChar(cnv, source, limit, results1, "UTF-16");
1931
1932     source=(const char *)in2, limit=(const char *)in2+sizeof(in2);
1933     ucnv_resetToUnicode(cnv);
1934     TestNextUChar(cnv, source, limit, results2, "UTF-16");
1935
1936     source=(const char *)in3, limit=(const char *)in3+sizeof(in3);
1937     ucnv_resetToUnicode(cnv);
1938     TestNextUChar(cnv, source, limit, results3, "UTF-16");
1939
1940     /* Test the condition when source >= sourceLimit */
1941     ucnv_resetToUnicode(cnv);
1942     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1943
1944     ucnv_close(cnv);
1945 }
1946
1947 void
1948 static TestUTF16BE() {
1949     /* test input */
1950     static const uint8_t in[]={
1951         0x00, 0x61,
1952         0x00, 0xc0,
1953         0x00, 0x31,
1954         0x00, 0xf4,
1955         0xce, 0xfe,
1956         0xd8, 0x01, 0xdc, 0x01
1957     };
1958
1959     /* expected test results */
1960     static const int32_t results[]={
1961         /* number of bytes read, code point */
1962         2, 0x61,
1963         2, 0xc0,
1964         2, 0x31,
1965         2, 0xf4,
1966         2, 0xcefe,
1967         4, 0x10401
1968     };
1969
1970     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
1971     UErrorCode errorCode=U_ZERO_ERROR;
1972     UConverter *cnv=ucnv_open("utf-16be", &errorCode);
1973     if(U_FAILURE(errorCode)) {
1974         log_err("Unable to open a UTF16-BE converter: %s\n", u_errorName(errorCode));
1975         return;
1976     }
1977     TestNextUChar(cnv, source, limit, results, "UTF-16BE");
1978     /* Test the condition when source >= sourceLimit */
1979     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1980     /*Test for the condition where there is an invalid character*/
1981     {
1982         static const uint8_t source2[]={0x61};
1983         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
1984         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
1985     }
1986 #if 0
1987     /*
1988      * Test disabled because currently the UTF-16BE/LE converters are supposed
1989      * to not set errors for unpaired surrogates.
1990      * This may change with
1991      * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
1992      */
1993
1994     /*Test for the condition where there is a surrogate pair*/
1995     {
1996         const uint8_t source2[]={0xd8, 0x01};
1997         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
1998     }
1999 #endif
2000     ucnv_close(cnv);
2001 }
2002
2003 static void
2004 TestUTF16LE() {
2005     /* test input */
2006     static const uint8_t in[]={
2007         0x61, 0x00,
2008         0x31, 0x00,
2009         0x4e, 0x2e,
2010         0x4e, 0x00,
2011         0x01, 0xd8, 0x01, 0xdc
2012     };
2013
2014     /* expected test results */
2015     static const int32_t results[]={
2016         /* number of bytes read, code point */
2017         2, 0x61,
2018         2, 0x31,
2019         2, 0x2e4e,
2020         2, 0x4e,
2021         4, 0x10401
2022     };
2023
2024     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2025     UErrorCode errorCode=U_ZERO_ERROR;
2026     UConverter *cnv=ucnv_open("utf-16le", &errorCode);
2027     if(U_FAILURE(errorCode)) {
2028         log_err("Unable to open a UTF16-LE converter: %s\n", u_errorName(errorCode));
2029         return;
2030     }
2031     TestNextUChar(cnv, source, limit, results, "UTF-16LE");
2032     /* Test the condition when source >= sourceLimit */
2033     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2034     /*Test for the condition where there is an invalid character*/
2035     {
2036         static const uint8_t source2[]={0x61};
2037         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2038         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
2039     }
2040 #if 0
2041     /*
2042      * Test disabled because currently the UTF-16BE/LE converters are supposed
2043      * to not set errors for unpaired surrogates.
2044      * This may change with
2045      * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2046      */
2047
2048     /*Test for the condition where there is a surrogate character*/
2049     {
2050         static const uint8_t source2[]={0x01, 0xd8};
2051         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
2052     }
2053 #endif
2054
2055     ucnv_close(cnv);
2056 }
2057
2058 void
2059 static TestUTF32() {
2060     /* test input */
2061     static const uint8_t in1[]={
2062         0x00, 0x00, 0xfe, 0xff,   0x00, 0x10, 0x0f, 0x00,   0x00, 0x00, 0xfe, 0xff
2063     };
2064     static const uint8_t in2[]={
2065         0xff, 0xfe, 0x00, 0x00,   0x00, 0x10, 0x0f, 0x00,   0xfe, 0xff, 0x00, 0x00
2066     };
2067     static const uint8_t in3[]={
2068         0x00, 0x00, 0xfe, 0xfe,   0x00, 0x10, 0x0f, 0x00,   0x00, 0x00, 0xd8, 0x40,   0x00, 0x00, 0xdc, 0x01
2069     };
2070
2071     /* expected test results */
2072     static const int32_t results1[]={
2073         /* number of bytes read, code point */
2074         8, 0x100f00,
2075         4, 0xfeff
2076     };
2077     static const int32_t results2[]={
2078         /* number of bytes read, code point */
2079         8, 0x0f1000,
2080         4, 0xfffe
2081     };
2082     static const int32_t results3[]={
2083         /* number of bytes read, code point */
2084         4, 0xfefe,
2085         4, 0x100f00,
2086         4, 0xfffd, /* unmatched surrogate */
2087         4, 0xfffd  /* unmatched surrogate */
2088     };
2089
2090     const char *source, *limit;
2091
2092     UErrorCode errorCode=U_ZERO_ERROR;
2093     UConverter *cnv=ucnv_open("UTF-32", &errorCode);
2094     if(U_FAILURE(errorCode)) {
2095         log_err("Unable to open a UTF-32 converter: %s\n", u_errorName(errorCode));
2096         return;
2097     }
2098
2099     source=(const char *)in1, limit=(const char *)in1+sizeof(in1);
2100     TestNextUChar(cnv, source, limit, results1, "UTF-32");
2101
2102     source=(const char *)in2, limit=(const char *)in2+sizeof(in2);
2103     ucnv_resetToUnicode(cnv);
2104     TestNextUChar(cnv, source, limit, results2, "UTF-32");
2105
2106     source=(const char *)in3, limit=(const char *)in3+sizeof(in3);
2107     ucnv_resetToUnicode(cnv);
2108     TestNextUChar(cnv, source, limit, results3, "UTF-32");
2109
2110     /* Test the condition when source >= sourceLimit */
2111     ucnv_resetToUnicode(cnv);
2112     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2113
2114     ucnv_close(cnv);
2115 }
2116
2117 static void
2118 TestUTF32BE() {
2119     /* test input */
2120     static const uint8_t in[]={
2121         0x00, 0x00, 0x00, 0x61,
2122         0x00, 0x00, 0x30, 0x61,
2123         0x00, 0x00, 0xdc, 0x00,
2124         0x00, 0x00, 0xd8, 0x00,
2125         0x00, 0x00, 0xdf, 0xff,
2126         0x00, 0x00, 0xff, 0xfe,
2127         0x00, 0x10, 0xab, 0xcd,
2128         0x00, 0x10, 0xff, 0xff
2129     };
2130
2131     /* expected test results */
2132     static const int32_t results[]={
2133         /* number of bytes read, code point */
2134         4, 0x61,
2135         4, 0x3061,
2136         4, 0xfffd,
2137         4, 0xfffd,
2138         4, 0xfffd,
2139         4, 0xfffe,
2140         4, 0x10abcd,
2141         4, 0x10ffff
2142     };
2143
2144     /* error test input */
2145     static const uint8_t in2[]={
2146         0x00, 0x00, 0x00, 0x61,
2147         0x00, 0x11, 0x00, 0x00,         /* 0x110000 out of range */
2148         0x00, 0x00, 0x00, 0x62,
2149         0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
2150         0x7f, 0xff, 0xff, 0xff,         /* 0x7fffffff out of range */
2151         0x00, 0x00, 0x01, 0x62,
2152         0x00, 0x00, 0x02, 0x62
2153     };
2154
2155     /* expected error test results */
2156     static const int32_t results2[]={
2157         /* number of bytes read, code point */
2158         4,  0x61,
2159         8,  0x62,
2160         12, 0x162,
2161         4,  0x262
2162     };
2163
2164     UConverterToUCallback cb;
2165     const void *p;
2166
2167     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2168     UErrorCode errorCode=U_ZERO_ERROR;
2169     UConverter *cnv=ucnv_open("UTF-32BE", &errorCode);
2170     if(U_FAILURE(errorCode)) {
2171         log_err("Unable to open a UTF-32BE converter: %s\n", u_errorName(errorCode));
2172         return;
2173     }
2174     TestNextUChar(cnv, source, limit, results, "UTF-32BE");
2175
2176     /* Test the condition when source >= sourceLimit */
2177     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2178
2179     /* test error behavior with a skip callback */
2180     ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2181     source=(const char *)in2;
2182     limit=(const char *)(in2+sizeof(in2));
2183     TestNextUChar(cnv, source, limit, results2, "UTF-32BE");
2184
2185     ucnv_close(cnv);
2186 }
2187
2188 static void
2189 TestUTF32LE() {
2190     /* test input */
2191     static const uint8_t in[]={
2192         0x61, 0x00, 0x00, 0x00,
2193         0x61, 0x30, 0x00, 0x00,
2194         0x00, 0xdc, 0x00, 0x00,
2195         0x00, 0xd8, 0x00, 0x00,
2196         0xff, 0xdf, 0x00, 0x00,
2197         0xfe, 0xff, 0x00, 0x00,
2198         0xcd, 0xab, 0x10, 0x00,
2199         0xff, 0xff, 0x10, 0x00
2200     };
2201
2202     /* expected test results */
2203     static const int32_t results[]={
2204         /* number of bytes read, code point */
2205         4, 0x61,
2206         4, 0x3061,
2207         4, 0xfffd,
2208         4, 0xfffd,
2209         4, 0xfffd,
2210         4, 0xfffe,
2211         4, 0x10abcd,
2212         4, 0x10ffff
2213     };
2214
2215     /* error test input */
2216     static const uint8_t in2[]={
2217         0x61, 0x00, 0x00, 0x00,
2218         0x00, 0x00, 0x11, 0x00,         /* 0x110000 out of range */
2219         0x62, 0x00, 0x00, 0x00,
2220         0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
2221         0xff, 0xff, 0xff, 0x7f,         /* 0x7fffffff out of range */
2222         0x62, 0x01, 0x00, 0x00,
2223         0x62, 0x02, 0x00, 0x00,
2224     };
2225
2226     /* expected error test results */
2227     static const int32_t results2[]={
2228         /* number of bytes read, code point */
2229         4,  0x61,
2230         8,  0x62,
2231         12, 0x162,
2232         4,  0x262,
2233     };
2234
2235     UConverterToUCallback cb;
2236     const void *p;
2237
2238     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2239     UErrorCode errorCode=U_ZERO_ERROR;
2240     UConverter *cnv=ucnv_open("UTF-32LE", &errorCode);
2241     if(U_FAILURE(errorCode)) {
2242         log_err("Unable to open a UTF-32LE converter: %s\n", u_errorName(errorCode));
2243         return;
2244     }
2245     TestNextUChar(cnv, source, limit, results, "UTF-32LE");
2246
2247     /* Test the condition when source >= sourceLimit */
2248     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2249
2250     /* test error behavior with a skip callback */
2251     ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2252     source=(const char *)in2;
2253     limit=(const char *)(in2+sizeof(in2));
2254     TestNextUChar(cnv, source, limit, results2, "UTF-32LE");
2255
2256     ucnv_close(cnv);
2257 }
2258
2259 static void
2260 TestLATIN1() {
2261     /* test input */
2262     static const uint8_t in[]={
2263        0x61,
2264        0x31,
2265        0x32,
2266        0xc0,
2267        0xf0,
2268        0xf4,
2269     };
2270
2271     /* expected test results */
2272     static const int32_t results[]={
2273         /* number of bytes read, code point */
2274         1, 0x61,
2275         1, 0x31,
2276         1, 0x32,
2277         1, 0xc0,
2278         1, 0xf0,
2279         1, 0xf4,
2280     };
2281     static const uint16_t in1[] = {
2282         0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2283         0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2284         0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2285         0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2286         0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2287         0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2288         0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2289         0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2290         0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2291         0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2292         0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2293         0xcb, 0x82
2294     };
2295     static const uint8_t out1[] = {
2296         0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2297         0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2298         0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2299         0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2300         0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2301         0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2302         0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2303         0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2304         0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2305         0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2306         0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2307         0xcb, 0x82
2308     };
2309     static const uint16_t in2[]={
2310         0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2311         0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2312         0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2313         0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2314         0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2315         0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2316         0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2317         0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2318         0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2319         0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2320         0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2321         0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2322         0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2323         0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2324         0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2325         0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2326         0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2327         0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2328         0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2329         0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2330         0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2331         0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2332         0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2333         0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2334         0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2335         0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2336         0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2337         0x37, 0x20, 0x2A, 0x2F,
2338     };
2339     static const unsigned char out2[]={
2340         0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2341         0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2342         0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2343         0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2344         0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2345         0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2346         0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2347         0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2348         0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2349         0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2350         0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2351         0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2352         0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2353         0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2354         0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2355         0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2356         0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2357         0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2358         0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2359         0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2360         0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2361         0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2362         0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2363         0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2364         0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2365         0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2366         0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2367         0x37, 0x20, 0x2A, 0x2F,
2368     };
2369     const char *source=(const char *)in;
2370     const char *limit=(const char *)in+sizeof(in);
2371
2372     UErrorCode errorCode=U_ZERO_ERROR;
2373     UConverter *cnv=ucnv_open("LATIN_1", &errorCode);
2374     if(U_FAILURE(errorCode)) {
2375         log_err("Unable to open a LATIN_1 converter: %s\n", u_errorName(errorCode));
2376         return;
2377     }
2378     TestNextUChar(cnv, source, limit, results, "LATIN_1");
2379     /* Test the condition when source >= sourceLimit */
2380     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2381     TestConv((uint16_t*)in1,sizeof(in1)/2,"LATIN_1","LATIN-1",(char*)out1,sizeof(out1));
2382     TestConv((uint16_t*)in2,sizeof(in2)/2,"ASCII","ASCII",(char*)out2,sizeof(out2));
2383
2384     ucnv_close(cnv);
2385 }
2386
2387 static void
2388 TestSBCS() {
2389     /* test input */
2390     static const uint8_t in[]={ 0x61, 0xc0, 0x80, 0xe0, 0xf0, 0xf4};
2391     /* expected test results */
2392     static const int32_t results[]={
2393         /* number of bytes read, code point */
2394         1, 0x61,
2395         1, 0xbf,
2396         1, 0xc4,
2397         1, 0x2021,
2398         1, 0xf8ff,
2399         1, 0x00d9
2400     };
2401
2402     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2403     UErrorCode errorCode=U_ZERO_ERROR;
2404     UConverter *cnv=ucnv_open("x-mac-turkish", &errorCode);
2405     if(U_FAILURE(errorCode)) {
2406         log_data_err("Unable to open a SBCS(x-mac-turkish) converter: %s\n", u_errorName(errorCode));
2407         return;
2408     }
2409     TestNextUChar(cnv, source, limit, results, "SBCS(x-mac-turkish)");
2410     /* Test the condition when source >= sourceLimit */
2411     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2412     /*Test for Illegal character */ /*
2413     {
2414     static const uint8_t input1[]={ 0xA1 };
2415     const char* illegalsource=(const char*)input1;
2416     TestNextUCharError(cnv, illegalsource, illegalsource+sizeof(illegalsource), U_INVALID_CHAR_FOUND, "source has a illegal characte");
2417     }
2418    */
2419     ucnv_close(cnv);
2420 }
2421
2422 static void
2423 TestDBCS() {
2424     /* test input */
2425     static const uint8_t in[]={
2426         0x44, 0x6a,
2427         0xc4, 0x9c,
2428         0x7a, 0x74,
2429         0x46, 0xab,
2430         0x42, 0x5b,
2431
2432     };
2433
2434     /* expected test results */
2435     static const int32_t results[]={
2436         /* number of bytes read, code point */
2437         2, 0x00a7,
2438         2, 0xe1d2,
2439         2, 0x6962,
2440         2, 0xf842,
2441         2, 0xffe5,
2442     };
2443
2444     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2445     UErrorCode errorCode=U_ZERO_ERROR;
2446
2447     UConverter *cnv=my_ucnv_open("@ibm9027", &errorCode);
2448     if(U_FAILURE(errorCode)) {
2449         log_data_err("Unable to open a DBCS(@ibm9027) converter: %s\n", u_errorName(errorCode));
2450         return;
2451     }
2452     TestNextUChar(cnv, source, limit, results, "DBCS(@ibm9027)");
2453     /* Test the condition when source >= sourceLimit */
2454     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2455     /*Test for the condition where there is an invalid character*/
2456     {
2457         static const uint8_t source2[]={0x1a, 0x1b};
2458         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
2459     }
2460     /*Test for the condition where we have a truncated char*/
2461     {
2462         static const uint8_t source1[]={0xc4};
2463         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2464         TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2465     }
2466     ucnv_close(cnv);
2467 }
2468
2469 static void
2470 TestMBCS() {
2471     /* test input */
2472     static const uint8_t in[]={
2473         0x01,
2474         0xa6, 0xa3,
2475         0x00,
2476         0xa6, 0xa1,
2477         0x08,
2478         0xc2, 0x76,
2479         0xc2, 0x78,
2480
2481     };
2482
2483     /* expected test results */
2484     static const int32_t results[]={
2485         /* number of bytes read, code point */
2486         1, 0x0001,
2487         2, 0x250c,
2488         1, 0x0000,
2489         2, 0x2500,
2490         1, 0x0008,
2491         2, 0xd60c,
2492         2, 0xd60e,
2493     };
2494
2495     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2496     UErrorCode errorCode=U_ZERO_ERROR;
2497
2498     UConverter *cnv=ucnv_open("ibm-1363", &errorCode);
2499     if(U_FAILURE(errorCode)) {
2500         log_data_err("Unable to open a MBCS(ibm-1363) converter: %s\n", u_errorName(errorCode));
2501         return;
2502     }
2503     TestNextUChar(cnv, source, limit, results, "MBCS(ibm-1363)");
2504     /* Test the condition when source >= sourceLimit */
2505     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2506     /*Test for the condition where there is an invalid character*/
2507     {
2508         static const uint8_t source2[]={0xa1, 0x01};
2509         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
2510     }
2511     /*Test for the condition where we have a truncated char*/
2512     {
2513         static const uint8_t source1[]={0xc4};
2514         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2515         TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2516     }
2517     ucnv_close(cnv);
2518
2519 }
2520
2521 #ifdef U_ENABLE_GENERIC_ISO_2022
2522
2523 static void
2524 TestISO_2022() {
2525     /* test input */
2526     static const uint8_t in[]={
2527         0x1b, 0x25, 0x42,
2528         0x31,
2529         0x32,
2530         0x61,
2531         0xc2, 0x80,
2532         0xe0, 0xa0, 0x80,
2533         0xf0, 0x90, 0x80, 0x80
2534     };
2535
2536
2537
2538     /* expected test results */
2539     static const int32_t results[]={
2540         /* number of bytes read, code point */
2541         4, 0x0031,  /* 4 bytes including the escape sequence */
2542         1, 0x0032,
2543         1, 0x61,
2544         2, 0x80,
2545         3, 0x800,
2546         4, 0x10000
2547     };
2548
2549     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2550     UErrorCode errorCode=U_ZERO_ERROR;
2551     UConverter *cnv;
2552
2553     cnv=ucnv_open("ISO_2022", &errorCode);
2554     if(U_FAILURE(errorCode)) {
2555         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
2556         return;
2557     }
2558     TestNextUChar(cnv, source, limit, results, "ISO_2022");
2559
2560     /* Test the condition when source >= sourceLimit */
2561     TestNextUCharError(cnv, source, source-1, U_ILLEGAL_ARGUMENT_ERROR, "sourceLimit < source");
2562     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2563     /*Test for the condition where we have a truncated char*/
2564     {
2565         static const uint8_t source1[]={0xc4};
2566         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2567         TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2568     }
2569     /*Test for the condition where there is an invalid character*/
2570     {
2571         static const uint8_t source2[]={0xa1, 0x01};
2572         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_CHAR_FOUND, "an invalid character");
2573     }
2574     ucnv_close(cnv);
2575 }
2576
2577 #endif
2578
2579 static void
2580 TestSmallTargetBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2581     const UChar* uSource;
2582     const UChar* uSourceLimit;
2583     const char* cSource;
2584     const char* cSourceLimit;
2585     UChar *uTargetLimit =NULL;
2586     UChar *uTarget;
2587     char *cTarget;
2588     const char *cTargetLimit;
2589     char *cBuf;
2590     UChar *uBuf,*test;
2591     int32_t uBufSize = 120;
2592     int len=0;
2593     int i=2;
2594     UErrorCode errorCode=U_ZERO_ERROR;
2595     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2596     cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
2597     ucnv_reset(cnv);
2598     for(;--i>0; ){
2599         uSource = (UChar*) source;
2600         uSourceLimit=(const UChar*)sourceLimit;
2601         cTarget = cBuf;
2602         uTarget = uBuf;
2603         cSource = cBuf;
2604         cTargetLimit = cBuf;
2605         uTargetLimit = uBuf;
2606
2607         do{
2608
2609             cTargetLimit = cTargetLimit+ i;
2610             ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode);
2611             if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2612                errorCode=U_ZERO_ERROR;
2613                 continue;
2614             }
2615
2616             if(U_FAILURE(errorCode)){
2617                 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2618                 return;
2619             }
2620
2621         }while (uSource<uSourceLimit);
2622
2623         cSourceLimit =cTarget;
2624         do{
2625             uTargetLimit=uTargetLimit+i;
2626             ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode);
2627             if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2628                errorCode=U_ZERO_ERROR;
2629                 continue;
2630             }
2631             if(U_FAILURE(errorCode)){
2632                    log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2633                     return;
2634             }
2635         }while(cSource<cSourceLimit);
2636
2637         uSource = source;
2638         test =uBuf;
2639         for(len=0;len<(int)(source - sourceLimit);len++){
2640             if(uBuf[len]!=uSource[len]){
2641                 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
2642             }
2643         }
2644     }
2645     free(uBuf);
2646     free(cBuf);
2647 }
2648 /* Test for Jitterbug 778 */
2649 static void TestToAndFromUChars(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2650     const UChar* uSource;
2651     const UChar* uSourceLimit;
2652     const char* cSource;
2653     UChar *uTargetLimit =NULL;
2654     UChar *uTarget;
2655     char *cTarget;
2656     const char *cTargetLimit;
2657     char *cBuf;
2658     UChar *uBuf,*test;
2659     int32_t uBufSize = 120;
2660     int numCharsInTarget=0;
2661     UErrorCode errorCode=U_ZERO_ERROR;
2662     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2663     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
2664     uSource = source;
2665     uSourceLimit=sourceLimit;
2666     cTarget = cBuf;
2667     cTargetLimit = cBuf +uBufSize*5;
2668     uTarget = uBuf;
2669     uTargetLimit = uBuf+ uBufSize*5;
2670     ucnv_reset(cnv);
2671     numCharsInTarget=ucnv_fromUChars( cnv , cTarget, (cTargetLimit-cTarget),uSource,(uSourceLimit-uSource), &errorCode);
2672     if(U_FAILURE(errorCode)){
2673         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
2674         return;
2675     }
2676     cSource = cBuf;
2677     test =uBuf;
2678     ucnv_toUChars(cnv,uTarget,(uTargetLimit-uTarget),cSource,numCharsInTarget,&errorCode);
2679     if(U_FAILURE(errorCode)){
2680         log_err("ucnv_toUChars conversion failed, reason %s\n", u_errorName(errorCode));
2681         return;
2682     }
2683     uSource = source;
2684     while(uSource<uSourceLimit){
2685         if(*test!=*uSource){
2686
2687             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
2688         }
2689         uSource++;
2690         test++;
2691     }
2692     free(uBuf);
2693     free(cBuf);
2694 }
2695
2696 static void TestSmallSourceBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2697     const UChar* uSource;
2698     const UChar* uSourceLimit;
2699     const char* cSource;
2700     const char* cSourceLimit;
2701     UChar *uTargetLimit =NULL;
2702     UChar *uTarget;
2703     char *cTarget;
2704     const char *cTargetLimit;
2705     char *cBuf;
2706     UChar *uBuf,*test;
2707     int32_t uBufSize = 120;
2708     int len=0;
2709     int i=2;
2710     const UChar *temp = sourceLimit;
2711     UErrorCode errorCode=U_ZERO_ERROR;
2712     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2713     cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
2714
2715     ucnv_reset(cnv);
2716     for(;--i>0;){
2717         uSource = (UChar*) source;
2718         cTarget = cBuf;
2719         uTarget = uBuf;
2720         cSource = cBuf;
2721         cTargetLimit = cBuf;
2722         uTargetLimit = uBuf+uBufSize*5;
2723         cTargetLimit = cTargetLimit+uBufSize*10;
2724         uSourceLimit=uSource;
2725         do{
2726
2727             if (uSourceLimit < sourceLimit) {
2728                 uSourceLimit = uSourceLimit+1;
2729             }
2730             ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode);
2731             if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2732                errorCode=U_ZERO_ERROR;
2733                 continue;
2734             }
2735
2736             if(U_FAILURE(errorCode)){
2737                 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2738                 return;
2739             }
2740
2741         }while (uSource<temp);
2742
2743         cSourceLimit =cBuf;
2744         do{
2745             if (cSourceLimit < cBuf + (cTarget - cBuf)) {
2746                 cSourceLimit = cSourceLimit+1;
2747             }
2748             ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode);
2749             if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2750                errorCode=U_ZERO_ERROR;
2751                 continue;
2752             }
2753             if(U_FAILURE(errorCode)){
2754                    log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2755                     return;
2756             }
2757         }while(cSource<cTarget);
2758
2759         uSource = source;
2760         test =uBuf;
2761         for(;len<(int)(source - sourceLimit);len++){
2762             if(uBuf[len]!=uSource[len]){
2763                 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
2764             }
2765         }
2766     }
2767     free(uBuf);
2768     free(cBuf);
2769 }
2770 static void
2771 TestGetNextUChar2022(UConverter* cnv, const char* source, const char* limit,
2772                      const uint16_t results[], const char* message){
2773      const char* s0;
2774      const char* s=(char*)source;
2775      const uint16_t *r=results;
2776      UErrorCode errorCode=U_ZERO_ERROR;
2777      uint32_t c,exC;
2778      ucnv_reset(cnv);
2779      while(s<limit) {
2780         s0=s;
2781         c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
2782         if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
2783             break; /* no more significant input */
2784         } else if(U_FAILURE(errorCode)) {
2785             log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
2786             break;
2787         } else {
2788             if(UTF_IS_FIRST_SURROGATE(*r)){
2789                 int i =0, len = 2;
2790                 UTF_NEXT_CHAR_SAFE(r, i, len, exC, FALSE);
2791                 r++;
2792             }else{
2793                 exC = *r;
2794             }
2795             if(c!=(uint32_t)(exC))
2796                 log_err("%s ucnv_getNextUChar() Expected:  \\u%04X Got:  \\u%04X \n",message,(uint32_t) (*r),c);
2797         }
2798         r++;
2799     }
2800 }
2801
2802 static int TestJitterbug930(const char* enc){
2803     UErrorCode err = U_ZERO_ERROR;
2804     UConverter*converter;
2805     char out[80];
2806     char*target = out;
2807     UChar in[4];
2808     const UChar*source = in;
2809     int32_t off[80];
2810     int32_t* offsets = off;
2811     int numOffWritten=0;
2812     UBool flush = 0;
2813     converter = my_ucnv_open(enc, &err);
2814
2815     in[0] = 0x41;     /* 0x4E00;*/
2816     in[1] = 0x4E01;
2817     in[2] = 0x4E02;
2818     in[3] = 0x4E03;
2819
2820     memset(off, '*', sizeof(off));
2821
2822     ucnv_fromUnicode (converter,
2823             &target,
2824             target+2,
2825             &source,
2826             source+3,
2827             offsets,
2828             flush,
2829             &err);
2830
2831         /* writes three bytes into the output buffer: 41 1B 24
2832         * but offsets contains 0 1 1
2833     */
2834     while(*offsets< off[10]){
2835         numOffWritten++;
2836         offsets++;
2837     }
2838     log_verbose("Testing Jitterbug 930 for encoding %s",enc);
2839     if(numOffWritten!= (int)(target-out)){
2840         log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc, (int)(target-out),numOffWritten);
2841     }
2842
2843     err = U_ZERO_ERROR;
2844
2845     memset(off,'*' , sizeof(off));
2846
2847     flush = 1;
2848     offsets=off;
2849     ucnv_fromUnicode (converter,
2850             &target,
2851             target+4,
2852             &source,
2853             source,
2854             offsets,
2855             flush,
2856             &err);
2857     numOffWritten=0;
2858     while(*offsets< off[10]){
2859         numOffWritten++;
2860         if(*offsets!= -1){
2861             log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc,-1,*offsets) ;
2862         }
2863         offsets++;
2864     }
2865
2866     /* writes 42 43 7A into output buffer,
2867      * offsets contains -1 -1 -1
2868      */
2869     ucnv_close(converter);
2870     return 0;
2871 }
2872
2873 static void
2874 TestHZ() {
2875     /* test input */
2876     static const uint16_t in[]={
2877             0x3000, 0x3001, 0x3002, 0x00B7, 0x02C9, 0x02C7, 0x00A8, 0x3003, 0x3005, 0x2014,
2878             0xFF5E, 0x2016, 0x2026, 0x007E, 0x997C, 0x70B3, 0x75C5, 0x5E76, 0x73BB, 0x83E0,
2879             0x64AD, 0x62E8, 0x94B5, 0x000A, 0x6CE2, 0x535A, 0x52C3, 0x640F, 0x94C2, 0x7B94,
2880             0x4F2F, 0x5E1B, 0x8236, 0x000A, 0x8116, 0x818A, 0x6E24, 0x6CCA, 0x9A73, 0x6355,
2881             0x535C, 0x54FA, 0x8865, 0x000A, 0x57E0, 0x4E0D, 0x5E03, 0x6B65, 0x7C3F, 0x90E8,
2882             0x6016, 0x248F, 0x2490, 0x000A, 0x2491, 0x2492, 0x2493, 0x2494, 0x2495, 0x2496,
2883             0x2497, 0x2498, 0x2499, 0x000A, 0x249A, 0x249B, 0x2474, 0x2475, 0x2476, 0x2477,
2884             0x2478, 0x2479, 0x247A, 0x000A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F, 0x2480,
2885             0x2481, 0x2482, 0x2483, 0x000A, 0x0041, 0x0043, 0x0044, 0x0045, 0x0046, 0x007E,
2886             0x0048, 0x0049, 0x004A, 0x000A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050,
2887             0x0051, 0x0052, 0x0053, 0x000A, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059,
2888             0x005A, 0x005B, 0x005C, 0x000A
2889       };
2890     const UChar* uSource;
2891     const UChar* uSourceLimit;
2892     const char* cSource;
2893     const char* cSourceLimit;
2894     UChar *uTargetLimit =NULL;
2895     UChar *uTarget;
2896     char *cTarget;
2897     const char *cTargetLimit;
2898     char *cBuf;
2899     UChar *uBuf,*test;
2900     int32_t uBufSize = 120;
2901     UErrorCode errorCode=U_ZERO_ERROR;
2902     UConverter *cnv;
2903     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
2904     int32_t* myOff= offsets;
2905     cnv=ucnv_open("HZ", &errorCode);
2906     if(U_FAILURE(errorCode)) {
2907         log_data_err("Unable to open HZ converter: %s\n", u_errorName(errorCode));
2908         return;
2909     }
2910
2911     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2912     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
2913     uSource = (const UChar*)&in[0];
2914     uSourceLimit=(const UChar*)&in[sizeof(in)/2];
2915     cTarget = cBuf;
2916     cTargetLimit = cBuf +uBufSize*5;
2917     uTarget = uBuf;
2918     uTargetLimit = uBuf+ uBufSize*5;
2919     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
2920     if(U_FAILURE(errorCode)){
2921         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
2922         return;
2923     }
2924     cSource = cBuf;
2925     cSourceLimit =cTarget;
2926     test =uBuf;
2927     myOff=offsets;
2928     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
2929     if(U_FAILURE(errorCode)){
2930         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2931         return;
2932     }
2933     uSource = (const UChar*)&in[0];
2934     while(uSource<uSourceLimit){
2935         if(*test!=*uSource){
2936
2937             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
2938         }
2939         uSource++;
2940         test++;
2941     }
2942     TestGetNextUChar2022(cnv, cBuf, cTarget, in, "HZ encoding");
2943     TestSmallTargetBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
2944     TestSmallSourceBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
2945     TestToAndFromUChars(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
2946     TestJitterbug930("csISO2022JP");
2947     ucnv_close(cnv);
2948     free(offsets);
2949     free(uBuf);
2950     free(cBuf);
2951 }
2952
2953 static void
2954 TestISCII(){
2955         /* test input */
2956     static const uint16_t in[]={
2957         /* test full range of Devanagari */
2958         0x0901,0x0902,0x0903,0x0905,0x0906,0x0907,0x0908,0x0909,0x090A,
2959         0x090B,0x090E,0x090F,0x0910,0x090D,0x0912,0x0913,0x0914,0x0911,
2960         0x0915,0x0916,0x0917,0x0918,0x0919,0x091A,0x091B,0x091C,0x091D,
2961         0x091E,0x091F,0x0920,0x0921,0x0922,0x0923,0x0924,0x0925,0x0926,
2962         0x0927,0x0928,0x0929,0x092A,0x092B,0x092C,0x092D,0x092E,0x092F,
2963         0x095F,0x0930,0x0931,0x0932,0x0933,0x0934,0x0935,0x0936,0x0937,
2964         0x0938,0x0939,0x200D,0x093E,0x093F,0x0940,0x0941,0x0942,0x0943,
2965         0x0946,0x0947,0x0948,0x0945,0x094A,0x094B,0x094C,0x0949,0x094D,
2966         0x093d,0x0966,0x0967,0x0968,0x0969,0x096A,0x096B,0x096C,
2967         0x096D,0x096E,0x096F,
2968         /* test Soft halant*/
2969         0x0915,0x094d, 0x200D,
2970         /* test explicit halant */
2971         0x0915,0x094d, 0x200c,
2972         /* test double danda */
2973         0x965,
2974         /* test ASCII */
2975         0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2976         0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2977         0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2978         /* tests from Lotus */
2979         0x0061,0x0915,0x000D,0x000A,0x0996,0x0043,
2980         0x0930,0x094D,0x200D,
2981         0x0901,0x000D,0x000A,0x0905,0x0985,0x0043,
2982         0x0915,0x0921,0x002B,0x095F,
2983         /* tamil range */
2984         0x0B86, 0xB87, 0xB88,
2985         /* telugu range */
2986         0x0C05, 0x0C02, 0x0C03,0x0c31,
2987         /* kannada range */
2988         0x0C85, 0xC82, 0x0C83,
2989         /* test Abbr sign and Anudatta */
2990         0x0970, 0x952,
2991        /* 0x0958,
2992         0x0959,
2993         0x095A,
2994         0x095B,
2995         0x095C,
2996         0x095D,
2997         0x095E,
2998         0x095F,*/
2999         0x0960 /* Vocallic RRI 0xAB, 0xE9*/,
3000         0x0944 /* Vowel Sign Vocallic RRI 0xDF, 0xE9 */,
3001         0x090C ,
3002         0x0962,
3003         0x0961 /* Vocallic LL 0xa6, 0xE9 */,
3004         0x0963 /* Vowel Sign Vocallic LL 0xdb, 0xE9, */,
3005         0x0950 /* OM Symbol 0xa1, 0xE9,*/,
3006         0x093D /* Avagraha  0xEA, 0xE9*/,
3007         0x0958,
3008         0x0959,
3009         0x095A,
3010         0x095B,
3011         0x095C,
3012         0x095D,
3013         0x095E,
3014         0x0020, 0x094D, 0x0930, 0x0000, 0x00A0
3015       };
3016     static const unsigned char byteArr[]={
3017
3018         0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,
3019         0xaa,0xab,0xac,0xad,0xae,0xaf,0xb0,0xb1,0xb2,
3020         0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,
3021         0xbc,0xbd,0xbe,0xbf,0xc0,0xc1,0xc2,0xc3,0xc4,
3022         0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,
3023         0xce,0xcf,0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,
3024         0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
3025         0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,
3026         0xea,0xe9,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,
3027         0xf8,0xf9,0xfa,
3028         /* test soft halant */
3029         0xb3, 0xE8, 0xE9,
3030         /* test explicit halant */
3031         0xb3, 0xE8, 0xE8,
3032         /* test double danda */
3033         0xea, 0xea,
3034         /* test ASCII */
3035         0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
3036         0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
3037         0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
3038         /* test ATR code */
3039
3040         /* tests from Lotus */
3041         0x61,0xEF,0x42,0xEF,0x30,0xB3,0x0D,0x0A,0xEF,0x43,0xB4,0x43,
3042         0xEF,0x42,0xCF,0xE8,0xD9,
3043         0xEF,0x42,0xA1,0x0D,0x0A,0xEF,0x42,0xA4,0xEF,0x43,0xA4,0x43,
3044         0xEF,0x42,0xB3,0xBF,0x2B,0xEF,0x42,0xCE,
3045         /* tamil range */
3046         0xEF, 0x44, 0xa5, 0xa6, 0xa7,
3047         /* telugu range */
3048         0xEF, 0x45,0xa4, 0xa2, 0xa3,0xd0,
3049         /* kannada range */
3050         0xEF, 0x48,0xa4, 0xa2, 0xa3,
3051         /* anudatta and abbreviation sign */
3052         0xEF, 0x42, 0xF0, 0xBF, 0xF0, 0xB8,
3053
3054
3055         0xAA, 0xE9,/* RI + NUKTA 0x0960*/
3056
3057         0xDF, 0xE9,/* Vowel sign RI + NUKTA 0x0944*/
3058
3059         0xa6, 0xE9,/* Vowel I + NUKTA 0x090C*/
3060
3061         0xdb, 0xE9,/* Vowel sign I + Nukta 0x0962*/
3062
3063         0xa7, 0xE9,/* Vowel II + NUKTA 0x0961*/
3064
3065         0xdc, 0xE9,/* Vowel sign II + Nukta 0x0963*/
3066
3067         0xa1, 0xE9,/* chandrabindu + Nukta 0x0950*/
3068
3069         0xEA, 0xE9, /* Danda + Nukta 0x093D*/
3070
3071         0xB3, 0xE9, /* Ka + NUKTA */
3072
3073         0xB4, 0xE9, /* Kha + NUKTA */
3074
3075         0xB5, 0xE9, /* Ga + NUKTA */
3076
3077         0xBA, 0xE9,
3078
3079         0xBF, 0xE9,
3080
3081         0xC0, 0xE9,
3082
3083         0xC9, 0xE9,
3084         /* INV halant RA    */
3085         0xD9, 0xE8, 0xCF,
3086         0x00, 0x00A0,
3087         /* just consume unhandled codepoints */
3088         0xEF, 0x30,
3089
3090     };
3091     testConvertToU(byteArr,(sizeof(byteArr)),in,(sizeof(in)/U_SIZEOF_UCHAR),"x-iscii-de",NULL,TRUE);
3092     TestConv(in,(sizeof(in)/2),"ISCII,version=0","hindi", (char *)byteArr,sizeof(byteArr));
3093
3094 }
3095
3096 static void
3097 TestISO_2022_JP() {
3098     /* test input */
3099     static const uint16_t in[]={
3100         0x0041,/*0x00E9,*/0x3000, 0x3001, 0x3002, 0x0020, 0x000D, 0x000A,
3101         0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3102         0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3103         0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3104         0x301C, 0x2016, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3105         0x201D, 0x3014, 0x000D, 0x000A,
3106         0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3107         0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3108         };
3109     const UChar* uSource;
3110     const UChar* uSourceLimit;
3111     const char* cSource;
3112     const char* cSourceLimit;
3113     UChar *uTargetLimit =NULL;
3114     UChar *uTarget;
3115     char *cTarget;
3116     const char *cTargetLimit;
3117     char *cBuf;
3118     UChar *uBuf,*test;
3119     int32_t uBufSize = 120;
3120     UErrorCode errorCode=U_ZERO_ERROR;
3121     UConverter *cnv;
3122     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3123     int32_t* myOff= offsets;
3124     cnv=ucnv_open("ISO_2022_JP_1", &errorCode);
3125     if(U_FAILURE(errorCode)) {
3126         log_data_err("Unable to open an ISO_2022_JP_1 converter: %s\n", u_errorName(errorCode));
3127         return;
3128     }
3129
3130     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3131     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3132     uSource = (const UChar*)&in[0];
3133     uSourceLimit=(const UChar*)&in[sizeof(in)/2];
3134     cTarget = cBuf;
3135     cTargetLimit = cBuf +uBufSize*5;
3136     uTarget = uBuf;
3137     uTargetLimit = uBuf+ uBufSize*5;
3138     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3139     if(U_FAILURE(errorCode)){
3140         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3141         return;
3142     }
3143     cSource = cBuf;
3144     cSourceLimit =cTarget;
3145     test =uBuf;
3146     myOff=offsets;
3147     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3148     if(U_FAILURE(errorCode)){
3149         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3150         return;
3151     }
3152
3153     uSource = (const UChar*)&in[0];
3154     while(uSource<uSourceLimit){
3155         if(*test!=*uSource){
3156
3157             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3158         }
3159         uSource++;
3160         test++;
3161     }
3162
3163     TestSmallTargetBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
3164     TestSmallSourceBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
3165     TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-JP encoding");
3166     TestToAndFromUChars(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
3167     TestJitterbug930("csISO2022JP");
3168     ucnv_close(cnv);
3169     free(uBuf);
3170     free(cBuf);
3171     free(offsets);
3172 }
3173
3174 static void TestConv(const uint16_t in[],int len, const char* conv, const char* lang, char byteArr[],int byteArrLen){
3175     const UChar* uSource;
3176     const UChar* uSourceLimit;
3177     const char* cSource;
3178     const char* cSourceLimit;
3179     UChar *uTargetLimit =NULL;
3180     UChar *uTarget;
3181     char *cTarget;
3182     const char *cTargetLimit;
3183     char *cBuf;
3184     UChar *uBuf,*test;
3185     int32_t uBufSize = 120*10;
3186     UErrorCode errorCode=U_ZERO_ERROR;
3187     UConverter *cnv;
3188     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) );
3189     int32_t* myOff= offsets;
3190     cnv=my_ucnv_open(conv, &errorCode);
3191     if(U_FAILURE(errorCode)) {
3192         log_data_err("Unable to open a %s converter: %s\n", conv, u_errorName(errorCode));
3193         return;
3194     }
3195
3196     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar));
3197     cBuf =(char*)malloc(uBufSize * sizeof(char));
3198     uSource = (const UChar*)&in[0];
3199     uSourceLimit=uSource+len;
3200     cTarget = cBuf;
3201     cTargetLimit = cBuf +uBufSize;
3202     uTarget = uBuf;
3203     uTargetLimit = uBuf+ uBufSize;
3204     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3205     if(U_FAILURE(errorCode)){
3206         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3207         return;
3208     }
3209     /*log_verbose("length of compressed string for language %s using %s:%i \n",conv,lang,(cTarget-cBuf));*/
3210     cSource = cBuf;
3211     cSourceLimit =cTarget;
3212     test =uBuf;
3213     myOff=offsets;
3214     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3215     if(U_FAILURE(errorCode)){
3216         log_err("ucnv_toUnicode conversion failed, reason: %s\n", u_errorName(errorCode));
3217         return;
3218     }
3219
3220     uSource = (const UChar*)&in[0];
3221     while(uSource<uSourceLimit){
3222         if(*test!=*uSource){
3223             log_err("for codepage %s : Expected : \\u%04X \t Got: \\u%04X\n",conv,*uSource,(int)*test) ;
3224         }
3225         uSource++;
3226         test++;
3227     }
3228     TestSmallTargetBuffer(&in[0],(const UChar*)&in[len],cnv);
3229     TestSmallSourceBuffer(&in[0],(const UChar*)&in[len],cnv);
3230     TestGetNextUChar2022(cnv, cBuf, cTarget, in, conv);
3231     if(byteArr && byteArrLen!=0){
3232         TestGetNextUChar2022(cnv, byteArr, (byteArr+byteArrLen), in, lang);
3233         TestToAndFromUChars(&in[0],(const UChar*)&in[len],cnv);
3234         {
3235             cSource = byteArr;
3236             cSourceLimit = cSource+byteArrLen;
3237             test=uBuf;
3238             myOff = offsets;
3239             ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3240             if(U_FAILURE(errorCode)){
3241                 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3242                 return;
3243             }
3244
3245             uSource = (const UChar*)&in[0];
3246             while(uSource<uSourceLimit){
3247                 if(*test!=*uSource){
3248                     log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3249                 }
3250                 uSource++;
3251                 test++;
3252             }
3253         }
3254     }
3255
3256     ucnv_close(cnv);
3257     free(uBuf);
3258     free(cBuf);
3259     free(offsets);
3260 }
3261 static UChar U_CALLCONV
3262 _charAt(int32_t offset, void *context) {
3263     return ((char*)context)[offset];
3264 }
3265
3266 static int32_t
3267 unescape(UChar* dst, int32_t dstLen,const char* src,int32_t srcLen,UErrorCode *status){
3268     int32_t srcIndex=0;
3269     int32_t dstIndex=0;
3270     if(U_FAILURE(*status)){
3271         return 0;
3272     }
3273     if((dst==NULL && dstLen>0) || (src==NULL ) || dstLen < -1 || srcLen <-1 ){
3274         *status = U_ILLEGAL_ARGUMENT_ERROR;
3275         return 0;
3276     }
3277     if(srcLen==-1){
3278         srcLen = uprv_strlen(src);
3279     }
3280
3281     for (; srcIndex<srcLen; ) {
3282         UChar32 c = src[srcIndex++];
3283         if (c == 0x005C /*'\\'*/) {
3284             c = u_unescapeAt(_charAt,&srcIndex,srcLen,(void*)src); /* advances i*/
3285             if (c == (UChar32)0xFFFFFFFF) {
3286                 *status=U_INVALID_CHAR_FOUND; /* return empty string */
3287                 break; /* invalid escape sequence */
3288             }
3289         }
3290         if(dstIndex < dstLen){
3291             if(c>0xFFFF){
3292                dst[dstIndex++] = UTF16_LEAD(c);
3293                if(dstIndex<dstLen){
3294                     dst[dstIndex]=UTF16_TRAIL(c);
3295                }else{
3296                    *status=U_BUFFER_OVERFLOW_ERROR;
3297                }
3298             }else{
3299                 dst[dstIndex]=(UChar)c;
3300             }
3301
3302         }else{
3303             *status = U_BUFFER_OVERFLOW_ERROR;
3304         }
3305         dstIndex++; /* for preflighting */
3306     }
3307     return dstIndex;
3308 }
3309
3310 static void
3311 TestFullRoundtrip(const char* cp){
3312     UChar usource[10] ={0};
3313     UChar nsrc[10] = {0};
3314     uint32_t i=1;
3315     int len=0, ulen;
3316     nsrc[0]=0x0061;
3317     /* Test codepoint 0 */
3318     TestConv(usource,1,cp,"",NULL,0);
3319     TestConv(usource,2,cp,"",NULL,0);
3320     nsrc[2]=0x5555;
3321     TestConv(nsrc,3,cp,"",NULL,0);
3322
3323     for(;i<=0x10FFFF;i++){
3324         if(i==0xD800){
3325             i=0xDFFF;
3326             continue;
3327         }
3328         if(i<=0xFFFF){
3329             usource[0] =(UChar) i;
3330             len=1;
3331         }else{
3332             usource[0]=UTF16_LEAD(i);
3333             usource[1]=UTF16_TRAIL(i);
3334             len=2;
3335         }
3336         ulen=len;
3337         if(i==0x80) {
3338             usource[2]=0;
3339         }
3340         /* Test only single code points */
3341         TestConv(usource,ulen,cp,"",NULL,0);
3342         /* Test codepoint repeated twice */
3343         usource[ulen]=usource[0];
3344         usource[ulen+1]=usource[1];
3345         ulen+=len;
3346         TestConv(usource,ulen,cp,"",NULL,0);
3347         /* Test codepoint repeated 3 times */
3348         usource[ulen]=usource[0];
3349         usource[ulen+1]=usource[1];
3350         ulen+=len;
3351         TestConv(usource,ulen,cp,"",NULL,0);
3352         /* Test codepoint in between 2 codepoints */
3353         nsrc[1]=usource[0];
3354         nsrc[2]=usource[1];
3355         nsrc[len+1]=0x5555;
3356         TestConv(nsrc,len+2,cp,"",NULL,0);
3357         uprv_memset(usource,0,sizeof(UChar)*10);
3358     }
3359 }
3360
3361 static void
3362 TestRoundTrippingAllUTF(void){
3363     if(!QUICK){
3364         log_verbose("Running exhaustive round trip test for BOCU-1\n");
3365         TestFullRoundtrip("BOCU-1");
3366         log_verbose("Running exhaustive round trip test for SCSU\n");
3367         TestFullRoundtrip("SCSU");
3368         log_verbose("Running exhaustive round trip test for UTF-8\n");
3369         TestFullRoundtrip("UTF-8");
3370         log_verbose("Running exhaustive round trip test for CESU-8\n");
3371         TestFullRoundtrip("CESU-8");
3372         log_verbose("Running exhaustive round trip test for UTF-16BE\n");
3373         TestFullRoundtrip("UTF-16BE");
3374         log_verbose("Running exhaustive round trip test for UTF-16LE\n");
3375         TestFullRoundtrip("UTF-16LE");
3376         log_verbose("Running exhaustive round trip test for UTF-16\n");
3377         TestFullRoundtrip("UTF-16");
3378         log_verbose("Running exhaustive round trip test for UTF-32BE\n");
3379         TestFullRoundtrip("UTF-32BE");
3380         log_verbose("Running exhaustive round trip test for UTF-32LE\n");
3381         TestFullRoundtrip("UTF-32LE");
3382         log_verbose("Running exhaustive round trip test for UTF-32\n");
3383         TestFullRoundtrip("UTF-32");
3384         log_verbose("Running exhaustive round trip test for UTF-7\n");
3385         TestFullRoundtrip("UTF-7");
3386         log_verbose("Running exhaustive round trip test for UTF-7\n");
3387         TestFullRoundtrip("UTF-7,version=1");
3388         log_verbose("Running exhaustive round trip test for IMAP-mailbox-name\n");
3389         TestFullRoundtrip("IMAP-mailbox-name");
3390         log_verbose("Running exhaustive round trip test for GB18030\n");
3391         TestFullRoundtrip("GB18030");
3392     }
3393 }
3394
3395 static void
3396 TestSCSU() {
3397
3398     static const uint16_t germanUTF16[]={
3399         0x00d6, 0x006c, 0x0020, 0x0066, 0x006c, 0x0069, 0x0065, 0x00df, 0x0074
3400     };
3401
3402     static const uint8_t germanSCSU[]={
3403         0xd6, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x65, 0xdf, 0x74
3404     };
3405
3406     static const uint16_t russianUTF16[]={
3407         0x041c, 0x043e, 0x0441, 0x043a, 0x0432, 0x0430
3408     };
3409
3410     static const uint8_t russianSCSU[]={
3411         0x12, 0x9c, 0xbe, 0xc1, 0xba, 0xb2, 0xb0
3412     };
3413
3414     static const uint16_t japaneseUTF16[]={
3415         0x3000, 0x266a, 0x30ea, 0x30f3, 0x30b4, 0x53ef, 0x611b,
3416         0x3044, 0x3084, 0x53ef, 0x611b, 0x3044, 0x3084, 0x30ea, 0x30f3,
3417         0x30b4, 0x3002, 0x534a, 0x4e16, 0x7d00, 0x3082, 0x524d, 0x306b,
3418         0x6d41, 0x884c, 0x3057, 0x305f, 0x300c, 0x30ea, 0x30f3, 0x30b4,
3419         0x306e, 0x6b4c, 0x300d, 0x304c, 0x3074, 0x3063, 0x305f, 0x308a,
3420         0x3059, 0x308b, 0x304b, 0x3082, 0x3057, 0x308c, 0x306a, 0x3044,
3421         0x3002, 0x7c73, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x30b3, 0x30f3,
3422         0x30d4, 0x30e5, 0x30fc, 0x30bf, 0x793e, 0x306e, 0x30d1, 0x30bd,
3423         0x30b3, 0x30f3, 0x300c, 0x30de, 0x30c3, 0x30af, 0xff08, 0x30de,
3424         0x30c3, 0x30ad, 0x30f3, 0x30c8, 0x30c3, 0x30b7, 0x30e5, 0xff09,
3425         0x300d, 0x3092, 0x3001, 0x3053, 0x3088, 0x306a, 0x304f, 0x611b,
3426         0x3059, 0x308b, 0x4eba, 0x305f, 0x3061, 0x306e, 0x3053, 0x3068,
3427         0x3060, 0x3002, 0x300c, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x4fe1,
3428         0x8005, 0x300d, 0x306a, 0x3093, 0x3066, 0x8a00, 0x3044, 0x65b9,
3429         0x307e, 0x3067, 0x3042, 0x308b, 0x3002
3430     };
3431
3432     /* SCSUEncoder produces a slightly longer result (179B vs. 178B) because of one different choice:
3433      it uses an SQn once where a longer look-ahead could have shown that SCn is more efficient */
3434     static const uint8_t japaneseSCSU[]={
3435         0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
3436         0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
3437         0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
3438         0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
3439         0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
3440         0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
3441         0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
3442         0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
3443         0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
3444         0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
3445         0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
3446         0xcb, 0x82
3447     };
3448
3449     static const uint16_t allFeaturesUTF16[]={
3450         0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 0x01df, 0xf000, 0xdbff,
3451         0xdfff, 0x000d, 0x000a, 0x0041, 0x00df, 0x0401, 0x015f, 0x00df,
3452         0x01df, 0xf000, 0xdbff, 0xdfff
3453     };
3454
3455     /* see comment at japaneseSCSU: the same kind of different choice yields a slightly shorter
3456      * result here (34B vs. 35B)
3457      */
3458     static const uint8_t allFeaturesSCSU[]={
3459         0x41, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x1b, 0x03,
3460         0xdf, 0x1c, 0x88, 0x80, 0x0b, 0xbf, 0xff, 0xff, 0x0d, 0x0a,
3461         0x41, 0x10, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x13,
3462         0xdf, 0x14, 0x80, 0x15, 0xff
3463     };
3464     static const uint16_t monkeyIn[]={
3465         0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3466         0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3467         0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3468         0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3469         0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3470         0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3471         0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3472         0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3473         0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3474         0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3475         0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3476         0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3477         0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3478         0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3479         0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3480         0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3481         0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3482         0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3483         0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A,
3484         /* test non-BMP code points */
3485         0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
3486         0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
3487         0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
3488         0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
3489         0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
3490         0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
3491         0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
3492         0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
3493         0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
3494         0xD869, 0xDED5, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xDBFF, 0xDFFF,
3495         0xDBFF, 0xDFFF, 0xDBFF, 0xDFFF,
3496
3497
3498         0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
3499         0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
3500         0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
3501         0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
3502         0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
3503     };
3504     static const char *fTestCases [] = {
3505           "\\ud800\\udc00", /* smallest surrogate*/
3506           "\\ud8ff\\udcff",
3507           "\\udBff\\udFff", /* largest surrogate pair*/
3508           "\\ud834\\udc00",
3509           "\\U0010FFFF",
3510           "Hello \\u9292 \\u9192 World!",
3511           "Hell\\u0429o \\u9292 \\u9192 W\\u00e4rld!",
3512           "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3513
3514           "\\u0648\\u06c8", /* catch missing reset*/
3515           "\\u0648\\u06c8",
3516
3517           "\\u4444\\uE001", /* lowest quotable*/
3518           "\\u4444\\uf2FF", /* highest quotable*/
3519           "\\u4444\\uf188\\u4444",
3520           "\\u4444\\uf188\\uf288",
3521           "\\u4444\\uf188abc\\u0429\\uf288",
3522           "\\u9292\\u2222",
3523           "Hell\\u0429\\u04230o \\u9292 \\u9292W\\u00e4\\u0192rld!",
3524           "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3525           "Hello World!123456",
3526           "Hello W\\u0081\\u011f\\u0082!", /* Latin 1 run*/
3527
3528           "abc\\u0301\\u0302",  /* uses SQn for u301 u302*/
3529           "abc\\u4411d",      /* uses SQU*/
3530           "abc\\u4411\\u4412d",/* uses SCU*/
3531           "abc\\u0401\\u0402\\u047f\\u00a5\\u0405", /* uses SQn for ua5*/
3532           "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", /* SJIS like data*/
3533           "\\u9292\\u2222",
3534           "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000",
3535           "\\u9999\\u3051\\u300c\\u9999\\u9999\\u3060\\u9999\\u3065\\u3065\\u3065\\u300c",
3536           "\\u3000\\u266a\\u30ea\\u30f3\\u30b4\\u53ef\\u611b\\u3044\\u3084\\u53ef\\u611b\\u3044\\u3084\\u30ea\\u30f3\\u30b4\\u3002",
3537
3538           "", /* empty input*/
3539           "\\u0000", /* smallest BMP character*/
3540           "\\uFFFF", /* largest BMP character*/
3541
3542           /* regression tests*/
3543           "\\u6441\\ub413\\ua733\\uf8fe\\ueedb\\u587f\\u195f\\u4899\\uf23d\\u49fd\\u0aac\\u5792\\ufc22\\ufc3c\\ufc46\\u00aa",
3544           "\\u00df\\u01df\\uf000\\udbff\\udfff\\u000d\n\\u0041\\u00df\\u0401\\u015f\\u00df\\u01df\\uf000\\udbff\\udfff",
3545           "\\u30f9\\u8321\\u05e5\\u181c\\ud72b\\u2019\\u99c9\\u2f2f\\uc10c\\u82e1\\u2c4d\\u1ebc\\u6013\\u66dc\\ubbde\\u94a5\\u4726\\u74af\\u3083\\u55b9\\u000c",
3546           "\\u0041\\u00df\\u0401\\u015f",
3547           "\\u9066\\u2123abc",
3548           "\\ud266\\u43d7\\u\\ue386\\uc9c0\\u4a6b\\u9222\\u901f\\u7410\\ua63f\\u539b\\u9596\\u482e\\u9d47\\ucfe4\\u7b71\\uc280\\uf26a\\u982f\\u862a\\u4edd\\uf513\\ufda6\\u869d\\u2ee0\\ua216\\u3ff6\\u3c70\\u89c0\\u9576\\ud5ec\\ubfda\\u6cca\\u5bb3\\ubcea\\u554c\\u914e\\ufa4a\\uede3\\u2990\\ud2f5\\u2729\\u5141\\u0f26\\uccd8\\u5413\\ud196\\ubbe2\\u51b9\\u9b48\\u0dc8\\u2195\\u21a2\\u21e9\\u00e4\\u9d92\\u0bc0\\u06c5",
3549           "\\uf95b\\u2458\\u2468\\u0e20\\uf51b\\ue36e\\ubfc1\\u0080\\u02dd\\uf1b5\\u0cf3\\u6059\\u7489",
3550     };
3551     int i=0;
3552     for(;i<sizeof(fTestCases)/sizeof(*fTestCases);i++){
3553         const char* cSrc = fTestCases[i];
3554         UErrorCode status = U_ZERO_ERROR;
3555         int32_t cSrcLen,srcLen;
3556         UChar* src;
3557         /* UConverter* cnv = ucnv_open("SCSU",&status); */
3558         cSrcLen= srcLen =  uprv_strlen(fTestCases[i]);
3559         src = (UChar*) malloc((sizeof(UChar) * srcLen) + sizeof(UChar));
3560         srcLen=unescape(src,srcLen,cSrc,cSrcLen,&status);
3561         log_verbose("Testing roundtrip for src: %s at index :%d\n",cSrc,i);
3562         TestConv(src,srcLen,"SCSU","Coverage",NULL,0);
3563         free(src);
3564     }
3565     TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features", (char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
3566     TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features",(char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
3567     TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
3568     TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU,locale=ja","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
3569     TestConv(germanUTF16,(sizeof(germanUTF16)/2),"SCSU","german",(char *)germanSCSU,sizeof(germanSCSU));
3570     TestConv(russianUTF16,(sizeof(russianUTF16)/2), "SCSU","russian",(char *)russianSCSU,sizeof(russianSCSU));
3571     TestConv(monkeyIn,(sizeof(monkeyIn)/2),"SCSU","monkey",NULL,0);
3572 }
3573 static void TestJitterbug2346(){
3574     char source[] = { 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a,
3575                       0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a};
3576     uint16_t expected[] = {0x91CD,0x000D,0x000A,0x91CD,0x000D,0x000A};
3577
3578     UChar uTarget[500]={'\0'};
3579     UChar* utarget=uTarget;
3580     UChar* utargetLimit=uTarget+sizeof(uTarget)/2;
3581
3582     char cTarget[500]={'\0'};
3583     char* ctarget=cTarget;
3584     char* ctargetLimit=cTarget+sizeof(cTarget);
3585     const char* csource=source;
3586     UChar* temp = expected;
3587     UErrorCode err=U_ZERO_ERROR;
3588
3589     UConverter* conv =ucnv_open("ISO_2022_JP",&err);
3590     if(U_FAILURE(err)) {
3591         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
3592         return;
3593     }
3594     ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(source),NULL,TRUE,&err);
3595     if(U_FAILURE(err)) {
3596         log_err("ISO_2022_JP to Unicode conversion failed: %s\n", u_errorName(err));
3597         return;
3598     }
3599     utargetLimit=utarget;
3600     utarget = uTarget;
3601     while(utarget<utargetLimit){
3602         if(*temp!=*utarget){
3603
3604             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*utarget,(int)*temp) ;
3605         }
3606         utarget++;
3607         temp++;
3608     }
3609     ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err);
3610     if(U_FAILURE(err)) {
3611         log_err("ISO_2022_JP from Unicode conversion failed: %s\n", u_errorName(err));
3612         return;
3613     }
3614     ctargetLimit=ctarget;
3615     ctarget =cTarget;
3616     ucnv_close(conv);
3617
3618
3619 }
3620 static void
3621 TestISO_2022_JP_1() {
3622     /* test input */
3623     static const uint16_t in[]={
3624         0x3000, 0x3001, 0x3002, 0x0020, 0xFF0E, 0x30FB, 0xFF1A, 0xFF1B, 0x000D, 0x000A,
3625         0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3626         0x52C8, 0x52CC, 0x52CF, 0x52D1, 0x52D4, 0x52D6, 0x52DB, 0x52DC, 0x000D, 0x000A,
3627         0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3628         0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3629         0x301C, 0x2016, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3630         0x201D, 0x000D, 0x000A,
3631         0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3632         0x4F94, 0x4F97, 0x52BA, 0x52BB, 0x52BD, 0x52C0, 0x52C4, 0x52C6, 0x000D, 0x000A,
3633         0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3634         0x4F78, 0x4F79, 0x4F7A, 0x4F7D, 0x4F7E, 0x4F81, 0x4F82, 0x4F84, 0x000D, 0x000A,
3635         0x4F85, 0x4F89, 0x4F8A, 0x4F8C, 0x4F8E, 0x4F90, 0x4F92, 0x4F93, 0x000D, 0x000A,
3636         0x52E1, 0x52E5, 0x52E8, 0x52E9, 0x000D, 0x000A
3637       };
3638     const UChar* uSource;
3639     const UChar* uSourceLimit;
3640     const char* cSource;
3641     const char* cSourceLimit;
3642     UChar *uTargetLimit =NULL;
3643     UChar *uTarget;
3644     char *cTarget;
3645     const char *cTargetLimit;
3646     char *cBuf;
3647     UChar *uBuf,*test;
3648     int32_t uBufSize = 120;
3649     UErrorCode errorCode=U_ZERO_ERROR;
3650     UConverter *cnv;
3651
3652     cnv=ucnv_open("ISO_2022_JP_1", &errorCode);
3653     if(U_FAILURE(errorCode)) {
3654         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3655         return;
3656     }
3657
3658     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3659     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3660     uSource = (const UChar*)&in[0];
3661     uSourceLimit=(const UChar*)&in[sizeof(in)/2];
3662     cTarget = cBuf;
3663     cTargetLimit = cBuf +uBufSize*5;
3664     uTarget = uBuf;
3665     uTargetLimit = uBuf+ uBufSize*5;
3666     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,TRUE, &errorCode);
3667     if(U_FAILURE(errorCode)){
3668         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3669         return;
3670     }
3671     cSource = cBuf;
3672     cSourceLimit =cTarget;
3673     test =uBuf;
3674     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,TRUE,&errorCode);
3675     if(U_FAILURE(errorCode)){
3676         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3677         return;
3678     }
3679     uSource = (const UChar*)&in[0];
3680     while(uSource<uSourceLimit){
3681         if(*test!=*uSource){
3682
3683             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3684         }
3685         uSource++;
3686         test++;
3687     }
3688     /*ucnv_close(cnv);
3689     cnv=ucnv_open("ISO_2022,locale=jp,version=1", &errorCode);*/
3690     /*Test for the condition where there is an invalid character*/
3691     ucnv_reset(cnv);
3692     {
3693         static const uint8_t source2[]={0x0e,0x24,0x053};
3694         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-1]");
3695     }
3696     TestSmallTargetBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
3697     TestSmallSourceBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
3698     ucnv_close(cnv);
3699     free(uBuf);
3700     free(cBuf);
3701 }
3702
3703 static void
3704 TestISO_2022_JP_2() {
3705     /* test input */
3706     static const uint16_t in[]={
3707         0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3708         0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3709         0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3710         0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3711         0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3712         0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3713         0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3714         0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3715         0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3716         0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3717         0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3718         0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3719         0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3720         0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3721         0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3722         0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3723         0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3724         0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3725         0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A
3726       };
3727     const UChar* uSource;
3728     const UChar* uSourceLimit;
3729     const char* cSource;
3730     const char* cSourceLimit;
3731     UChar *uTargetLimit =NULL;
3732     UChar *uTarget;
3733     char *cTarget;
3734     const char *cTargetLimit;
3735     char *cBuf;
3736     UChar *uBuf,*test;
3737     int32_t uBufSize = 120;
3738     UErrorCode errorCode=U_ZERO_ERROR;
3739     UConverter *cnv;
3740     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3741     int32_t* myOff= offsets;
3742     cnv=ucnv_open("ISO_2022_JP_2", &errorCode);
3743     if(U_FAILURE(errorCode)) {
3744         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3745         return;
3746     }
3747
3748     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3749     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3750     uSource = (const UChar*)&in[0];
3751     uSourceLimit=(const UChar*)&in[sizeof(in)/2];
3752     cTarget = cBuf;
3753     cTargetLimit = cBuf +uBufSize*5;
3754     uTarget = uBuf;
3755     uTargetLimit = uBuf+ uBufSize*5;
3756     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3757     if(U_FAILURE(errorCode)){
3758         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3759         return;
3760     }
3761     cSource = cBuf;
3762     cSourceLimit =cTarget;
3763     test =uBuf;
3764     myOff=offsets;
3765     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3766     if(U_FAILURE(errorCode)){
3767         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3768         return;
3769     }
3770     uSource = (const UChar*)&in[0];
3771     while(uSource<uSourceLimit){
3772         if(*test!=*uSource){
3773
3774             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3775         }
3776         uSource++;
3777         test++;
3778     }
3779     TestSmallTargetBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
3780     TestSmallSourceBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
3781     TestToAndFromUChars(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
3782     /*Test for the condition where there is an invalid character*/
3783     ucnv_reset(cnv);
3784     {
3785         static const uint8_t source2[]={0x0e,0x24,0x053};
3786         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-2]");
3787     }
3788     ucnv_close(cnv);
3789     free(uBuf);
3790     free(cBuf);
3791     free(offsets);
3792 }
3793
3794 static void
3795 TestISO_2022_KR() {
3796     /* test input */
3797     static const uint16_t in[]={
3798                     0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F66,0x9F67,0x9F6A,0x000A,0x000D
3799                    ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC02,0xAC04
3800                    ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
3801                    ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
3802                    ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53DF,0x53E1,0x53E2
3803                    ,0x53E3,0x53E4,0x000A,0x000D};
3804     const UChar* uSource;
3805     const UChar* uSourceLimit;
3806     const char* cSource;
3807     const char* cSourceLimit;
3808     UChar *uTargetLimit =NULL;
3809     UChar *uTarget;
3810     char *cTarget;
3811     const char *cTargetLimit;
3812     char *cBuf;
3813     UChar *uBuf,*test;
3814     int32_t uBufSize = 120;
3815     UErrorCode errorCode=U_ZERO_ERROR;
3816     UConverter *cnv;
3817     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3818     int32_t* myOff= offsets;
3819     cnv=ucnv_open("ISO_2022,locale=kr", &errorCode);
3820     if(U_FAILURE(errorCode)) {
3821         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3822         return;
3823     }
3824
3825     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3826     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3827     uSource = (const UChar*)&in[0];
3828     uSourceLimit=(const UChar*)&in[sizeof(in)/2];
3829     cTarget = cBuf;
3830     cTargetLimit = cBuf +uBufSize*5;
3831     uTarget = uBuf;
3832     uTargetLimit = uBuf+ uBufSize*5;
3833     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3834     if(U_FAILURE(errorCode)){
3835         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3836         return;
3837     }
3838     cSource = cBuf;
3839     cSourceLimit =cTarget;
3840     test =uBuf;
3841     myOff=offsets;
3842     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3843     if(U_FAILURE(errorCode)){
3844         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3845         return;
3846     }
3847     uSource = (const UChar*)&in[0];
3848     while(uSource<uSourceLimit){
3849         if(*test!=*uSource){
3850             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
3851         }
3852         uSource++;
3853         test++;
3854     }
3855     TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
3856     TestSmallTargetBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
3857     TestSmallSourceBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
3858     TestToAndFromUChars(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
3859     TestJitterbug930("csISO2022KR");
3860     /*Test for the condition where there is an invalid character*/
3861     ucnv_reset(cnv);
3862     {
3863         static const uint8_t source2[]={0x1b,0x24,0x053};
3864         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
3865         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
3866     }
3867     ucnv_close(cnv);
3868     free(uBuf);
3869     free(cBuf);
3870     free(offsets);
3871 }
3872
3873 static void
3874 TestISO_2022_KR_1() {
3875     /* test input */
3876     static const uint16_t in[]={
3877                     0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
3878                    ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
3879                    ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
3880                    ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
3881                    ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
3882                    ,0x53E3,0x53E4,0x000A,0x000D};
3883     const UChar* uSource;
3884     const UChar* uSourceLimit;
3885     const char* cSource;
3886     const char* cSourceLimit;
3887     UChar *uTargetLimit =NULL;
3888     UChar *uTarget;
3889     char *cTarget;
3890     const char *cTargetLimit;
3891     char *cBuf;
3892     UChar *uBuf,*test;
3893     int32_t uBufSize = 120;
3894     UErrorCode errorCode=U_ZERO_ERROR;
3895     UConverter *cnv;
3896     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3897     int32_t* myOff= offsets;
3898     cnv=ucnv_open("ibm-25546", &errorCode);
3899     if(U_FAILURE(errorCode)) {
3900         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3901         return;
3902     }
3903
3904     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3905     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3906     uSource = (const UChar*)&in[0];
3907     uSourceLimit=(const UChar*)&in[sizeof(in)/2];
3908     cTarget = cBuf;
3909     cTargetLimit = cBuf +uBufSize*5;
3910     uTarget = uBuf;
3911     uTargetLimit = uBuf+ uBufSize*5;
3912     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3913     if(U_FAILURE(errorCode)){
3914         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3915         return;
3916     }
3917     cSource = cBuf;
3918     cSourceLimit =cTarget;
3919     test =uBuf;
3920     myOff=offsets;
3921     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3922     if(U_FAILURE(errorCode)){
3923         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3924         return;
3925     }
3926     uSource = (const UChar*)&in[0];
3927     while(uSource<uSourceLimit){
3928         if(*test!=*uSource){
3929             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
3930         }
3931         uSource++;
3932         test++;
3933     }
3934     ucnv_reset(cnv);
3935     TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
3936     TestSmallTargetBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
3937     TestSmallSourceBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
3938     ucnv_reset(cnv);
3939     TestToAndFromUChars(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
3940         /*Test for the condition where there is an invalid character*/
3941     ucnv_reset(cnv);
3942     {
3943         static const uint8_t source2[]={0x1b,0x24,0x053};
3944         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
3945         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
3946     }
3947     ucnv_close(cnv);
3948     free(uBuf);
3949     free(cBuf);
3950     free(offsets);
3951 }
3952
3953 static void TestJitterbug2411(){
3954     const char* source = "\x1b\x24\x29\x43\x6b\x6b\x6e\x6e\x6a\x68\x70\x6f\x69\x75\x79\x71\x77\x65\x68\x67\x0A"
3955                          "\x1b\x24\x29\x43\x6a\x61\x73\x64\x66\x6a\x61\x73\x64\x66\x68\x6f\x69\x75\x79\x1b\x24\x29\x43";
3956     UConverter* kr=NULL, *kr1=NULL;
3957     UErrorCode errorCode = U_ZERO_ERROR;
3958     UChar tgt[100]={'\0'};
3959     UChar* target = tgt;
3960     UChar* targetLimit = target+100;
3961     kr=ucnv_open("iso-2022-kr", &errorCode);
3962     if(U_FAILURE(errorCode)) {
3963         log_data_err("Unable to open a iso-2022-kr converter: %s\n", u_errorName(errorCode));
3964         return;
3965     }
3966     ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode);
3967     if(U_FAILURE(errorCode)) {
3968         log_err("iso-2022-kr cannot handle multiple escape sequences : %s\n", u_errorName(errorCode));
3969         return;
3970     }
3971     kr1 = ucnv_open("ibm-25546", &errorCode);
3972     if(U_FAILURE(errorCode)) {
3973         log_data_err("Unable to open a iso-2022-kr_1 converter: %s\n", u_errorName(errorCode));
3974         return;
3975     }
3976     target = tgt;
3977     targetLimit = target+100;
3978     ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode);
3979
3980     if(U_FAILURE(errorCode)) {
3981         log_err("iso-2022-kr_1 cannot handle multiple escape sequences : %s\n", u_errorName(errorCode));
3982         return;
3983     }
3984
3985     ucnv_close(kr);
3986     ucnv_close(kr1);
3987
3988 }
3989
3990 static void
3991 TestJIS(){
3992     /* From Unicode moved to testdata/conversion.txt */
3993     /*To Unicode*/
3994     {
3995         const uint8_t sampleTextJIS[] = {
3996             0x1b,0x28,0x48,0x41,0x42, /*jis-Roman*/
3997             0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
3998             0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
3999         };
4000         const uint16_t expectedISO2022JIS[] = {
4001             0x0041, 0x0042,
4002             0xFF81, 0xFF82,
4003             0x3000
4004         };
4005         int32_t  toISO2022JISOffs[]={
4006             3,4,
4007             8,9,
4008             16
4009         };
4010
4011         const uint8_t sampleTextJIS7[] = {
4012             0x1b,0x28,0x48,0x41,0x42, /*JIS7-Roman*/
4013             0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4014             0x1b,0x24,0x42,0x21,0x21,
4015             0x0e,0x41,0x42,0x0f,      /*Test Katakana set with SI and SO */
4016             0x21,0x22,
4017             0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4018         };
4019         const uint16_t expectedISO2022JIS7[] = {
4020             0x0041, 0x0042,
4021             0xFF81, 0xFF82,
4022             0x3000,
4023             0xFF81, 0xFF82,
4024             0x3001,
4025             0x3000
4026         };
4027         int32_t  toISO2022JIS7Offs[]={
4028             3,4,
4029             8,9,
4030             13,16,
4031             17,
4032             19,27
4033         };
4034         const uint8_t sampleTextJIS8[] = {
4035             0x1b,0x28,0x48,0x41,0x42, /*JIS8-Roman*/
4036             0xa1,0xc8,0xd9,/*Katakana Set*/
4037             0x1b,0x28,0x42,
4038             0x41,0x42,
4039             0xb1,0xc3, /*Katakana Set*/
4040             0x1b,0x24,0x42,0x21,0x21
4041         };
4042         const uint16_t expectedISO2022JIS8[] = {
4043             0x0041, 0x0042,
4044             0xff61, 0xff88, 0xff99,
4045             0x0041, 0x0042,
4046             0xff71, 0xff83,
4047             0x3000
4048         };
4049         int32_t  toISO2022JIS8Offs[]={
4050             3, 4,  5,  6,
4051             7, 11, 12, 13,
4052             14, 18,
4053         };
4054
4055         testConvertToU(sampleTextJIS,sizeof(sampleTextJIS),expectedISO2022JIS,
4056             sizeof(expectedISO2022JIS)/sizeof(expectedISO2022JIS[0]),"JIS", toISO2022JISOffs,TRUE);
4057         testConvertToU(sampleTextJIS7,sizeof(sampleTextJIS7),expectedISO2022JIS7,
4058             sizeof(expectedISO2022JIS7)/sizeof(expectedISO2022JIS7[0]),"JIS7", toISO2022JIS7Offs,TRUE);
4059         testConvertToU(sampleTextJIS8,sizeof(sampleTextJIS8),expectedISO2022JIS8,
4060             sizeof(expectedISO2022JIS8)/sizeof(expectedISO2022JIS8[0]),"JIS8", toISO2022JIS8Offs,TRUE);
4061     }
4062
4063 }
4064
4065 static void TestJitterbug915(){
4066 /* tests for roundtripping of the below sequence
4067 \x1b$)G\x0E#!#"###$#%#&#'#(#)#*#+          / *plane 1 * /
4068 \x1b$*H\x1bN"!\x1bN""\x1bN"#\x1bN"$\x1bN"% / *plane 2 * /
4069 \x1b$+I\x1bO"D\x1bO"E\x1bO"F\x1bO"G\x1bO"H / *plane 3 * /
4070 \x1b$+J\x1bO!D\x1bO!E\x1bO"j\x1bO"k\x1bO"l / *plane 4 * /
4071 \x1b$+K\x1bO!t\x1bO"P\x1bO"Q\x1bO#7\x1bO"\ / *plane 5 * /
4072 \x1b$+L\x1bO!#\x1bO",\x1bO#N\x1bO!n\x1bO#q / *plane 6 * /
4073 \x1b$+M\x1bO"q\x1bO!N\x1bO!j\x1bO#:\x1bO#o / *plane 7 * /
4074 */
4075     static char cSource[]={
4076         0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
4077         0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
4078         0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
4079         0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
4080         0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
4081         0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
4082         0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x2F, 0x2A, 0x70,
4083         0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
4084         0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
4085         0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
4086         0x1B, 0x4F, 0x22, 0x48, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
4087         0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
4088         0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
4089         0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
4090         0x4F, 0x22, 0x6C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4091         0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4092         0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
4093         0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
4094         0x22, 0x5C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4095         0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4096         0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
4097         0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
4098         0x23, 0x71, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
4099         0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
4100         0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
4101         0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
4102         0x6F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
4103         0x37, 0x20, 0x2A, 0x2F
4104     };
4105     UChar uTarget[500]={'\0'};
4106     UChar* utarget=uTarget;
4107     UChar* utargetLimit=uTarget+sizeof(uTarget)/2;
4108
4109     char cTarget[500]={'\0'};
4110     char* ctarget=cTarget;
4111     char* ctargetLimit=cTarget+sizeof(cTarget);
4112     const char* csource=cSource;
4113     char* tempSrc = cSource;
4114     UErrorCode err=U_ZERO_ERROR;
4115
4116     UConverter* conv =ucnv_open("ISO_2022_CN_EXT",&err);
4117     if(U_FAILURE(err)) {
4118         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
4119         return;
4120     }
4121     ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(cSource),NULL,TRUE,&err);
4122     if(U_FAILURE(err)) {
4123         log_err("iso-2022-CN to Unicode conversion failed: %s\n", u_errorName(err));
4124         return;
4125     }
4126     utargetLimit=utarget;
4127     utarget = uTarget;
4128     ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err);
4129     if(U_FAILURE(err)) {
4130         log_err("iso-2022-CN from Unicode conversion failed: %s\n", u_errorName(err));
4131         return;
4132     }
4133     ctargetLimit=ctarget;
4134     ctarget =cTarget;
4135     while(ctarget<ctargetLimit){
4136         if(*ctarget != *tempSrc){
4137             log_err("j915[%d] Expected : \\x%02X \t Got: \\x%02X\n", (int)(ctarget-cTarget), *ctarget,(int)*tempSrc) ;
4138         }
4139         ++ctarget;
4140         ++tempSrc;
4141     }
4142
4143     ucnv_close(conv);
4144 }
4145
4146 static void
4147 TestISO_2022_CN_EXT() {
4148     /* test input */
4149     static const uint16_t in[]={
4150                 /* test Non-BMP code points */
4151          0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
4152          0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
4153          0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
4154          0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
4155          0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
4156          0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
4157          0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
4158          0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
4159          0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
4160          0xD869, 0xDED5,
4161
4162          0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
4163          0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
4164          0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
4165          0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4166          0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4167          0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4168          0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4169          0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4170          0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4171          0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4172          0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4173          0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4174          0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4175          0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x3443, 0x3444, 0x000D, 0x000A,
4176          0x3445, 0x3449, 0x344A, 0x344B, 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A,
4177          0x60F6, 0x60F7, 0x60F8, 0x60F9, 0x60FA, 0x60FB, 0x60FC, 0x60FD, 0x000D, 0x000A,
4178          0x60FE, 0x60FF, 0x6100, 0x6101, 0x6102, 0x0041, 0x0042, 0x0043, 0x000D, 0x000A,
4179          0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x000D, 0x000A,
4180
4181          0x33E7, 0x33E8, 0x33E9, 0x33EA, 0x000D, 0x000A
4182
4183       };
4184
4185     const UChar* uSource;
4186     const UChar* uSourceLimit;
4187     const char* cSource;
4188     const char* cSourceLimit;
4189     UChar *uTargetLimit =NULL;
4190     UChar *uTarget;
4191     char *cTarget;
4192     const char *cTargetLimit;
4193     char *cBuf;
4194     UChar *uBuf,*test;
4195     int32_t uBufSize = 180;
4196     UErrorCode errorCode=U_ZERO_ERROR;
4197     UConverter *cnv;
4198     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4199     int32_t* myOff= offsets;
4200     cnv=ucnv_open("ISO_2022,locale=cn,version=1", &errorCode);
4201     if(U_FAILURE(errorCode)) {
4202         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4203         return;
4204     }
4205
4206     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4207     cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
4208     uSource = (const UChar*)&in[0];
4209     uSourceLimit=(const UChar*)&in[sizeof(in)/2];
4210     cTarget = cBuf;
4211     cTargetLimit = cBuf +uBufSize*5;
4212     uTarget = uBuf;
4213     uTargetLimit = uBuf+ uBufSize*5;
4214     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4215     if(U_FAILURE(errorCode)){
4216         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4217         return;
4218     }
4219     cSource = cBuf;
4220     cSourceLimit =cTarget;
4221     test =uBuf;
4222     myOff=offsets;
4223     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4224     if(U_FAILURE(errorCode)){
4225         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4226         return;
4227     }
4228     uSource = (const UChar*)&in[0];
4229     while(uSource<uSourceLimit){
4230         if(*test!=*uSource){
4231             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
4232         }
4233         else{
4234             log_verbose("      Got: \\u%04X\n",(int)*test) ;
4235         }
4236         uSource++;
4237         test++;
4238     }
4239     TestSmallTargetBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
4240     TestSmallSourceBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
4241     /*Test for the condition where there is an invalid character*/
4242     ucnv_reset(cnv);
4243     {
4244         static const uint8_t source2[]={0x0e,0x24,0x053};
4245         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN-EXT]");
4246     }
4247     ucnv_close(cnv);
4248     free(uBuf);
4249     free(cBuf);
4250     free(offsets);
4251 }
4252
4253 static void
4254 TestISO_2022_CN() {
4255     /* test input */
4256     static const uint16_t in[]={
4257          /* jitterbug 951 */
4258          0xFF2D, 0xFF49, 0xFF58, 0xFF45, 0xFF44, 0x0020, 0xFF43, 0xFF48, 0xFF41, 0xFF52,
4259          0x0020, 0xFF06, 0x0020, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17,
4260          0xFF18, 0xFF19, 0xFF10, 0x0020, 0xFF4E, 0xFF55, 0xFF4D, 0xFF42, 0xFF45, 0xFF52,
4261          0x0020, 0xFF54, 0xFF45, 0xFF53, 0xFF54, 0x0020, 0xFF4C, 0xFF49, 0xFF4E, 0xFF45,
4262          0x0020, 0x0045, 0x004e, 0x0044,
4263          /**/
4264          0x4E00, 0x4E00, 0x4E01, 0x4E03, 0x60F6, 0x60F7, 0x60F8, 0x60FB, 0x000D, 0x000A,
4265          0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x60FB, 0x60FC, 0x000D, 0x000A,
4266          0x4E07, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x0042, 0x0043, 0x000D, 0x000A,
4267          0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4268          0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4269          0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4270          0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4271          0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4272          0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4273          0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4274          0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4275          0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4276          0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x60FE, 0x60FF, 0x000D, 0x000A,
4277          0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 0x60F9, 0x60FA, 0x000D, 0x000A,
4278          0x6100, 0x6101, 0x0041, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4279          0x247D, 0x247E, 0x247F, 0x2480, 0x2481, 0x2482, 0x2483, 0x2484, 0x2485, 0x2486,
4280          0x2487, 0x2460, 0x2461, 0xFF20, 0xFF21, 0xFF22, 0x0049, 0x004A, 0x000D, 0x000A,
4281
4282       };
4283     const UChar* uSource;
4284     const UChar* uSourceLimit;
4285     const char* cSource;
4286     const char* cSourceLimit;
4287     UChar *uTargetLimit =NULL;
4288     UChar *uTarget;
4289     char *cTarget;
4290     const char *cTargetLimit;
4291     char *cBuf;
4292     UChar *uBuf,*test;
4293     int32_t uBufSize = 180;
4294     UErrorCode errorCode=U_ZERO_ERROR;
4295     UConverter *cnv;
4296     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4297     int32_t* myOff= offsets;
4298     cnv=ucnv_open("ISO_2022,locale=cn,version=0", &errorCode);
4299     if(U_FAILURE(errorCode)) {
4300         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4301         return;
4302     }
4303
4304     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4305     cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
4306     uSource = (const UChar*)&in[0];
4307     uSourceLimit=(const UChar*)&in[sizeof(in)/2];
4308     cTarget = cBuf;
4309     cTargetLimit = cBuf +uBufSize*5;
4310     uTarget = uBuf;
4311     uTargetLimit = uBuf+ uBufSize*5;
4312     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4313     if(U_FAILURE(errorCode)){
4314         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4315         return;
4316     }
4317     cSource = cBuf;
4318     cSourceLimit =cTarget;
4319     test =uBuf;
4320     myOff=offsets;
4321     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4322     if(U_FAILURE(errorCode)){
4323         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4324         return;
4325     }
4326     uSource = (const UChar*)&in[0];
4327     while(uSource<uSourceLimit){
4328         if(*test!=*uSource){
4329             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
4330         }
4331         else{
4332             log_verbose("      Got: \\u%04X\n",(int)*test) ;
4333         }
4334         uSource++;
4335         test++;
4336     }
4337     TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-CN encoding");
4338     TestSmallTargetBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
4339     TestSmallSourceBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
4340     TestToAndFromUChars(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
4341     TestJitterbug930("csISO2022CN");
4342     /*Test for the condition where there is an invalid character*/
4343     ucnv_reset(cnv);
4344     {
4345         static const uint8_t source2[]={0x0e,0x24,0x053};
4346         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN]");
4347     }
4348
4349     ucnv_close(cnv);
4350     free(uBuf);
4351     free(cBuf);
4352     free(offsets);
4353 }
4354
4355 static void
4356 TestEBCDIC_STATEFUL() {
4357     /* test input */
4358     static const uint8_t in[]={
4359         0x61,
4360         0x1a,
4361         0x0f, 0x4b,
4362         0x42,
4363         0x40,
4364         0x36,
4365     };
4366
4367     /* expected test results */
4368     static const int32_t results[]={
4369         /* number of bytes read, code point */
4370         1, 0x002f,
4371         1, 0x0092,
4372         2, 0x002e,
4373         1, 0xff62,
4374         1, 0x0020,
4375         1, 0x0096,
4376
4377     };
4378     static const uint8_t in2[]={
4379         0x0f,
4380         0xa1,
4381         0x01
4382     };
4383
4384     /* expected test results */
4385     static const int32_t results2[]={
4386         /* number of bytes read, code point */
4387         2, 0x203E,
4388         1, 0x0001,
4389     };
4390
4391     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
4392     UErrorCode errorCode=U_ZERO_ERROR;
4393     UConverter *cnv=ucnv_open("ibm-930", &errorCode);
4394     if(U_FAILURE(errorCode)) {
4395         log_data_err("Unable to open a EBCDIC_STATEFUL(ibm-930) converter: %s\n", u_errorName(errorCode));
4396         return;
4397     }
4398     TestNextUChar(cnv, source, limit, results, "EBCDIC_STATEFUL(ibm-930)");
4399     ucnv_reset(cnv);
4400      /* Test the condition when source >= sourceLimit */
4401     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
4402     ucnv_reset(cnv);
4403     /*Test for the condition where source > sourcelimit after consuming the shift chracter */
4404     {
4405         static const uint8_t source1[]={0x0f};
4406         TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_INDEX_OUTOFBOUNDS_ERROR, "a character is truncated");
4407     }
4408     /*Test for the condition where there is an invalid character*/
4409     ucnv_reset(cnv);
4410     {
4411         static const uint8_t source2[]={0x0e, 0x7F, 0xFF};
4412         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [EBCDIC STATEFUL]");
4413     }
4414     ucnv_reset(cnv);
4415     source=(const char*)in2;
4416     limit=(const char*)in2+sizeof(in2);
4417     TestNextUChar(cnv,source,limit,results2,"EBCDIC_STATEFUL(ibm-930),seq#2");
4418     ucnv_close(cnv);
4419
4420 }
4421
4422 static void
4423 TestGB18030() {
4424     /* test input */
4425     static const uint8_t in[]={
4426         0x24,
4427         0x7f,
4428         0x81, 0x30, 0x81, 0x30,
4429         0xa8, 0xbf,
4430         0xa2, 0xe3,
4431         0xd2, 0xbb,
4432         0x82, 0x35, 0x8f, 0x33,
4433         0x84, 0x31, 0xa4, 0x39,
4434         0x90, 0x30, 0x81, 0x30,
4435         0xe3, 0x32, 0x9a, 0x35
4436 #if 0
4437         /*
4438          * Feature removed   markus 2000-oct-26
4439          * Only some codepages must match surrogate pairs into supplementary code points -
4440          * see javadoc for ucnv_getNextUChar() and implementation notes in ucnvmbcs.c .
4441          * GB 18030 provides direct encodings for supplementary code points, therefore
4442          * it must not combine two single-encoded surrogates into one code point.
4443          */
4444         0x83, 0x36, 0xc8, 0x30, 0x83, 0x37, 0xb0, 0x34 /* separately encoded surrogates */
4445 #endif
4446     };
4447
4448     /* expected test results */
4449     static const int32_t results[]={
4450         /* number of bytes read, code point */
4451         1, 0x24,
4452         1, 0x7f,
4453         4, 0x80,
4454         2, 0x1f9,
4455         2, 0x20ac,
4456         2, 0x4e00,
4457         4, 0x9fa6,
4458         4, 0xffff,
4459         4, 0x10000,
4460         4, 0x10ffff
4461 #if 0
4462         /* Feature removed. See comment above. */
4463         8, 0x10000
4464 #endif
4465     };
4466
4467 /*    const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); */
4468     UErrorCode errorCode=U_ZERO_ERROR;
4469     UConverter *cnv=ucnv_open("gb18030", &errorCode);
4470     if(U_FAILURE(errorCode)) {
4471         log_data_err("Unable to open a gb18030 converter: %s\n", u_errorName(errorCode));
4472         return;
4473     }
4474     TestNextUChar(cnv, (const char *)in, (const char *)in+sizeof(in), results, "gb18030");
4475     ucnv_close(cnv);
4476 }
4477
4478 static void
4479 TestLMBCS() {
4480     /* LMBCS-1 string */
4481     static const uint8_t pszLMBCS[]={
4482         0x61,
4483         0x01, 0x29,
4484         0x81,
4485         0xA0,
4486         0x0F, 0x27,
4487         0x0F, 0x91,
4488         0x14, 0x0a, 0x74,
4489         0x14, 0xF6, 0x02,
4490         0x14, 0xd8, 0x4d, 0x14, 0xdc, 0x56, /* UTF-16 surrogate pair */
4491         0x10, 0x88, 0xA0,
4492     };
4493
4494     /* Unicode UChar32 equivalents */
4495     static const UChar32 pszUnicode32[]={
4496         /* code point */
4497         0x00000061,
4498         0x00002013,
4499         0x000000FC,
4500         0x000000E1,
4501         0x00000007,
4502         0x00000091,
4503         0x00000a74,
4504         0x00000200,
4505         0x00023456, /* code point for surrogate pair */
4506         0x00005516
4507     };
4508
4509 /* Unicode UChar equivalents */
4510     static const UChar pszUnicode[]={
4511         /* code point */
4512         0x0061,
4513         0x2013,
4514         0x00FC,
4515         0x00E1,
4516         0x0007,
4517         0x0091,
4518         0x0a74,
4519         0x0200,
4520         0xD84D, /* low surrogate */
4521         0xDC56, /* high surrogate */
4522         0x5516
4523     };
4524
4525 /* expected test results */
4526     static const int offsets32[]={
4527         /* number of bytes read, code point */
4528         0,
4529         1,
4530         3,
4531         4,
4532         5,
4533         7,
4534         9,
4535         12,
4536         15,
4537         21,
4538         24
4539     };
4540
4541 /* expected test results */
4542     static const int offsets[]={
4543         /* number of bytes read, code point */
4544         0,
4545         1,
4546         3,
4547         4,
4548         5,
4549         7,
4550         9,
4551         12,
4552         15,
4553         18,
4554         21,
4555         24
4556     };
4557
4558
4559     UConverter *cnv;
4560
4561 #define NAME_LMBCS_1 "LMBCS-1"
4562 #define NAME_LMBCS_2 "LMBCS-2"
4563
4564
4565    /* Some basic open/close/property tests on some LMBCS converters */
4566     {
4567
4568       char expected_subchars[] = {0x3F};   /* ANSI Question Mark */
4569       char new_subchars [] = {0x7F};       /* subst char used by SmartSuite..*/
4570       char get_subchars [1];
4571       const char * get_name;
4572       UConverter *cnv1;
4573       UConverter *cnv2;
4574
4575       int8_t len = sizeof(get_subchars);
4576
4577       UErrorCode errorCode=U_ZERO_ERROR;
4578
4579       /* Open */
4580       cnv1=ucnv_open(NAME_LMBCS_1, &errorCode);
4581       if(U_FAILURE(errorCode)) {
4582          log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
4583          return;
4584       }
4585       cnv2=ucnv_open(NAME_LMBCS_2, &errorCode);
4586       if(U_FAILURE(errorCode)) {
4587          log_data_err("Unable to open a LMBCS-2 converter: %s\n", u_errorName(errorCode));
4588          return;
4589       }
4590
4591       /* Name */
4592       get_name = ucnv_getName (cnv1, &errorCode);
4593       if (strcmp(NAME_LMBCS_1,get_name)){
4594          log_err("Unexpected converter name: %s\n", get_name);
4595       }
4596       get_name = ucnv_getName (cnv2, &errorCode);
4597       if (strcmp(NAME_LMBCS_2,get_name)){
4598          log_err("Unexpected converter name: %s\n", get_name);
4599       }
4600
4601       /* substitution chars */
4602       ucnv_getSubstChars (cnv1, get_subchars, &len, &errorCode);
4603       if(U_FAILURE(errorCode)) {
4604          log_err("Failure on get subst chars: %s\n", u_errorName(errorCode));
4605       }
4606       if (len!=1){
4607          log_err("Unexpected length of sub chars\n");
4608       }
4609       if (get_subchars[0] != expected_subchars[0]){
4610            log_err("Unexpected value of sub chars\n");
4611       }
4612       ucnv_setSubstChars (cnv2,new_subchars, len, &errorCode);
4613       if(U_FAILURE(errorCode)) {
4614          log_err("Failure on set subst chars: %s\n", u_errorName(errorCode));
4615       }
4616       ucnv_getSubstChars (cnv2, get_subchars, &len, &errorCode);
4617       if(U_FAILURE(errorCode)) {
4618          log_err("Failure on get subst chars: %s\n", u_errorName(errorCode));
4619       }
4620       if (len!=1){
4621          log_err("Unexpected length of sub chars\n");
4622       }
4623       if (get_subchars[0] != new_subchars[0]){
4624            log_err("Unexpected value of sub chars\n");
4625       }
4626       ucnv_close(cnv1);
4627       ucnv_close(cnv2);
4628
4629     }
4630
4631     /* LMBCS to Unicode - offsets */
4632     {
4633        UErrorCode errorCode=U_ZERO_ERROR;
4634
4635        const uint8_t * pSource = pszLMBCS;
4636        const uint8_t * sourceLimit = pszLMBCS + sizeof(pszLMBCS);
4637
4638        UChar Out [sizeof(pszUnicode) + 1];
4639        UChar * pOut = Out;
4640        UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar);
4641
4642        int32_t off [sizeof(offsets)];
4643
4644       /* last 'offset' in expected results is just the final size.
4645          (Makes other tests easier). Compensate here: */
4646
4647        off[(sizeof(offsets)/sizeof(offsets[0]))-1] = sizeof(pszLMBCS);
4648
4649
4650
4651       cnv=ucnv_open("lmbcs", &errorCode); /* use generic name for LMBCS-1 */
4652       if(U_FAILURE(errorCode)) {
4653            log_data_err("Unable to open a LMBCS converter: %s\n", u_errorName(errorCode));
4654            return;
4655       }
4656
4657
4658
4659       ucnv_toUnicode (cnv,
4660                       &pOut,
4661                       OutLimit,
4662                       (const char **)&pSource,
4663                       (const char *)sourceLimit,
4664                       off,
4665                       TRUE,
4666                       &errorCode);
4667
4668
4669        if (memcmp(off,offsets,sizeof(offsets)))
4670        {
4671          log_err("LMBCS->Uni: Calculated offsets do not match expected results\n");
4672        }
4673        if (memcmp(Out,pszUnicode,sizeof(pszUnicode)))
4674        {
4675          log_err("LMBCS->Uni: Calculated codepoints do not match expected results\n");
4676        }
4677        ucnv_close(cnv);
4678     }
4679     {
4680    /* LMBCS to Unicode - getNextUChar */
4681       const char * sourceStart;
4682       const char *source=(const char *)pszLMBCS;
4683       const char *limit=(const char *)pszLMBCS+sizeof(pszLMBCS);
4684       const UChar32 *results= pszUnicode32;
4685       const int *off = offsets32;
4686
4687       UErrorCode errorCode=U_ZERO_ERROR;
4688       UChar32 uniChar;
4689
4690       cnv=ucnv_open("LMBCS-1", &errorCode);
4691       if(U_FAILURE(errorCode)) {
4692            log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
4693            return;
4694       }
4695       else
4696       {
4697
4698          while(source<limit) {
4699             sourceStart=source;
4700             uniChar=ucnv_getNextUChar(cnv, &source, source + (off[1] - off[0]), &errorCode);
4701             if(U_FAILURE(errorCode)) {
4702                   log_err("LMBCS-1 ucnv_getNextUChar() failed: %s\n", u_errorName(errorCode));
4703                   break;
4704             } else if(source-sourceStart != off[1] - off[0] || uniChar != *results) {
4705                log_err("LMBCS-1 ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
4706                    uniChar, (source-sourceStart), *results, *off);
4707                break;
4708             }
4709             results++;
4710             off++;
4711          }
4712        }
4713        ucnv_close(cnv);
4714     }
4715     { /* test locale & optimization group operations: Unicode to LMBCS */
4716
4717       UErrorCode errorCode=U_ZERO_ERROR;
4718       UConverter *cnv16he = ucnv_open("LMBCS-16,locale=he", &errorCode);
4719       UConverter *cnv16jp = ucnv_open("LMBCS-16,locale=ja_JP", &errorCode);
4720       UConverter *cnv01us = ucnv_open("LMBCS-1,locale=us_EN", &errorCode);
4721       UChar uniString [] = {0x0192}; /* Latin Small letter f with hook */
4722       const UChar * pUniOut = uniString;
4723       UChar * pUniIn = uniString;
4724       uint8_t lmbcsString [4];
4725       const uint8_t * pLMBCSOut = lmbcsString;
4726       uint8_t * pLMBCSIn = lmbcsString;
4727
4728       /* 0192 (hook) converts to both group 3 & group 1. input locale should differentiate */
4729       ucnv_fromUnicode (cnv16he,
4730                         (char **)&pLMBCSIn, (const char *)(pLMBCSIn + sizeof(lmbcsString)/sizeof(lmbcsString[0])),
4731                         &pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0]),
4732                         NULL, 1, &errorCode);
4733
4734       if (lmbcsString[0] != 0x3 || lmbcsString[1] != 0x83)
4735       {
4736          log_err("LMBCS-16,locale=he gives unexpected translation\n");
4737       }
4738
4739       pLMBCSIn=lmbcsString;
4740       pUniOut = uniString;
4741       ucnv_fromUnicode (cnv01us,
4742                         (char **)&pLMBCSIn, (const char *)(lmbcsString + sizeof(lmbcsString)/sizeof(lmbcsString[0])),
4743                         &pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0]),
4744                         NULL, 1, &errorCode);
4745
4746       if (lmbcsString[0] != 0x9F)
4747       {
4748          log_err("LMBCS-1,locale=US gives unexpected translation\n");
4749       }
4750
4751       /* single byte char from mbcs char set */
4752       lmbcsString[0] = 0xAE;  /* 1/2 width katakana letter small Yo */
4753       pLMBCSOut = lmbcsString;
4754       pUniIn = uniString;
4755       ucnv_toUnicode (cnv16jp,
4756                         &pUniIn, pUniIn + 1,
4757                         (const char **)&pLMBCSOut, (const char *)(pLMBCSOut + 1),
4758                         NULL, 1, &errorCode);
4759       if (U_FAILURE(errorCode) || pLMBCSOut != lmbcsString+1 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
4760       {
4761            log_err("Unexpected results from LMBCS-16 single byte char\n");
4762       }
4763       /* convert to group 1: should be 3 bytes */
4764       pLMBCSIn = lmbcsString;
4765       pUniOut = uniString;
4766       ucnv_fromUnicode (cnv01us,
4767                         (char **)&pLMBCSIn, (const char *)(pLMBCSIn + 3),
4768                         &pUniOut, pUniOut + 1,
4769                         NULL, 1, &errorCode);
4770       if (U_FAILURE(errorCode) || pLMBCSIn != lmbcsString+3 || pUniOut != uniString+1
4771          || lmbcsString[0] != 0x10 || lmbcsString[1] != 0x10 || lmbcsString[2] != 0xAE)
4772       {
4773            log_err("Unexpected results to LMBCS-1 single byte mbcs char\n");
4774       }
4775       pLMBCSOut = lmbcsString;
4776       pUniIn = uniString;
4777       ucnv_toUnicode (cnv01us,
4778                         &pUniIn, pUniIn + 1,
4779                         (const char **)&pLMBCSOut, (const char *)(pLMBCSOut + 3),
4780                         NULL, 1, &errorCode);
4781       if (U_FAILURE(errorCode) || pLMBCSOut != lmbcsString+3 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
4782       {
4783            log_err("Unexpected results from LMBCS-1 single byte mbcs char\n");
4784       }
4785       pLMBCSIn = lmbcsString;
4786       pUniOut = uniString;
4787       ucnv_fromUnicode (cnv16jp,
4788                         (char **)&pLMBCSIn, (const char *)(pLMBCSIn + 1),
4789                         &pUniOut, pUniOut + 1,
4790                         NULL, 1, &errorCode);
4791       if (U_FAILURE(errorCode) || pLMBCSIn != lmbcsString+1 || pUniOut != uniString+1 || lmbcsString[0] != 0xAE)
4792       {
4793            log_err("Unexpected results to LMBCS-16 single byte mbcs char\n");
4794       }
4795       ucnv_close(cnv16he);
4796       ucnv_close(cnv16jp);
4797       ucnv_close(cnv01us);
4798     }
4799     {
4800        /* Small source buffer testing, LMBCS -> Unicode */
4801
4802        UErrorCode errorCode=U_ZERO_ERROR;
4803
4804        const uint8_t * pSource = pszLMBCS;
4805        const uint8_t * sourceLimit = pszLMBCS + sizeof(pszLMBCS);
4806        int codepointCount = 0;
4807
4808        UChar Out [sizeof(pszUnicode) + 1];
4809        UChar * pOut = Out;
4810        UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar);
4811
4812
4813        cnv = ucnv_open(NAME_LMBCS_1, &errorCode);
4814        if(U_FAILURE(errorCode)) {
4815            log_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
4816            return;
4817        }
4818
4819
4820        while ((pSource < sourceLimit) && U_SUCCESS (errorCode))
4821        {
4822            ucnv_toUnicode (cnv,
4823                &pOut,
4824                OutLimit,
4825                (const char **)&pSource,
4826                (const char *)(pSource+1), /* claim that this is a 1- byte buffer */
4827                NULL,
4828                FALSE,    /* FALSE means there might be more chars in the next buffer */
4829                &errorCode);
4830
4831            if (U_SUCCESS (errorCode))
4832            {
4833                if ((pSource - (const uint8_t *)pszLMBCS) == offsets [codepointCount+1])
4834                {
4835                    /* we are on to the next code point: check value */
4836
4837                    if (Out[0] != pszUnicode[codepointCount]){
4838                        log_err("LMBCS->Uni result %lx should have been %lx \n",
4839                            Out[0], pszUnicode[codepointCount]);
4840                    }
4841
4842                    pOut = Out; /* reset for accumulating next code point */
4843                    codepointCount++;
4844                }
4845            }
4846            else
4847            {
4848                log_err("Unexpected Error on toUnicode: %s\n", u_errorName(errorCode));
4849            }
4850        }
4851        {
4852          /* limits & surrogate error testing */
4853          uint8_t LIn [sizeof(pszLMBCS)];
4854          const uint8_t * pLIn = LIn;
4855
4856          char LOut [sizeof(pszLMBCS)];
4857          char * pLOut = LOut;
4858
4859          UChar UOut [sizeof(pszUnicode)];
4860          UChar * pUOut = UOut;
4861
4862          UChar UIn [sizeof(pszUnicode)];
4863          const UChar * pUIn = UIn;
4864
4865          int32_t off [sizeof(offsets)];
4866          UChar32 uniChar;
4867
4868          errorCode=U_ZERO_ERROR;
4869
4870          /* negative source request should always return U_ILLEGAL_ARGUMENT_ERROR */
4871          ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn-1,off,FALSE, &errorCode);
4872          if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
4873          {
4874             log_err("Unexpected Error on negative source request to ucnv_fromUnicode: %s\n", u_errorName(errorCode));
4875          }
4876          errorCode=U_ZERO_ERROR;
4877          ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)(pLIn-1),off,FALSE, &errorCode);
4878          if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
4879          {
4880             log_err("Unexpected Error on negative source request to ucnv_toUnicode: %s\n", u_errorName(errorCode));
4881          }
4882          errorCode=U_ZERO_ERROR;
4883
4884          uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)(pLIn-1), &errorCode);
4885          if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
4886          {
4887             log_err("Unexpected Error on negative source request to ucnv_getNextUChar: %s\n", u_errorName(errorCode));
4888          }
4889          errorCode=U_ZERO_ERROR;
4890
4891          /* 0 byte source request - no error, no pointer movement */
4892          ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)pLIn,off,FALSE, &errorCode);
4893          ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn,off,FALSE, &errorCode);
4894          if(U_FAILURE(errorCode)) {
4895             log_err("0 byte source request: unexpected error: %s\n", u_errorName(errorCode));
4896          }
4897          if ((pUOut != UOut) || (pUIn != UIn) || (pLOut != LOut) || (pLIn != LIn))
4898          {
4899               log_err("Unexpected pointer move in 0 byte source request \n");
4900          }
4901          /*0 byte source request - GetNextUChar : error & value == fffe or ffff */
4902          uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)pLIn, &errorCode);
4903          if (errorCode != U_INDEX_OUTOFBOUNDS_ERROR)
4904          {
4905             log_err("Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s\n", u_errorName(errorCode));
4906          }
4907          if (((uint32_t)uniChar - 0xfffe) > 1) /* not 0xfffe<=uniChar<=0xffff */
4908          {
4909             log_err("Unexpected value on 0-byte source request to ucnv_getnextUChar \n");
4910          }
4911          errorCode = U_ZERO_ERROR;
4912
4913          /* running out of target room : U_BUFFER_OVERFLOW_ERROR */
4914
4915          pUIn = pszUnicode;
4916          ucnv_fromUnicode(cnv, &pLOut,pLOut+offsets[4],&pUIn,pUIn+sizeof(pszUnicode)/sizeof(UChar),off,FALSE, &errorCode);
4917          if (errorCode != U_BUFFER_OVERFLOW_ERROR || pLOut != LOut + offsets[4] || pUIn != pszUnicode+4 )
4918          {
4919             log_err("Unexpected results on out of target room to ucnv_fromUnicode\n");
4920          }
4921
4922          errorCode = U_ZERO_ERROR;
4923
4924          pLIn = pszLMBCS;
4925          ucnv_toUnicode(cnv, &pUOut,pUOut+4,(const char **)&pLIn,(const char *)(pLIn+sizeof(pszLMBCS)),off,FALSE, &errorCode);
4926          if (errorCode != U_BUFFER_OVERFLOW_ERROR || pUOut != UOut + 4 || pLIn != (const uint8_t *)pszLMBCS+offsets[4])
4927          {
4928             log_err("Unexpected results on out of target room to ucnv_toUnicode\n");
4929          }
4930
4931          /* unpaired or chopped LMBCS surrogates */
4932
4933          /* OK high surrogate, Low surrogate is chopped */
4934          LIn [0] = 0x14;
4935          LIn [1] = 0xD8;
4936          LIn [2] = 0x01;
4937          LIn [3] = 0x14;
4938          LIn [4] = 0xDC;
4939          pLIn = LIn;
4940          errorCode = U_ZERO_ERROR;
4941          pUOut = UOut;
4942
4943          ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
4944          ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
4945          if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
4946          {
4947             log_err("Unexpected results on chopped low surrogate\n");
4948          }
4949
4950          /* chopped at surrogate boundary */
4951          LIn [0] = 0x14;
4952          LIn [1] = 0xD8;
4953          LIn [2] = 0x01;
4954          pLIn = LIn;
4955          errorCode = U_ZERO_ERROR;
4956          pUOut = UOut;
4957
4958          ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+3),off,TRUE, &errorCode);
4959          if (UOut[0] != 0xD801 || U_FAILURE(errorCode) || pUOut != UOut + 1 || pLIn != LIn + 3)
4960          {
4961             log_err("Unexpected results on chopped at surrogate boundary \n");
4962          }
4963
4964          /* unpaired surrogate plus valid Unichar */
4965          LIn [0] = 0x14;
4966          LIn [1] = 0xD8;
4967          LIn [2] = 0x01;
4968          LIn [3] = 0x14;
4969          LIn [4] = 0xC9;
4970          LIn [5] = 0xD0;
4971          pLIn = LIn;
4972          errorCode = U_ZERO_ERROR;
4973          pUOut = UOut;
4974
4975          ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+6),off,TRUE, &errorCode);
4976          if (UOut[0] != 0xD801 || UOut[1] != 0xC9D0 || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 6)
4977          {
4978             log_err("Unexpected results after unpaired surrogate plus valid Unichar \n");
4979          }
4980
4981       /* unpaired surrogate plus chopped Unichar */
4982          LIn [0] = 0x14;
4983          LIn [1] = 0xD8;
4984          LIn [2] = 0x01;
4985          LIn [3] = 0x14;
4986          LIn [4] = 0xC9;
4987
4988          pLIn = LIn;
4989          errorCode = U_ZERO_ERROR;
4990          pUOut = UOut;
4991
4992          ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
4993          if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
4994          {
4995             log_err("Unexpected results after unpaired surrogate plus chopped Unichar \n");
4996          }
4997
4998          /* unpaired surrogate plus valid non-Unichar */
4999          LIn [0] = 0x14;
5000          LIn [1] = 0xD8;
5001          LIn [2] = 0x01;
5002          LIn [3] = 0x0F;
5003          LIn [4] = 0x3B;
5004
5005          pLIn = LIn;
5006          errorCode = U_ZERO_ERROR;
5007          pUOut = UOut;
5008
5009          ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5010          if (UOut[0] != 0xD801 || UOut[1] != 0x1B || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 5)
5011          {
5012             log_err("Unexpected results after unpaired surrogate plus valid non-Unichar\n");
5013          }
5014
5015          /* unpaired surrogate plus chopped non-Unichar */
5016          LIn [0] = 0x14;
5017          LIn [1] = 0xD8;
5018          LIn [2] = 0x01;
5019          LIn [3] = 0x0F;
5020
5021          pLIn = LIn;
5022          errorCode = U_ZERO_ERROR;
5023          pUOut = UOut;
5024
5025          ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+4),off,TRUE, &errorCode);
5026
5027          if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 4)
5028          {
5029             log_err("Unexpected results after unpaired surrogate plus chopped non-Unichar\n");
5030          }
5031        }
5032     }
5033    ucnv_close(cnv);  /* final cleanup */
5034 }
5035
5036
5037 static void TestJitterbug255()
5038 {
5039     const uint8_t testBytes[] = { 0x95, 0xcf, 0x8a, 0xb7, 0x0d, 0x0a, 0x00 };
5040     const uint8_t *testBuffer = testBytes;
5041     const uint8_t *testEnd = testBytes + sizeof(testBytes);
5042     UErrorCode status = U_ZERO_ERROR;
5043     UChar32 result;
5044     UConverter *cnv = 0;
5045
5046     cnv = ucnv_open("shift-jis", &status);
5047     if (U_FAILURE(status) || cnv == 0) {
5048         log_data_err("Failed to open the converter for SJIS.\n");
5049                 return;
5050     }
5051     while (testBuffer != testEnd)
5052     {
5053         result = ucnv_getNextUChar (cnv, (const char **)&testBuffer, (const char *)testEnd , &status);
5054         if (U_FAILURE(status))
5055         {
5056             log_err("Failed to convert the next UChar for SJIS.\n");
5057             break;
5058         }
5059     }
5060     ucnv_close(cnv);
5061 }
5062
5063 static void TestEBCDICUS4XML()
5064 {
5065     UChar unicodes_x[] = {0x0000, 0x0000, 0x0000, 0x0000};
5066     static const UChar toUnicodeMaps_x[] = {0x000A, 0x000A, 0x000D, 0x0000};
5067     static const char fromUnicodeMaps_x[] = {0x25, 0x25, 0x0D, 0x00};
5068     static const char newLines_x[] = {0x25, 0x15, 0x0D, 0x00};
5069     char target_x[] = {0x00, 0x00, 0x00, 0x00};
5070     UChar *unicodes = unicodes_x;
5071     const UChar *toUnicodeMaps = toUnicodeMaps_x;
5072     char *target = target_x;
5073     const char* fromUnicodeMaps = fromUnicodeMaps_x, *newLines = newLines_x;
5074     UErrorCode status = U_ZERO_ERROR;
5075     UConverter *cnv = 0;
5076
5077     cnv = ucnv_open("ebcdic-xml-us", &status);
5078     if (U_FAILURE(status) || cnv == 0) {
5079         log_data_err("Failed to open the converter for EBCDIC-XML-US.\n");
5080         return;
5081     }
5082     ucnv_toUnicode(cnv, &unicodes, unicodes+3, (const char**)&newLines, newLines+3, NULL, TRUE, &status);
5083     if (U_FAILURE(status) || memcmp(unicodes_x, toUnicodeMaps, sizeof(UChar)*3) != 0) {
5084         log_err("To Unicode conversion failed in EBCDICUS4XML test. %s\n",
5085             u_errorName(status));
5086         printUSeqErr(unicodes_x, 3);
5087         printUSeqErr(toUnicodeMaps, 3);
5088     }
5089     status = U_ZERO_ERROR;
5090     ucnv_fromUnicode(cnv, &target, target+3, (const UChar**)&toUnicodeMaps, toUnicodeMaps+3, NULL, TRUE, &status);
5091     if (U_FAILURE(status) || memcmp(target_x, fromUnicodeMaps, sizeof(char)*3) != 0) {
5092         log_err("From Unicode conversion failed in EBCDICUS4XML test. %s\n",
5093             u_errorName(status));
5094         printSeqErr((const unsigned char*)target_x, 3);
5095         printSeqErr((const unsigned char*)fromUnicodeMaps, 3);
5096     }
5097     ucnv_close(cnv);
5098 }
5099
5100 #if !UCONFIG_NO_COLLATION
5101
5102 static void TestJitterbug981(){
5103     const UChar* rules;
5104     int32_t rules_length, target_cap, bytes_needed, buff_size;
5105     UErrorCode status = U_ZERO_ERROR;
5106     UConverter *utf8cnv;
5107     UCollator* myCollator;
5108     char *buff;
5109     int numNeeded=0;
5110     utf8cnv = ucnv_open ("utf8", &status);
5111     if(U_FAILURE(status)){
5112         log_err("Could not open UTF-8 converter. Error: %s", u_errorName(status));
5113         return;
5114     }
5115     myCollator = ucol_open("zh", &status);
5116     if(U_FAILURE(status)){
5117         log_err("Could not open collator for zh locale. Error: %s", u_errorName(status));
5118         return;
5119     }
5120
5121     rules = ucol_getRules(myCollator, &rules_length);
5122     buff_size = rules_length * ucnv_getMaxCharSize(utf8cnv);
5123     buff = malloc(buff_size);
5124
5125     target_cap = 0;
5126     do {
5127         ucnv_reset(utf8cnv);
5128         status = U_ZERO_ERROR;
5129         if(target_cap >= buff_size) {
5130             log_err("wanted %d bytes, only %d available\n", target_cap, buff_size);
5131             return;
5132         }
5133         bytes_needed = ucnv_fromUChars(utf8cnv, buff, target_cap,
5134             rules, rules_length, &status);
5135         target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1;
5136         if(numNeeded!=0 && numNeeded!= bytes_needed){
5137             log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5138         }
5139         numNeeded = bytes_needed;
5140     } while (status == U_BUFFER_OVERFLOW_ERROR);
5141     ucol_close(myCollator);
5142     ucnv_close(utf8cnv);
5143     free(buff);
5144 }
5145
5146 #endif
5147
5148 static void TestJitterbug1293(){
5149     UChar src[] = {0x30DE, 0x30A4, 0x5E83, 0x544A, 0x30BF, 0x30A4, 0x30D7,0x000};
5150     char target[256];
5151     UErrorCode status = U_ZERO_ERROR;
5152     UConverter* conv=NULL;
5153     int32_t target_cap, bytes_needed, numNeeded = 0;
5154     conv = ucnv_open("shift-jis",&status);
5155     if(U_FAILURE(status)){
5156       log_data_err("Could not open Shift-Jis converter. Error: %s", u_errorName(status));
5157       return;
5158     }
5159
5160     do{
5161         target_cap =0;
5162         bytes_needed = ucnv_fromUChars(conv,target,256,src,u_strlen(src),&status);
5163         target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1;
5164         if(numNeeded!=0 && numNeeded!= bytes_needed){
5165           log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5166         }
5167         numNeeded = bytes_needed;
5168     } while (status == U_BUFFER_OVERFLOW_ERROR);
5169     if(U_FAILURE(status)){
5170       log_err("An error occured in ucnv_fromUChars. Error: %s", u_errorName(status));
5171       return;
5172     }
5173     ucnv_close(conv);
5174 }
5175
5176 #endif